114 morningside ave.
// . and that basically stops the address from telescoping
// prematurely
// . no i had basically an event section with no tod, that
// had a month/daynum range (panjea.org) and said to
// call for details and it had an "after at" address! so
// that after at address was causing all its neighboring
// events that had tods to get SEC_MULT_ADDRESSES
//if ( ! ( di->m_hasType & DT_TOD ) ) continue;
// skip if not in body
if ( di->m_a < 0 ) continue;
// get section
Section *sp = m_sections->m_sectionPtrs[di->m_a];
// telescope up!
for ( ; sp ; sp = sp->m_parent ) {
// hash it, return NULL with g_errno set on error
if ( ! m_tt.addKey ( &sp , &di ) ) return false;
// counts
if ( ! m_tnt.addTerm32 ( (int32_t *)&sp, 1)) return false;
}
}
// sanity check
if ( ! m_nd->m_spideredTimeValid ) { char *xx=NULL;*xx=0; }
// int16_tcut
//Sections *ss = m_sections;
/*
////////////////
//
// . add votes for dates being in future/past/current time
// . then in order to add an event whose date is within 24 hrs of
// the current time we must have had other dates in that section
// with the SV_FUTURE_DATE set, to be sure it is not a comment date
// or a clock date.
//
////////////////
for ( int32_t i = 0 ; i < m_numDatePtrs ; i++ ) {
// breathe
QUICKPOLL ( m_niceness );
// int16_tcut
Date *di = m_datePtrs[i];
// skip if none
if ( ! di ) continue;
// must have month daynum and year
//if ( ! ( di->m_hasType & (DT_DAYNUM|DT_MONTH|DT_YEAR) ) )
// continue;
// must have a valid timestamp then!
if ( ! di->m_timestamp ) continue;
// the section flag type
int32_t sectionType = SV_CURRENT_DATE;
// past, current or future?
if ( di->m_timestamp < m_nd->m_spideredTime - 24*3600 )
sectionType = SV_PAST_DATE;
if ( di->m_timestamp > m_nd->m_spideredTime + 24*3600 )
sectionType = SV_FUTURE_DATE;
// . get section that contains this date's first component
// . might be a telescoped date that touches several sections
Section *sn = di->m_section;
// the diff
int32_t delta = di->m_timestamp - m_nd->m_spideredTime;
// set the appropriate bit
if ( ! ss->addVote(sn,sectionType,delta) ) return false;
}
*/
//////////////////////////////
//
// set DF_BAD_RECURRING_DOW
//
//////////////////////////////
// this saves us that one bad recurring date in obits.abqjournal.com
// but it really hurts us in many more ways...
// this also fixes abqfolkdance.org by not allowing dates like
// "evening [[]] 8:15" through. that section is really meant as
// a tod breakdown of the main time range, so we could at some point
// put in support for that.
/*
suppflags_t sfmask = 0;
sfmask |= SF_FIRST;
sfmask |= SF_SECOND;
sfmask |= SF_THIRD;
sfmask |= SF_FOURTH;
sfmask |= SF_FIFTH;
sfmask |= SF_EVERY;
sfmask |= SF_PLURAL;
// stop recurring dates like "Tuesday from 3:00 p.m. until 7:30 p.m"
// in obits.abqjournal.com from being recurring.
// if we got just a DOW and no daynum, no dow range, and not plural
// DOW and does not have "each" or "every" before it, then we
// are invalid, not recurring. set DF_BADDATEFORMAT
for ( int32_t i = 0 ; i < m_numDatePtrs ; i++ ) {
// breathe
QUICKPOLL ( m_niceness );
// int16_tcut
Date *di = m_datePtrs[i];
// skip if was incorporated into a compound date, range or list
if ( ! di ) continue;
// must match all these
if ( ! (di->m_hasType & DT_DOW) &&
// look for 3pm [[]] evening or christmas [[]] 7pm too
! (di->m_hasType & DT_HOLIDAY) ) continue;
// if we got like "Monday-Friday" in our telescope, that's ok
if ( di->m_hasType & DT_RANGE_DOW ) continue;
// and no range of dows, "each Tuesday" or "Tuesdays" etc.
if ( di->m_suppFlags & sfmask ) continue;
// no daynum
if ( di->m_hasType & DT_DAYNUM ) continue;
// now, the last chance save. if we telescope up and find that
// before hitting a section of tods that we get a date that
// has a DOW range, we are saved! protects store hours
// get our section
Section *sp = di->m_section;
// the telescope loop
subloop:
// get first slot in this section
int32_t slot = m_tt.getSlot ( &sp );
// count all that are not us
int32_t count = 0;
// loop over all tods that telescoped up to this sec.
for ( ; slot >= 0 ; slot = m_tt.getNextSlot(slot,&sp) ) {
// get it
Date *dx = *(Date **)m_tt.getValueFromSlot(slot);
// skip if us
if ( dx == di ) continue;
// count it
count++;
// stop if it has a dow range
if ( dx->m_hasType & DT_RANGE_DOW ) break;
}
// if it had a dow range we are saved
if ( slot >= 0 ) continue;
// otherwise, if none found, telescope and keep going
if ( count == 0 ) {
// telescope
sp = sp->m_parent;
// and do some more
if ( sp ) goto subloop;
}
//
// and mark it so we know what's up
//
di->m_flags |= DF_BAD_RECURRING_DOW;
}
*/
//
// if a date is being used as a header and also as a base for
// a telescope, keep one and invalidate the other by setting the
// DF_MULTIUSE and DF_INVALID flag
//
///////////////////////////////////////
//
// part 9
//
// if we only have one year on the page, append that to dates
// missing the year. ignore copyright years.
//
///////////////////////////////////////
// NO! this failed on:
// http://www.zvents.com/albuquerque-nm/events/show/88543421-the-love-song-of-j-robert-oppenheimer-by-carson-kreitzer
/*
int32_t year = -1;
for ( int32_t i = 0 ; i < m_numDatePtrs ; i++ ) {
// breathe
QUICKPOLL ( m_niceness );
// int16_tcut
Date *di = m_datePtrs[i];
// skip if was incorporated into a compound date, range or list
if ( ! di ) continue;
// skip if has no year
if ( ! ( di->m_hasType & DT_YEAR ) ) continue;
// ignore copyright years
if ( di->m_flags & DF_COPYRIGHT ) continue;
// skip ranges, cuz "2008-2010" range has m_year set to 0
// since it is not valid
if ( di->m_type == DT_RANGE && di->m_year <= 0 ) {
// no assumed year
year = -2;
break;
}
// skip if invalid (has some disagreement in it)
if ( di->m_flags & DF_INVALID ) continue;
// must have a year if DT_YEAR is set
if ( di->m_year == 0 ) { char *xx=NULL;*xx=0; }
// . keep going if it already agrees with what we got
// . di->m_year is 0 if it has none.. how can that happen here?
if ( year == di->m_year ) continue;
// to be a candidate it must be bookended!
//if ( ! ( di->m_flags & DF_LEFT_BOOKEND ) ) continue;
//if ( ! ( di->m_flags & DF_RIGHT_BOOKEND ) ) continue;
if ( di->m_flags & DF_FUZZY ) continue;
// must have more than just a year
if ( di->m_type == DT_YEAR ) continue;
// stop if range of years
//if ( di->m_minYear != di->m_maxYear ) { year = -2; break; }
// get it
if ( year == -1 ) {
year = di->m_year; continue; }
// if in agreement, keep going
if ( di->m_year == year ) continue;
// otherwise, we have no unifed year
year = -2;
break;
}
// add in the assumed year
for ( int32_t i = 0 ; year > 0 && i < m_numDatePtrs ; i++ ) {
// breathe
QUICKPOLL ( m_niceness );
// int16_tcut
Date *di = m_datePtrs[i];
// skip if was incorporated into a compound date, range or list
if ( ! di ) continue;
// skip if has year already
if ( di->m_hasType & DT_YEAR ) continue;
// or if just a daynum or modifier
if ( di->m_type == DT_DAYNUM ) continue;
//if ( di->m_type == DT_MOD ) continue;
// add it in
di->m_year = year;
// set flag
//di->m_flags |= DF_HAS_YEAR;
di->m_hasType |= DT_YEAR;
// this one too
di->m_flags |= DF_ASSUMED_YEAR;
}
*/
// . treat "tomorrow" as relative to pub date
/////////////////////////////////////
//
// set Date::m_penalty (for pub date detection)
//
/////////////////////////////////////
// assume no article
m_firstGood = m_lastGood = -1;
// . sets them to -1 if no article
// . otherwise they contain the article between them
//if ( m_sections ) ss->getArticleRange ( &m_firstGood , &m_lastGood );
dateflags_t uf = DF_MATCHESURLMONTH|DF_MATCHESURLDAY|DF_MATCHESURLYEAR;
// . ok, now rank the pub dates in order of most probable to least
// . the date with the smallest penalty wins
for ( int32_t i = 0 ; i < m_numDatePtrs ; i++ ) {
// breathe
QUICKPOLL(m_niceness);
// get it
Date *di = m_datePtrs[i];
// skip if none
if ( ! di ) continue;
// get flag
dateflags_t flags = di->m_flags;
// reset
di->m_penalty = 0;
// . NEW STUFF
// . get his section
Section *sn = NULL;
// set section
if ( di->m_a >= 0 ) sn = m_sections->m_sectionPtrs[di->m_a];
// use "vote" flags (taken from datedb voters)
if ( sn ) {
// big penalty if we are voted as a clock!
//if ( m_osvt->getScore(sn,SV_CLOCK) > 0.5 )
// di->m_penalty += 5000000;
// . HACK: SV_TEXTY_MAX_SAMPLED is a vote statistic!
// . if we are a unique section, reward ourselves
// . this means that at least one vote had 2+
// occurences of sections with this sn->m_tagHash
//if(m_osvt->getNumSampled(sn,SV_TEXTY_MAX_SAMPLED)<=1)
// di->m_penalty -= 100000;
}
// not a range or list
if(di->m_hasType&(DT_LIST_ANY|DT_RANGE_ANY)){
di->m_penalty += 5000000;
continue;
}
// needs a year!
if ( ! ( di->m_hasType & DT_YEAR ) ) {
di->m_penalty += 5000000;
continue;
}
// and how about a day or timestamp at least
if ( ! ( di->m_hasType & DT_DAYNUM ) &&
! ( di->m_hasType & DT_TIMESTAMP ) ) {
di->m_penalty += 5000000;
continue;
}
/*
// . put in our votes for clock
// . stores into Sections::m_nsvt which is added to Sectiondb
// when XmlDoc calls Sections::hash()
if ( flags & DF_CLOCK ) {
if ( ! ss->addVote(sn,SV_CLOCK,1.0) ) return false;
else
if ( ! ss->addVote(sn,SV_CLOCK,0.0) ) return false;
}
*/
// we put in our vote like this now
// set SEC_* flags for date format types
if ( ! (flags & DF_AMBIGUOUS) && (flags & DF_MONTH_NUMERIC) ) {
/*
// put in our vote for european
if ( flags & DF_EUROPEAN )
if ( ! ss->addVote(sn,SV_EURDATEFMT,1.0) )
return false;
// put in our vote for american
if ( flags & DF_AMERICAN )
if ( ! ss->addVote(sn,SV_EURDATEFMT,0.0) )
return false;
*/
// what did old voters say about this section?
/*
if ( sn ) {
// . ve = probability it is european format
// . includes the vote we just added above!
float ve = m_osvt->getScore(sn,SV_EURDATEFMT);
// disagreement is bad
if ( (flags & DF_AMERICAN) && ve > 0.5 )
di->m_penalty += 1000000;
if ( (flags & DF_EUROPEAN) && ve < 0.5 )
di->m_penalty += 1000000;
}
*/
}
// these are golden
if ( flags & DF_FROM_RSSINLINK ) di->m_penalty -= 2000000;
if ( flags & DF_FROM_RSSINLINKLOCAL ) di->m_penalty -= 2000000;
if ( flags & DF_FROM_URL ) di->m_penalty -= 1000000;
if ( flags & DF_FROM_META ) di->m_penalty -= 3000000;
// this is the worst
//if ( flags & DF_INDEX_CLOCK ) di->m_penalty += 5000000;
// this could have been set after the indexdb lookup, thus
// we are being re-called!
if ( flags & DF_CLOCK ) di->m_penalty += 5000000;
//if ( flags & DF_NOYEAR ) di->m_penalty += 5000000;
//if ( flags & DF_TOD ) di->m_penalty += 5000000;
// this is bad
if ( flags & DF_FUTURE ) di->m_penalty += 5000000;
// a slight penalty for this to break ties
if ( flags & DF_NOTIMEOFDAY ) di->m_penalty += 10;
// bad tag? script marquee, style, select?
if ( flags & DF_INBADTAG ) di->m_penalty += 900000;
// now only do body
if ( ! (flags & DF_FROM_BODY) ) continue;
// unique is very good
if ( flags & DF_UNIQUETAGHASH ) di->m_penalty -= 100000;
// just as good as the url i guess! should be better if
// we got a time of day!
if ( (flags & uf) == uf &&
! ( di->m_hasType & DT_LIST_ANY ) &&
! ( di->m_hasType & DT_RANGE_ANY ) )
di->m_penalty -= 1000000;
// if he is american and we are european
if ( ( flags & DF_AMBIGUOUS ) && ( flags & DF_AMERICAN ) &&
m_dateFormat == DF_EUROPEAN )
di->m_penalty += 5000000;
// or vice versa
if ( (flags & DF_AMBIGUOUS) && (flags & DF_EUROPEAN) &&
m_dateFormat == DF_AMERICAN )
di->m_penalty += 5000000;
// http://www.physorg.com/news148193433.html has a good
// pub date in a hyperlink!
//if ( flags & DF_INHYPERLINK )
// di->m_penalty += 5000000;
// if no positive scoring section skip this
if ( m_firstGood == -1 ) continue;
if ( m_lastGood == -1 ) continue;
// . and the closer to the top part of the positive scoring
// section, the better
// . get our word pos
// . the delta
int32_t delta1 = m_firstGood - di->m_a;
// make positive
if ( delta1 < 0 ) delta1 *= -1;
// . same for the bottom of the positive scoring section
// . that delta
int32_t delta2 = m_lastGood - di->m_a;
// make positive
if ( delta2 < 0 ) delta2 *= -1;
// make into scores. top is better than bottom a bit
delta2 *= 2;
// the bigger these are, the more the penalty
if ( delta1 <= delta2 ) di->m_penalty += delta1;
else di->m_penalty += delta2;
}
// assume no best pub date right now
m_best = NULL;
// . get the current best pub date
// . the best date is the one with the smallest penalty
for ( int32_t i = 0 ; i < m_numDatePtrs ; i++ ) {
// breathe
QUICKPOLL(m_niceness);
// get it
Date *di = m_datePtrs[i];
// skip if none
if ( ! di ) continue;
// must be a certified known date format
// set it if its the first
if ( ! m_best ) { m_best = di ; continue; }
// skip it if its penalty is higher than the best we got
if ( di->m_penalty >= m_best->m_penalty ) continue;
// otherwise, it is the new winner
m_best = di;
}
// if penalty is >= 1M that means definitely NOT a pub date, so if
// that is all we got, then we don't got nuthin'
if ( m_best && m_best->m_penalty >= 1000000 ) m_best = NULL;
// get the winner's flags
dateflags_t flags = 0;
if ( m_best ) flags = m_best->m_flags;
// in document body? (or url?)
bool inBody = ( flags & DF_FROM_BODY );
if ( inBody && ! ( flags & DF_UNIQUETAGHASH ) ) {
log("date: ignoring repeated tag hash pub dates until "
"comment detector is working.");
m_best = NULL;
}
// int16_tcut
char *u = ""; if ( m_url ) u = m_url->getUrl();
// or if its ambiguous and not sure sure if american or european
if (m_best &&inBody && (flags & DF_AMBIGUOUS) && m_dateFormat == -1 ) {
// set this flag
m_needQuickRespider = true;
// note it
log("date: url %s needs 25 hour ambiguous-based "
"respider.", u);
// and do not take any chances
m_best = NULL;
}
/*
// cancel out clocks
if ( m_best && inBody ) {
// get section
Section *sn = m_sections->m_sectionPtrs[m_best->m_a];
// . get the vote that we are a clock
// . only consult Sections::m_osvt for this one
float ons = m_osvt->getNumSampled(sn,SV_CLOCK);
// if he might be a clock but we do nto have any "voting proof"
// then reschedule for a quick respider 25 hours later
if ( sn && ! (flags & DF_NOTCLOCK) && ons == 0.0 ) {
// set this flag
m_needQuickRespider = true;
// note it
log("date: url %s needs 25 hour clock-based respider.",
u);
// and do not take any chances
m_best = NULL;
}
}
*/
// set Date::m_maxYearGuess for each date that
// does not have a year. use the year of the nearest neighbor
// to determine it. allow for that year minus or plus one if
// we also have a DOW. and also allow for that year minus one if
// we are from a month # greater than our neighbor that supplied
// the year, assuming he did have a month. so if they have a list
// like Dec 13th and the neighbor following is Jan 2nd 2011, we
// allow the year 2010 for Dec 13th. and only consider non-fuzzy
// years. so neighbors must be non-fuzzy dates.
setMaxYearGuesses();
// . now caller might have determined that the outlink to this page
// was added on its parent page between times A and B
// . fallback to pub date if could not get a min/max from the page
//if ( m_maxYearOnPage == -1 ) {
// if ( minPubDate >= 0 ) y1 = getYear ( minPubDate );
// if ( maxPubDate >= 0 ) y2 = getYear ( maxPubDate );
//}
// get year
//int32_t y1 = getYear ( minPubDate );
//int32_t y2 = getYear ( maxPubDate );
// if not same, bail!
//if ( y1 != y2 ) return true;
// set it
//int32_t year = y1;
// ok, now set the assumed year to this for all dates that do
// not have a year!
for ( int32_t i = 0 ; i < m_numDatePtrs ; i++ ) {
// breathe
QUICKPOLL ( m_niceness );
// must agree on year! otherwise, not sure which it could be!
//if ( y1 != y2 ) break;
// int16_tcut
Date *di = m_datePtrs[i];
// skip if was incorporated into a compound date, range or list
if ( ! di ) continue;
// skip if in a telescope or subdate
//if ( di->m_subdateOf ) continue;
// for downtowncorvalis it doesn't set subdateOf when it shuold
// for September 19th - December 10th
//if ( di->m_flags & DF_USEDASHEADER ) continue;
// skip if has year already
if ( di->m_hasType & DT_YEAR ) continue;
// or if just a daynum or modifier
if ( di->m_type == DT_DAYNUM ) continue;
// do we have a daynum?
bool hasDaynum = ( di->m_hasType & DT_DAYNUM );
// fix for signmeup.com : "Thanksgiving [[]] 9:00 am"
//if ( di->m_suppFlags & SF_NORMAL_HOLIDAY )
if ( di->m_hasType & DT_HOLIDAY )
hasDaynum = true;
// must have a daynum though!
if ( ! hasDaynum ) continue;
// skip close dates
if ( di->m_flags & DF_CLOSE_DATE ) continue;
// limit to this interval
int32_t a = m_nd->m_spideredTime;
// this spidertime is in UTC and the date is not, so the spider
// time is like 5 hours off or so. so it made an event
// that started right around the spider time get a
// maxStartTime (a different maxStartTime) beyond the 4 hour
// clock detector limit in Events.cpp that sets EV_SAMEDAY.
// so EV_SAMEDAY was never getting set.
a -= 24 * 3600;
// eveisays.com/jazz.html is
// 8 months from when we spidered it
int32_t b = m_nd->m_spideredTime + DAYLIMIT*24*3600;
// get max guess year
int32_t gy = di->m_maxYearGuess;
// . restrict b some more
// . guess year is > 0 if valid
// . get max timepoint from end of max year from all dates
// we checked out on the page
if ( gy > 0 ) {
// get end of that year in time_t format
int32_t ye = getYearMonthStart(gy+1,1) - 1;
// go out 6 months into that year... if
// the date has a recurring dow like "ever wednesday"
// or "every day"
if ( di->m_suppFlags & SF_RECURRING_DOW )
// add about 8*30 days to it
ye += 8*30*86400;
// restrict b to that
if ( ye < a ) {
// forget it if too old
di->m_flags |= DF_YEAR_UNKNOWN;
continue;
}
// restrict?
if ( ye < b ) b = ye;
}
// . allow some time for new years eve parties
// . fixes eventvibe.com/...Yacht which was until 1am
b += 6 * 3600;
// set the limiting interval
di->m_minStartFocus = a;
di->m_maxStartFocus = b;
// flag it
di->m_flags |= DF_ASSUMED_YEAR;
}
// set the pub date here
m_pubDate = -1;
if ( m_best ) m_pubDate = m_best->m_timestamp;
/////////////////////
//
// see if pub date change from last old xml doc to now.
// that would probably indicate some parsing issues!
//
/////////////////////
if ( m_od && (time_t)m_od->m_pubDate != m_pubDate ) {
log("build: pub date change since last spider u=%s from "
"%"UINT32" to %"UINT32""
,u
,m_od->m_pubDate
,(uint32_t)m_pubDate);
// ignore it now
m_pubDate = -1;
//char *xx=NULL;*xx=0;
}
// when this doc was spidered
if ( ! m_nd->m_spideredTimeValid ) { char *xx=NULL;*xx=0; }
// no future pub dates allowed
//int32_t nowGlobal = getTimeGlobal();
if ( m_best &&
m_best->m_timestamp>(time_t)m_nd->m_spideredTime){//nowGlobal){
// i've seen this happen for
// http://www.lis.illinois.edu/events/2011/02/18/mix-it-lively-event-community-informatics-seed-fund-recipients-community-partners-
// because it has the future date in the url...
log("build: pub date in future utc u=%s !",u);
// ignore it now
m_pubDate = -1;
//char *xx=NULL;*xx=0;
}
///////////////////////
//
// set SEC_EVENT_BROTHER bits (PART 2)
//
// . fix for burtstikilounge.com which does not have recognized dates
// until after telescoping
//
///////////////////////
// use this one now that we have telescopes
setDayXors ();
// and hopefully we'll find event brothers
setEventBrotherBits();
// before when we called it we did not have telescoped dates,
// so call it again here
setDateHashes();
// flag it
m_setDateHashes = true;
// need this for printTextNorm()
setDateParents();
////////////
//
// set DF_IN_LIST if date is in a list
//
////////////
for ( int32_t i = 0 ; i < m_numDatePtrs ; i++ ) {
// breathe
QUICKPOLL(m_niceness);
// get it
Date *di = m_datePtrs[i];
// skip if none
if ( ! di ) continue;
// loop over the date elements of di
int32_t ne;
Date **de = getDateElements(di,&ne);
int32_t dh32 = 0;
// date elements are sorted by m_a
for ( int32_t j = 0 ; j < ne ; j++ ) {
// breathe
QUICKPOLL(m_niceness);
// get it
Date *dj = de[j];
// scan up parents
Date *dp = dj;
Date *lastdp = NULL;
for ( ; dp ; dp = dp->m_dateParent ) {
// is one of our parents a list?
if ( ! (dp->m_type & DT_LIST_ANY ) ) {
lastdp = dp;
continue;
}
// yes...
dj->m_flags |= DF_IN_LIST;
// but are we the first in the list?
if ( dp->m_ptrs[0] == lastdp )
dj->m_flags |= DF_FIRST_IN_LIST;
// update this
lastdp = dp;
}
//
// we do have implied sections so set the
// m_turkTagHash of this date i guess
//
// if parent date is a list... skip, but
// make sure we do get the parent hashed in there
if ( (dj->m_flags & DF_IN_LIST) &&
!(dj->m_flags & DF_FIRST_IN_LIST) )
continue;
// preserve the turk tag hash as the dh32 if
// we only have one date element
if ( dh32 == 0 ) dh32 = dj->m_turkTagHash;
// incorporate the tag hash otherwise
else dh32 = hash32h ( dh32 , dj->m_turkTagHash );
// however, we also now add in the date type
dh32 = hash32h ( dh32 , (int32_t)dj->m_type );
}
// sanity
if ( dh32 == 0 ) { char *xx=NULL;*xx=0; }
// store it
di->m_dateTypeAndTagHash32 = dh32;
}
//////////////////
//
// call this a second time to capture telescoped store hours like
// for bostonmarket.com which has the dow range separated by text
// from the tod range.
//
//////////////////
setStoreHours ( true );
//////////////
//
// finally set DF_TIGHT if the date is "tight"
//
//////////////
//
// . all non-telscoped dates get this set by default
// . if there's any potential wrongness in the telscoped date we
// do not set this...
for ( int32_t i = 0 ; i < m_numDatePtrs ; i++ ) {
// breathe
QUICKPOLL(m_niceness);
// get it
Date *di = m_datePtrs[i];
// skip if none
if ( ! di ) continue;
// if not telescoped, set and we are done
if ( di->m_type != DT_TELESCOPE ) {
di->m_flags |= DF_TIGHT;
continue;
}
// skip this stuff for now. we should probably just
// make sure that these dates are in their own section
Section *ds = di->m_ptrs[0]->m_section;
// compute date's mina and maxb
int32_t mina = 9999999;
int32_t maxb = -1;
for ( int32_t j = 0 ; j < di->m_numPtrs ; j++ ) {
// get sub date
Date *sj = di->m_ptrs[j];
// skip if from url
if ( sj->m_a < 0 ) continue;
// test otherwise
if ( sj->m_a < mina ) mina = sj->m_a;
if ( sj->m_b > maxb ) maxb = sj->m_b;
}
// must have had something not from url
if ( maxb == -1 ) continue;
// blow that up until it contains all date ptrs
for ( ; ds ; ds = ds->m_parent ) {
// breathe
QUICKPOLL(m_niceness);
// stop if it contains mina/maxb
if ( ds->m_a > mina ) continue;
if ( ds->m_b < maxb ) continue;
break;
}
// how'd this happen?
if ( ! ds ) continue;
// ok, now scan all dates in this section. they must all
// belong to "di" in order for "di" to be tight.
int32_t fdi = ds->m_firstDate;
// assume "di" is a tight date
bool tight = true;
// scan
for ( int32_t j = fdi ; j < m_numDatePtrs ; j++ ) {
// breathe
QUICKPOLL(m_niceness);
// get that date
Date *dj = m_datePtrs[j];
// skip if nuked
if ( ! dj ) continue;
// stop if outside of "ds" now
if ( dj->m_a >= ds->m_b ) break;
// or telescope, stop
if ( dj->m_type == DT_TELESCOPE ) break;
// skip if belongs to us
int32_t k; for ( k = 0 ; k < di->m_numPtrs ; k++ ) {
// did the date "dj" match one of our
// date components??? break if so
if ( di->m_ptrs[k] == dj ) break;
}
// if we broke out early then "dj" matched one of
// the date components of "di"... we are a subdate
// so we are still tight!
if ( k < di->m_numPtrs ) continue;
// if we could not match date "dj" to one of the
// components in "di" then we are not "tight"
tight = false;
break;
}
// are we tight?
if ( tight ) {
di->m_flags |= DF_TIGHT;
continue;
}
// skip this stuff below for now... kinda too complicated
/*
// 1. otherwise expand the section around each date element
// 2. until right before it hits another element in the date
// 3. if the section contains date of different type than
// the date whose section we are expanding then its
// ambiguous and we should not set DF_TIGHT
int32_t j; for ( j = 0 ; j < di->m_numPtrs ; j++ ) {
// breathe
QUICKPOLL(m_niceness);
// get it
Date *dj = di->m_ptrs[j];
// get section;
Section *sp = dj->m_section;
// skip if none
if ( ! sp ) continue;
// get initial date bits
datetype_t idt = sp->m_dateBits;
// expand it right up until it contains
// another one of the dates in our list
Section *ps = sp;
for ( ; ps ; ps = ps->m_parent ) {
// scan the other dates
int32_t k;for ( k = 0 ; k < di->m_numPtrs ; k++ ){
// breathe
QUICKPOLL(m_niceness);
// ge tit
Date *dk = di->m_ptrs[k];
// skip if us
if ( dk == dj ) continue;
// skip if in url
if ( dk->m_a < 0 ) continue;
// stop if all done
if ( ! ps->m_parent ) break;
// stop if would contain
if ( ps->m_parent->contains2(dk->m_a) )
break;
}
// if we not contain any, keep going, otherwise
// break/stop
if ( k < di->m_numPtrs ) break;
}
// if other dates are in "ps" like other dows that
// are not in our date then bail!
// if we've blown up the section around date component
// "dj" and it has different date types in it, it
// can not be tight!
if ( ps->m_dateBits & ~idt ) break;
// now scan for other dates like us, like DOWs i
// guess... allow multiple tods if we are a tod,
// but do not allow a list of other dates, because
// we might have made a combination error when we
// build this telscoped date.
// mdw left off here
//...
// ok, now "ps" is the section that contains "dj"
// and NONE other of the dates in this telescoped
// dates. so make sure "ps" has no additional date
// types that could give rise to some "confusion".
// if this is non-zero then there is extra crap in
// there, so do not consider it a tight date
//if ( ps->m_dateBits & ~idt ) break;
// otherwise, consider it tight so far
}
// if we didn't break out and went all the way, then there
// was no "confusion".
if ( j >= di->m_numPtrs )
di->m_flags |= DF_TIGHT;
*/
}
//
// set DF_INCRAZYTABLE if one of the date elements is in a
// table that has both a row and col header of dates. like
// http://www.the-w.org/poolsched.html . this is beyond our
// parsing ability for now. it has dows in the row header and
// tods in the first column header.
//
for ( int32_t i = 0 ; i < m_numDatePtrs ; i++ ) {
// breathe
QUICKPOLL(m_niceness);
// get it
Date *di = m_datePtrs[i];
// skip if none
if ( ! di ) continue;
int32_t ne;
Date **de = getDateElements(di,&ne);
// so scan each date element then
int32_t x; for ( x = 0 ; x < ne ; x++ ) {
// breathe
QUICKPOLL(m_niceness);
// get it
Date *dx = de[x];
// get section
Section *ds = dx->m_section;
// skip if non
if ( ! ds ) continue;
// in crazy table?
if ( ! ds->m_tableSec ) continue;
// check it out
if (!(ds->m_tableSec->m_flags & SEC_HASDATEHEADERROW))
continue;
if (!(ds->m_tableSec->m_flags & SEC_HASDATEHEADERCOL))
continue;
// ok, it be crazy
di->m_flags |= DF_INCRAZYTABLE;
break;
}
}
return true;
}
// set these for printing normalized crap
void Dates::setDateParents ( ) {
for ( int32_t i = 0 ; i < m_numDatePtrs ; i++ ) {
// breathe
QUICKPOLL(m_niceness);
// get it
Date *di = m_datePtrs[i];
// skip if none
if ( ! di ) continue;
// start it up
setDateParentsRecursive ( di , NULL );
}
}
// . store "parent" in di->m_dateParent
// . then recurse into each of di's m_numPtrs component dates in m_ptrs[],
//   handing "di" down as their parent
void Dates::setDateParentsRecursive ( Date *di , Date *parent ) {
	// remember who contains us
	di->m_dateParent = parent;
	// walk our components, if any
	int32_t k = 0;
	while ( k < di->m_numPtrs ) {
		// breathe
		QUICKPOLL(m_niceness);
		// this component's parent is us
		setDateParentsRecursive ( di->m_ptrs[k] , di );
		k++;
	}
}
// for blackbird buvette "first Thursday" telescopes up to
// the section right above and grabs the "December 3, 12:30PM" when
// it shouldn't because it has "December 3, 12:00-12:00AM" in its section...
//
// when telescoping date X with date Y, get the biggest section
// around each one, such that they do not intersect, then make sure
// that between those two sections, one does not contain all the
// date types of the other. BUT if no such two sections exist, i.e.
// if one section fully contains the other, then this algorithm does
// not apply. this is a "list of dates" only algorithm. it makes sure
// that elements in a list do not telescope up with each other when
// they should not. it stops list elements of similar date types from
// pairing/telescoping together.
//
// we must allow individual days to telescope to a header row eg.:
// dec 2009
// dec 1
// dec 2 ...
//
// "every tuesday" should NOT telescope to "dec 5 2009" here:
// dec 5 2009 1-3pm
// beginning every tuesday from dec to january
// (*"every tuesday" is fuzzy so it can't be a telescope header here.)
// (*a DOW is treated as multiple dayNums for these purposes!)
//
// Often we have a list of table rows and some rows are date headers and
// other rows are non-header but have dates in them.
// for http://www.salsapower.com/cities/us/newmexico.htm we have
// all the table rows as being individual events, and no table rows are
// date headers. yet one event row's day of week, Tuesday, is being
// seen as an event header.
//
// how to fix this? well let's now require that in order to be a header
// the date should be the only text in the section IF AND ONLY IF both
// di and dp's s1 and s2 respectively equal each other, because they
// are list elements of the same parent container. so basically, if you
// have a list of things, the only way that one of the things can be a
// header is if its section, as taken as an item in that list, contains
// only date words.
// --------------
// . for that panjea.org url it was picking up the phone number from one
// event as the phone number of another, and since the address had its
// own phone number, it thought there was a phone number disagreement.
// so now we stem the telescoping of a phone number to the first section
// containing a TOD to prevent that from happening again. that is
// what getPhoneTable() does now.
// . we also do this for other items, and may do it for subfields like "at:"
// or phrases like "at the" in the future.
// . if we determine that "s1" is the same ~type~ of section as "s2" then
// this returns false, otherwise it returns true
// . we are looking to pair up two sections because they have different data,
// but if the sections are "brothers" then we should not do that!
// . the core problem is determining if given two sections, s1 and s2, if they
// are really the same type of section or if one of them is a "header"
// section that is meant to augment the other.
// . merely looking at the tag hash of the largest section element is not
// sufficient because it is very common to insert header rows into a table
// that look very similar to the non-header rows.
// . the approach we take is to check to see if the two sections "own"
// different items of the same type, which would show them to be the same
// type of section.
// . some item examples:
// inlined addresses or verified addresses
// unverified streets
// phone numbers
// email addresses
// subsections containing exactly the same text (subfields)
// prices ($4)
// TODs (timesofday)
// . the problem is exacerbated when you have a section that has multiple
// subsections each containing its own TOD (effectively an event) and
// then outside those subsections you have a phone number. each of the
// tods technically owns the phone number, but when telescoping out the
// tod section you do not want to own necessarily any other phone number
// contained by the telescoped section because it might belong to another
// tod or list of tods entirely! that is why we have to make a hash table
// for every item we want to compare, so we have getPhoneTable() etc. and
// those tables telescope out their items until they hit a TOD section.
// in that way we can properly contain them and bind them to the TOD or
// TOD sections they are associated with.
// . is "s1" a compatible "header" of "s2" (or vice versa)
// . return -1 on error with g_errno set
// . "di" is the first date in the telescope
int32_t Dates::isCompatible ( Date *di ,
Date *dp ,
HashTableX *ht ,
Date *DDarg ,
bool *hasMultipleHeaders ) {
// we are always looping over the same header candidates... so
// cache the result!
// no double daynums! slows us down
if ( di->m_hasType == dp->m_hasType ) return 0;
// if you are not ticket registration date and he is, do not use him
// and vice versa. fixes signmeup.com turkeytrot which has
// "Thanksgiving" in a paragraph with the word "register" so it thinks
// it has to do with that.
if ( (di->m_flags & DF_NONEVENT_DATE)!=(dp->m_flags&DF_NONEVENT_DATE))
return 0;
if ( (di->m_flags & DF_REGISTRATION) != (dp->m_flags &DF_REGISTRATION))
return 0;
if ( di->m_flags5 & DF5_IGNORE )
return 0;
Date *DD = DDarg;
if ( ! DD ) DD = di;
/*
if ( di->m_a == 1630 &&
dp->m_a == 1476 ) // 4th
log("hey");
if ( DD->m_numPtrs == 2 &&
DD->m_ptrs[0]->m_a== 1630 &&
DD->m_ptrs[1]->m_a== 1476 &&
dp->m_a>=581 && dp->m_a<=600 )
log("hey2");
*/
// allow "Every Friday [[]] before 9PM [[]] 7:30PM - 11:30PM"
/*
mdw left off here
if ( ( DD->m_hasType & DT_TOD ) &&
( DD->m_flags & DF_ONGOING ) &&
( dp->m_hasType & DT_TOD ) &&
( dp->m_hasType & DT_RANGE_TOD ) )
return 1;
if ( ( dp->m_hasType & DT_TOD ) &&
( dp->m_flags & DF_ONGOING ) &&
( DD->m_hasType & DT_TOD ) &&
( DD->m_hasType & DT_RANGE_TOD ) )
return 1;
*/
bool exclude = true;
// but if its a DOW and matches us then that is ok
// fixes funkefiredarts.com so we can telescope the Friday to
// another recurring friday:
// "June 28 - August 14, 2010 [[]] Friday [[]]
// every 2nd and 4th Friday of the Month [[]] ..."
if ( dp->m_hasType == DT_DOW &&
DD->m_dow >= 1 &&
//DD->m_minDow >= dp->m_minDow &&
//DD->m_maxDow <= dp->m_maxDow )
// if base dow bits are subset of parent...
(DD->m_dowBits & dp->m_dowBits) == DD->m_dowBits )
exclude = false;
// stop "Sundays at 2 PM [[]] Fridays & Saturdays at 8 PM"
datetype_t d1 = dp->m_hasType;
datetype_t d2 = DD->m_hasType;
// take out dow list bit
d1 &= ~DT_LIST_DOW;
d2 &= ~DT_LIST_DOW;
if ( (d1 & d2) == d1 && exclude )
return 0;
// if we have a tod and parent is "afternoons", etc. skip it
if ( (d2 & DT_TOD) && (dp->m_type == DT_SUBDAY) )
return 0;
// . special store horus fix for burtstikilounge.com
// for "1 [[]] Nov 2009 [[]] 8pm-2am [[]] M-F"
// . strip the TOD for "until midnight[[]]1[[]]Nov 2009[[]] 8pm..."
datetype_t base = DD->m_hasType & ~DT_TOD;
// . stop "December [[]] 2pm" for graffiti.org
// . just do not let month names by themselves telescope up
if ( di->m_type == DT_MONTH )
return 0;
// if we already heave a weak DOW and dp is a weak DOW, do not allow it
if ( (DD->m_flags & DF_HAS_WEAK_DOW) &&
dp->m_hasType == DT_DOW &&
(dp->m_flags & DF_HAS_WEAK_DOW) )
return 0;
// try to fix "Friday, 29 January, 2010 [[]] 07:00 PM" in the
// upcoming events menue of newmexico.org. drat, it hurts
// stuff like "open at 2:30pm on x y and z".
//if ( dp->m_type == DT_TOD )
// return 0;
// . think of special dow like "SundayS" or "every sunday" like mon/day
// . "every sunday" or "first sunday of the month" or "sundays"
// is not allowed to telescope to a section that has a
// month/daynum because that would defeat the purpose of
// saying "every sunday". stops "Sundays 9am-9pm [[]]
// August 29th" for guysndollsllc.com. however we should
// allow August 29th to telescope to sundays 9am-9pm if
// we want.
// . but this makes us miss "Every Friday night [[]] 7 - 10 p.m. [[]]
// August 1" for salsapower.com and we
if ( ( DD->m_suppFlags & SF_RECURRING_DOW ) &&
// unless we got something like "Fridays: Dec 11, 18 at 8:00pm"
// like santafeplayhouse.org has cause we want that to telescope
// to "December 10, 2009 - January 3, 2010" because it has a
// year...
! ( DD->m_hasType & DT_DAYNUM ) &&
( dp->m_hasType & DT_MONTH ) &&
( dp->m_hasType & DT_DAYNUM ) &&
// allow monthday ranges though to fix salsapower.com
! ( dp->m_hasType & DT_RANGE_MONTHDAY ) &&
// really any kind of range at this point so that
// we can telescope "Saturdays (10am-5pm)" to
// "Nov 6, 2009 - jan 9, 2010" for flavorpill.com/newyork/...
! ( dp->m_hasType & DT_RANGE ) )
return 0;
// a tod or tod range is not allowed to telescope to a single
// daynum... i guess ever??? because it is in a calendar i guess...
// UNLESS that daynum is the only potential header...
if ( dp->m_calendarSection &&
dp->m_calendarSection != DD->m_calendarSection )
return 0;
// don't allow calendar daynum to telescope to tod outside of
// the calendar
if ( DD->m_calendarSection && (dp->m_hasType & DT_TOD) )
return 0;
//
// BEGIN TABLE SCANNING ALGO
//
// if both in table, must be in same row/col if the table has
// a date row OR col header (or both)
//
Section *di2 = di->m_section;
Section *dp2 = dp->m_section;
// if what we are trying to telescope to is in a subtable, then
// grow him out until his row/col count is for OUR table
while ( di2 &&
dp2 &&
di2->m_tableSec &&
dp2->m_tableSec &&
di2->m_tableSec != dp2->m_tableSec &&
di2->m_tableSec->contains ( dp2->m_tableSec ) )
// grow dp2 out
dp2 = dp2->m_tableSec->m_parent;
// likewise, repeat for di2 if in a subtable of dp2's table
while ( dp2 &&
di2 &&
dp2->m_tableSec &&
di2->m_tableSec &&
dp2->m_tableSec != di2->m_tableSec &&
dp2->m_tableSec->contains ( di2->m_tableSec ) )
// grow di2 out
di2 = di2->m_tableSec->m_parent;
// now if in same table do our ABOVE scanning
if ( di2->m_tableSec &&
di2->m_tableSec == dp2->m_tableSec &&
(di2->m_tableSec->m_flags & SEC_HASDATEHEADERROW) ) {
// sanity
if ( ! m_dateBitsValid ) { char *xx=NULL;*xx=0; }
// useful
datetype_t myBits = di2->m_dateBits;
// make di2 into table cell
for ( ; di2 ; di2 = di2->m_parent ) {
QUICKPOLL(m_niceness);
if ( di2->m_tagId == TAG_TD ) break;
if ( di2->m_tagId == TAG_TH ) break;
}
// make dp2 into table cell
for ( ; dp2 ; dp2 = dp2->m_parent ) {
QUICKPOLL(m_niceness);
if ( dp2->m_tagId == TAG_TD ) break;
if ( dp2->m_tagId == TAG_TH ) break;
}
// how'd this happen?
if ( ! di2 ) return false;
if ( ! dp2 ) return false;
// if nothing above, forget it! maybe di is the header!
if ( ! di2->m_aboveCell ) return false;
// left scan that cell
for (Section *above = di2; above; above = above->m_aboveCell) {
QUICKPOLL(m_niceness);
// a match?
if ( above == dp2 ) break;
// if cell has different date type, not compatible
if ( above->m_dateBits & ~myBits ) return false;
}
}
//
// END TABLE SCANNING ALGO
//
if ( DD &&
base == (DT_DAYNUM|
DT_MONTH|
DT_YEAR|
DT_COMPOUND|
DT_TELESCOPE) &&
// must have "hours" or "open" in its section to indicate
// it is when the store is open
(dp->m_flags & DF_STORE_HOURS) &&
dp->m_hasType == (DT_TOD|DT_RANGE_TOD) )
return 1;
if ( DD &&
base == (DT_DAYNUM|
DT_MONTH|
DT_YEAR|
DT_COMPOUND|
DT_TELESCOPE|
DT_RANGE_TOD) &&
// must have "hours" or "open" in its section to indicate
// it is when the store is open
(dp->m_flags & DF_STORE_HOURS) &&
dp->m_hasType == (DT_DOW|DT_RANGE_DOW) )
return 1;
// store hours should not telescope to kitchen hours
// for blackbirdbuvette.com
if ( (DD->m_flags & DF_STORE_HOURS) &&
(dp->m_flags & DF_KITCHEN_HOURS) )
return 0;
// and kitchen hours to kitchen hours is bad too i guess
// for blackbirdbuvette.com
if ( (DD->m_flags & DF_KITCHEN_HOURS) &&
(dp->m_flags & DF_KITCHEN_HOURS) )
return 0;
// stop "9:15 and 11:00 AM [[]] Sunday [[]] Oct 18, 1:15PM" for
// abqcsl.org
if ( (DD->m_hasType & DT_LIST_TOD) &&
!(dp->m_hasType & DT_LIST_TOD) &&
(dp->m_hasType & DT_TOD) )
return 0;
// allow "Monday, November 23, 2009" to go to "Mon | Tue | Wed" for
// mrmovietimes.com (and vice versa)
//if ( (di->m_hasType & DT_DOW) &&
// (dp->m_type == (DT_DOW|DT_LIST_DOW)) )
// return 1;
// stop "9:00am - 5:00pm [[]] February 2010" for the
// smithsonianmag.com url where February 2010 occurs
// below the hours in a different section
//if ( DD->m_hasType == (DT_TOD|DT_RANGE_TOD) &&
// dp->m_hasType == (DT_COMPOUND|DT_MONTH|DT_YEAR) &&
// dp->m_a > DD->m_a )
// return 0;
//if ( dp->m_hasType == (DT_TOD|DT_RANGE_TOD) &&
// DD->m_hasType == (DT_COMPOUND|DT_MONTH|DT_YEAR) &&
// DD->m_a > dp->m_a )
// return 0;
// . this is unneccsary and wrong for burtstikiloung.com
// . stops "21 [[]] November 2009 [[]] Sat" which prevents us from
// then telescoping to the store hours i think
// . is this hurting "July 19, 2010 [[]] *Sunday* [[]] noon - 5:00pm"
// for woodencow.com??? we need to telescope to store hours now
//if ( (DD->m_hasType & DT_DAYNUM) && dp->m_type == DT_DOW )
// return 0;
// for santafeplayhouse stop
// "Dec 10, 17, at 8:00pm [[]] Fridays: Dec 11, 18, Jan 1, at 8:00pm"
if ( (DD->m_hasType & DT_LIST_DAYNUM) &&
(dp->m_hasType & DT_LIST_DAYNUM) )
return 0;
// for santafeplayhouse stop
// "Dec 10, 17, at 8:00pm [[]] Wednesday: Dec 30 at 8:00pm"
if ( (DD->m_hasType & DT_LIST_DAYNUM) &&
(dp->m_hasType & DT_DAYNUM) &&
// allow "December 10, 2009 - January 3, 2010" to be header
// for santefeplayhouse.org
!(dp->m_hasType & DT_RANGE_ANY) )
return 0;
// stop "Thursday, Feb 10 7:30p [[]] 2010-11" for
// www.zvents.com/san-jose-ca/events/show/159288785-in-the-mood-a-
// 1940s-musical-revue
//if ( (DD->m_hasType & DT_DOW) &&
// (DD->m_hasType & DT_DOM) &&
// (dp->m_hasType & DT_RANGE_YEAR) )
// return 0;
// for santafeplayhouse stop
// "Dec 31, 8:00pm to 12:30am [[]]
// Sundays: Dec 13, 20, 27, Jan 3, at 2:00pm"
datetype_t simple1 = (DT_MONTH|DT_DAYNUM|DT_TOD|DT_YEAR);
if ( (DD->m_hasType & simple1) == (DT_MONTH|DT_DAYNUM|DT_TOD) &&
(dp->m_hasType & simple1) == (DT_MONTH|DT_DAYNUM|DT_TOD) )
return 0;
// stop "1-11 [[]] Sun Noon to 6pm" for collectorsguide.com
if ( di->m_type == DT_RANGE_DAYNUM )
return 0;
// for blackbirbuvette.com stop
// "Monday thru Friday 5pm - 8 pm [[]] 5th & 6th"
if ( dp->m_type == DT_LIST_DAYNUM )
return 0;
// stop "18th and 19th [[]] Fridays and Saturdays at 8pm" for
// piratecateradio.com
if ( di->m_type == DT_LIST_DAYNUM && !(dp->m_hasType & DT_MONTH) )
return 0;
// fix "Thanksgiving [[]] Jan & Sept" for collectorsguide.com
if ( di->m_type == DT_HOLIDAY &&
// covers xmas, etc. but not "every day"
// MDW di->m_num <= HD_SPECIFIC_HOLIDAY_MAX &&
(dp->m_hasType & DT_MONTH ) &&
!(dp->m_hasType & DT_TOD) &&
!(dp->m_hasType & DT_YEAR) )
return 0;
// . a daynum can only use a header date that contains a month!
// . fixes "1 [[]] Monday - Friday 9:00 am.-3:00 pm" for
// unm.edu url
if ( DD->m_type == DT_DAYNUM && !(dp->m_hasType & DT_MONTH) )
return 0;
// stop "December 6, 7:30PM [[]] November 2009"
if ( DD->m_month >= 1 &&
dp->m_month >= 1 &&
DD->m_month != dp->m_month )
return 0;
// stop those daynums in burtstikilounge.com from calling is
// compatible so much!
if ( dp->m_dayNum >= 1 &&
DD->m_dayNum >= 1 &&
dp->m_dayNum != DD->m_dayNum )
return 0;
// stop "Monday nights [[]] 8 [[]] 9:30 p.m [[]] 9:30 [[]] 10:30"
// from salsapower.com
if ( (DD->m_hasType & DT_TOD) && dp->m_type == DT_TOD )
return 0;
// stop "Tuesday Night [[]] 7:30-10:30pm [[]] 2:30-4:30pm" for
// abqfolkfest.org
if ( (DD->m_hasType & DT_RANGE_TOD) &&
(dp->m_hasType & DT_RANGE_TOD) &&
// and dp is NOT adding anything new. this fixes soul power
// on publicbroadcasting for "November 23 - 27 on * Monday 4:00PM-12:00AM
(DD->m_hasType & dp->m_hasType) == dp->m_hasType )
return 0;
// stop "[[]] thanksgiving 2008" but allow "on thanksgiving 2008"
if ( dp->m_hasType == (DT_HOLIDAY|DT_YEAR|DT_COMPOUND) &&
!(dp->m_suppFlags & SF_ON_PRECEEDS) )
return 0;
// stop telescoping to "thanksgiving" but allow "on thanksgiving"
if ( dp->m_hasType == DT_HOLIDAY &&
!(dp->m_suppFlags & SF_ON_PRECEEDS) )
return 0;
// "1st and 15th of each month [[]] last day of the month" for unm.edu
// . crap! this kills
// "Saturday morning from 10:00 am - noon [[]] March 19, 2005" for
// http://www.patpendergrass.com/albnews.html because we think of
// "morning" as a holiday, so let's fix that
/*
if ( (DD->m_hasType & DT_HOLIDAY) &&
(dp->m_hasType & DT_DAYNUM ) )
return 0;
if ( (dp->m_hasType & DT_HOLIDAY) &&
(DD->m_hasType & DT_DAYNUM ) )
return 0;
*/
// TODO:
// if its a closed date it can't telescope outside of its section
// or outside of its sentence section!!!
// . stop "March/April [[]] THURSDAYS, 7:30-8:30pm ..." for panjea.org
// . month names should only be headers...
// . no, abqtango.org has "7:30-10:30pm" on "10/4 10/18 ..."
//if ( di->m_hasType == (DT_MONTH )) return 0;
//if ( di->m_hasType == (DT_MONTH|DT_LIST) ) return 0;
//if ( di->m_hasType == (DT_MONTH|DT_RANGE) ) return 0;
// . a range of DOWs does not like a daynum type
// . crap, this stops "5 [[]] until Midnight [[]] November 2009"
// in burtstikilounge.com from pairing up with the store hours
//if ( (DD->m_hasType & DT_RANGE_DOW) && di->m_dayNum >= 1 )
// return 0;
// crap, this hurts
// www.thewoodencow.com/2010/07/19/a-walk-on-the-wild-side/ from
// telescoping the store hours to the "September 3" reception date
if ( (DD->m_hasType & DT_RANGE_DOW) && dp->m_dayNum >= 1 &&
// no, no, this hurts "8 am. Mon - Fri [[]] March 15 - Oct. 15"
// for unm.edu... so add this exception in i guess
! ( dp->m_hasType & DT_RANGE_MONTHDAY ) &&
// so to fix woodencow.com:
! ( DD->m_flags & DF_STORE_HOURS) )
return 0;
// stop "Every Monday from 7:45-9 pm [[]] Monday November 23, 2009"
if ( (DD->m_suppFlags & SF_EVERY) &&
(dp->m_hasType & DT_DAYNUM) &&
// allow "Every Monday from 7:45-9 pm [[]] Apr 19 - May 20, 2008"
!(dp->m_hasType & DT_RANGE_MONTHDAY ) &&
// allow "Every Monday from 7:45-9 pm [[]] Apr 19 - 23, 2008"
!(dp->m_hasType & DT_RANGE_DAYNUM ) )
return 0;
// stop "December 10, 2009 - January 3, 2010 [[]] Dec 10, 17, at 8:00p"
// from santefeplayhouse, just because doesn't make sense
if ( di->m_hasType ==(DT_DAYNUM|DT_MONTH|DT_YEAR|DT_RANGE|DT_COMPOUND))
return 0;
// . if in the same table, must either be in same row or column
// . no! because people pad tables up with various cells so a
// date might only be in one column when it is intended for all. well
// when we see that we will fix it.
if ( di->m_tableCell &&
dp->m_tableCell &&
di->m_tableCell->m_tableSec &&
di->m_tableCell->m_tableSec == dp->m_tableCell->m_tableSec &&
di->m_tableCell->m_colNum != dp->m_tableCell->m_colNum &&
di->m_tableCell->m_rowNum != dp->m_tableCell->m_rowNum )
return 0;
// stop "Fridays, 7:00 PM [[]] Tuesday, June 02, 2000"
// http://www.abqfolkfest.org/resources.shtml
// because it has "Last Updated Tuesday, June 02, 2000"
// BUT this hurts us on "5:00 PM[[]]Mon[[]]28[[]]Sep" for meetup.com
//if ( (DD->m_hasType & DT_TOD) &&
// (DD->m_hasType & DT_DOW) &&
// (dp->m_hasType & DT_DAYNUM) &&
// (dp->m_hasType & DT_DOW) )
// return 0;
// stop "9am-5pm [[]] Tue-Sun [[]] September 24, 2007" ETC.
// for http://www.collectorsguide.com/ab/abmud.html
// CRAP! this hurts santafeplayhouse's
// "Saturdays: Dec 12, 19, Jan 2, at 8:00pm" from going to
// "December 10, 2009 - January 3, 2010"
//if ( ( dp->m_hasType & DT_DAYNUM) &&
// ( dp->m_hasType & DT_MONTH ) &&
// ( dp->m_hasType & DT_YEAR ) &&
// ( (DD->m_hasType & DT_DOW ) ||
// (DD->m_hasType & DT_HOLIDAY) ) )
// return 0;
datetype_t specialTypes =
DT_HOLIDAY | // thanksgiving
DT_SUBDAY | // mornings
DT_SUBWEEK | // weekends
DT_SUBMONTH | // last day of month
DT_EVERY_DAY | // 7 days a week
DT_SEASON | // summers
DT_ALL_HOLIDAYS ; // "holidays"
// stop "every day of the week, 9am-5pm [[]] Saturdays and Sundays"
// for http://www.collectorsguide.com/ab/abmud.html
if ( (DD->m_hasType & specialTypes ) &&
// fix "6:30-7:30pm Weekend[[]]4[[]]April 2011 *M*T*W..."
// so Weekend doesn't stop telescope to "April 2011-M-T-W-R-F-S"
!(dp->m_hasType & DT_MONTH) &&
(dp->m_hasType & DT_DOW) )
return 0;
// fix "Sunday, Nov 29 2:00p [[]] In the summer of 2001" for
// http://events.kgoradio.com/san-francisco-ca/events/show/88047269-san-francisco-symphony-chorus-sings-bachs-christmas-oratorio
if ( (DD->m_hasType & DT_DAYNUM) &&
(dp->m_hasType & DT_SEASON) )
return 0;
// stop "Every 3rd Thursday [[]] Every Last Thursday of the Month"
// for http://www.rateclubs.com/clubs/catch-one_14445.html
suppflags_t rmask =
SF_FIRST|
SF_LAST|
SF_SECOND|
SF_THIRD|
SF_FOURTH|
SF_FIFTH;
if ( (DD->m_suppFlags & rmask) &&
(dp->m_suppFlags & rmask) &&
(DD->m_suppFlags & rmask) !=
(dp->m_suppFlags & rmask) )
return 0;
// DF_CLOSE_DATE can never be the basis of a date! can only be
// headers for a non-close date
//if ( di->m_flags & DF_CLOSE_DATE )
// return 0;
// . non-close dates can NOT telescope to close dates now
// . events.cpp should just use the close dates in the events
// section! it can subtract them from the intervals of the open
// dates.
if ( ! ( DD->m_flags & DF_CLOSE_DATE ) &&
( dp->m_flags & DF_CLOSE_DATE ) )
return 0;
// likewise a close date cannot telescope to a non-close date
if ( ! ( dp->m_flags & DF_CLOSE_DATE ) &&
( DD->m_flags & DF_CLOSE_DATE ) )
return 0;
// stop "Thanksgiving [[]] Christmas"
if ( di->m_type == DT_HOLIDAY && dp->m_type == DT_HOLIDAY )
return 0;
// for http://www.collectorsguide.com/ab/abmud.html prevent
// "10am [[]] 3-12 [[]] Mondays"
if ( dp->m_type == DT_RANGE_DAYNUM && (DD->m_hasType & DT_DOW) )
return 0;
// . for salsapower.com fix "Tuesdays [[]] 1 [[]] 7 - 8:30 p.m"
// . no! we should recognize that 1 as part of an address now
// that we set addresses part way through setting dates
// . and also that hurt us on "5:00 PM[[]]Mon[[]]28[[]]Sep"
// for meetup.com
//if ( ( DD->m_hasType & DT_DOW ) && dp->m_type == DT_DAYNUM )
// return 0;
// . for salsapower.com fix "1 [[]] Tuesdays [[]] 7 - 8:30 p.m"
// . no fuxs up calendars that need "1 [[]] November 2009"
//if ( di->m_type == DT_DAYNUM )
// return 0;
// . quick empty intersection checks
// . fix "first Wednesday of each month [[]] Every Sunday before 1pm"
if ( DD->m_dow >= 0 && dp->m_dow >= 0 &&
DD->m_dow != dp->m_dow &&
// but for southgatehouse.com this thinks the band "Sunday Valley"
// is all Sundays and it can't telescope to "Friday, Jul 30" which
// is the correct date. thus only apply this constraint if the
// DOW we already have is strong, i.e. meaning it is not in
// a potentially incorrect format like "Sunday Valley" which is
// considered a weak DOW
(DD->m_flags & DF_HAS_STRONG_DOW) )
return 0;
// get dp/di section
//Section *s1 = m_sections->m_sectionPtrs[dp->m_a];
//Section *s2 = m_sections->m_sectionPtrs[di->m_a];
Section *s1 = dp->m_section;
Section *s2 = di->m_section;
Date *dlast = NULL;
if ( DD && DD->m_numPtrs >= 2 ) dlast = DD->m_ptrs[DD->m_numPtrs-1];
// "di" and its corresponding last2 represent the first date in
// the telscope. to fix "December 2009 [[]] 12/01 [[]] 4pm" for
// 770kob.com we need to also check out the next to last date ptr
// if DD is not di and has 2 or more ptrs to it!
Section *s3 = NULL;
if ( dlast ) s3 = dlast->m_section;
// kill s3 if same section as s2
if ( s3 == s2 ) s3 = NULL;
// if we have DF_CLOSE_DATE set we are a date when the venue is
// closed, and we are not allowed to telescope outside out sentence!
// so blow up until we hit our sentence section.
// this fixes unm.edu's:
// "Closed the 1st and 15th of each month; last day of the month
// closes at 2:30 pm." where we were getting
// "1st and 15th of each month [[]] 2:30pm"
if ( (DD->m_flags & DF_CLOSE_DATE) && s1 != s2 ) {
// telecsope up
for ( ; s2 ; s2 = s2->m_parent )
if ( s2->m_flags & SEC_SENTENCE ) break;
// . if we do not include dp, bail
// . we can equal it as well
if ( s1 != s2 && ! s2->contains(s1) )
return 0;
// reset s2
s2 = di->m_section;
}
// save them
//Section *orig1 = s1;
//Section *orig2 = s2;
// if they initially contain each other, then they are compatible
//if ( s1->contains ( s2 ) ) return 1;
//if ( s2->contains ( s1 ) ) return 1;
/*
// loop over all text sections owned
for ( int32_t k = last1->m_a ; k < last1->m_b ; k++ ) {
// need a tid
if ( ! m_tids[k] ) continue;
// get its smallest containing section
Section *sp = m_sections->m_sectionPtrs[k];
// get its content hash
int32_t ch = sp->m_contentHash;
// hash that
if ( ! ct.addKey (
// skip if in date
if ( m_bits[k] & D_IS_IN_DATE ) continue;
// otherwise, that's bad, we can't be a header
return 0;
}
*/
// . fix for http://www.salsapower.com/cities/us/newmexico.htm
// . if last1 and last2 are brothers in the same list
// . crap, but we got " monday nights 7pm" so we have to
// be able to telescope from one item to another in the list
/*
if ( s1 == s2 &&
// must not be in same item in same container
last1 != last2 &&
// and the same tagHash (section type)
last1->m_tagHash == last2->m_tagHash ) {
// now if both have an email, phone number or
// place or other items that might indicate they are
// describing different things, then "dp" cannot be a header
// date.
//if ( last->m_numPlaces > 0 ) return 0;
//if ( last->m_numAddresses > 0 ) return 0;
// then basically they are like rows in the same table
// so for s1 to be a header it must contain only date text
// in its section, otherwise we might be mistaking it
// for another event or something!
// so scan for words not in a date
for ( int32_t k = last1->m_a ; k < last1->m_b ; k++ ) {
// skip if in date
if ( m_bits[k] & D_IS_IN_DATE ) continue;
// otherwise, that's bad, we can't be a header
return 0;
}
}
*/
bool subRange = false;
/*
if ( ( dp->m_hasType & DT_RANGE_TOD ) &&
( di->m_hasType & DT_RANGE_TOD ) &&
di->m_minTod >= dp->m_minTod &&
// folkmads.org potluck is actually after the event tod range!
// aw, that sux... even if we allow it to be after it still
// gets empty times when doing the intersection. so we'd have
// to change that as well.
di->m_maxTod <= dp->m_maxTod )
subRange = true;
*/
// you can not put two different tod ranges together... strange
// ** no, might have a subrange of tods
if ( ( dp->m_hasType & DT_RANGE_TOD ) &&
( di->m_hasType & DT_RANGE_TOD ) &&
// it's ok if subrange though i guess
! subRange &&
// allow "Every Sunday before 1pm" to telescope to "Tue-Sun 9-5"
// for http://www.collectorsguide.com/ab/abmud.html
! ( di->m_flags & DF_ONGOING ) &&
! ( dp->m_flags & DF_ONGOING ) )
return 0;
// allow store hours headers all the time
/*
// use his telescope if he got one
// fixes "8pm-2am [[]]Mon-Sat" for
// burtstikilounge.com url
Date *te = dp;
if ( te->m_telescope ) te = te->m_telescope;
// get it
datetype_t hdt = te->m_hasType;
// mask it out
hdt &= DT_DOW |
DT_TOD |
DT_DAYNUM |
DT_MONTH |
DT_YEAR |
DT_RANGE_DOW |
DT_RANGE_TOD ;
// . if we have a dow range, do not do this
// . fixes blackbirdbuvette.com kitchen hours mixing with store hours
if ( DD->m_hasType & DT_RANGE_DOW ) hdt = 0;
// . or if base date has a single DOW and a tod range
// . fixes "Every Tuesday 7-9pm [[]] Fri. + Sat. until Midnight" for
// blackbirdbuvette.com
// . TODO: make an exception to this later for half open tod range
// intervals like "after 11am" or "until 2pm", those will need to
// pair up with store hours
if ( DD->m_hasType & (DT_RANGE_TOD|DT_DOW) == (DT_RANGE_TOD|DT_DOW) )
hdt = 0;
// skip if date is store hours
if ( hdt ==
( DT_DOW |
DT_TOD |
DT_RANGE_DOW |
DT_RANGE_TOD ) )
return 1;
// or if just a single dow and tod
// range, that is ok, but watch out
if ( hdt ==
( DT_DOW |
DT_TOD |
DT_RANGE_TOD ) )
return 1;
*/
bool check = true;
// STORE HOURS header exception.
// allow "Every Sunday before 1pm" to telescope to "Tue-Sun 9-5" for
// http://www.collectorsguide.com/ab/abmud.html
// well, not quite we get
// "6 am. Mon. - Sat. [[]] Mon. - Fri. 8 am. - 2 pm." for
// http://www.unm.edu/~willow/homeless/services.html if we do this,
// because it is basically a list of store hours!!!
// so just set check to false i guess...
// really we need to identify store hours as being in a separate
// unique section i guess... at least unique relative to telescoping
// di up, incase we have a list of different stores' hours...
if ( ( dp->m_hasType & DT_RANGE_TOD ) &&
( dp->m_hasType & DT_RANGE_DOW ) )
// this was returning true, but it caused unm.edu which
// is basically a list of headers to telescope to headers
// in different sections in that list of headers
check = false;
// but if di already has a tod range we do not need store hours
if ( (DD->m_hasType & DT_RANGE_TOD ) )
check = true;
// if intersection is empty, obviously can not be header!!
// fix Saturday 9:30-4pm [[]] Mon-Fri 9:30-5pm for
// http://www.collectorsguide.com/ab/abmud.html
// you can mix types if one is closed!
//if ( dp->m_flags & DF_CLOSE_DATE )
// check = false;
// allow "8:30 to midnight" to team up with
// "Wednesdays at 7:00 p.m" for salsapower.com because its
// tod fits in dp's tod range.
// actually do not try to fit TOD ranges because of things like:
// " Dance Sunday 4:30 to 5:45 p.m. Another intermediate class
// at 6 p.m. too"
if ( ( di->m_type == DT_RANGE_TOD || di->m_type == DT_TOD ) &&
(dp->m_hasType & DT_DOW) &&
(dp->m_hasType & DT_TOD) )
check = false;
// . if you are telescoping to kitchen hours, check it!
// . fixes "Sun. - Thur. until 10pm [[]] Monday thru Friday 5pm - 8 pm"
// which is kitchen hours telescoping to happy hour
// for blackbirdbuvette.com
if ( dp->m_flags & DF_KITCHEN_HOURS )
check = true;
// mask out range, list, composite
datetype_t hasType = dp->m_hasType;
// so that "2:00 pm" would telescope to "last day of the month" for
// unm.edu url. treat it like a daynum.
if ( dp->m_type == DT_SUBMONTH && dp->m_num == HD_MONTH_LAST_DAY )
hasType = DT_DAYNUM;
if ( dp->m_type == DT_SUBMONTH && dp->m_num == HD_MONTH_FIRST_DAY )
hasType = DT_DAYNUM;
// now that we are super symmetric...
datetype_t ditype3 = di->m_hasType;
if ( di->m_type == DT_SUBMONTH && di->m_num == HD_MONTH_LAST_DAY )
ditype3 = DT_DAYNUM;
if ( di->m_type == DT_SUBMONTH && di->m_num == HD_MONTH_FIRST_DAY )
ditype3 = DT_DAYNUM;
// but if its a DOW and matches us then that is ok
if ( ! exclude )
check = false;
// add DT_DOW for "every Wednesday from
// 3:00pm to 4:00 pm, starting
// September 9 and continuing through
// December 9."
// so that "every Wednesday" is telescoped to
// for http://www.dailylobo.com/calendar/
// . add DT_LIST_DOW so for mrmovietimes.com "Monday, Nov 23, 2009"
// would telescope to "Mon | Tue | Wed ..." and we can dedup that
// date format with "11:15am, 2:00, 4:50, 7:45, 10:30 [[]]
// Mon | Tue | Wed | Thu | Fri | Sat | Sun [[]]
// Monday, November 23, 2009" which does include the dow menu/list
hasType &= DT_DOW|DT_MONTH|DT_DAYNUM|DT_YEAR|DT_TOD|DT_LIST_DOW;
// all the date types we've accumulated so far
datetype_t accum = 0;
if ( DD ) accum = DD->m_hasType;
// or in what we got
accum |= ditype3; // di->m_hasType;
// mask that out
accum &= hasType;
// do not check if DD has all dp's date types, except dp has
// DF_ONGOING set! allows:
// "Every 3rd Thursday [[]]before 9PM" to telescope to "7:30PM-11:30PM"
// for http://www.rateclubs.com/clubs/catch-one_14445.html
if ( dp->m_flags & DF_ONGOING ) check = false;
if ( DD->m_flags & DF_ONGOING ) check = false;
// do not telescope if the header has all our date types, with the
// store hours exception above which we return 1 for
// MDW: added "hasType != 0" because we had a single "halloween" for
// the "dp" which made hasType 0... this should fix
// http://www.wpd4fun.org/Events/Halloween.htm
if ( hasType != 0 && accum == hasType && check )
return 0;
int32_t ret = isCompatible2 ( s1 , s2 , true );
// return -1 on error with g_errno set
if ( ret == -1 ) {
if ( ! g_errno ) { char *xx=NULL;*xx=0;}
return -1;
}
if ( ret == 0 ) return 0;
// crap! problem with this algo is on panjea.org we have
// "shona language class 8:30-9pm" that needs to telescope to
// the TUESDAYS in the section above it, but the section above it
// also contains a TOD, so we need to stop this...
//return 1; -- make exception for br sect w/ same parent!!
if ( s1->m_parent == s2->m_parent &&
to_lower_a(m_wptrs[s1->m_a][1])=='b' &&
to_lower_a(m_wptrs[s1->m_a][2])=='r' &&
s1->m_tagHash == s2->m_tagHash )
return 1;
// fix "Friday, Feb. 11 at 7:30 p.m." going to "2/2/2011 - 2/09/2011"
// which would make it empty times for http://www.denver.org/events/2fo
// r1tix?utm_source=onsite&utm_medium=rightrail&utm_campaign=D&D_2for1
if ( DD->m_dayNum > 0 &&
(dp->m_hasType & DT_RANGE) &&
(DD->m_hasType & DT_MONTH) &&
(DD->m_hasType & DT_MONTH) &&
di->m_month == dp->m_month &&
dp->m_maxDayNum < DD->m_dayNum )
return 0;
//
// if dates are in the same section do not do this logic below!
//
if ( dp->m_section == di->m_section ) return 1;
// or if dates are both in sections and have same parents
// likewise, do not do this logic below. this should allow
// "8:30 to midnight" to telescope to "Wednesdays at 7:00 p.m."
// in salsapower.com, because they are in different br sections,
// and actually Wednesdays is in a font subsection! so maybe we
// need to ignore font,b,br,i sections and telescope out until we
// hit a section that is not one of those! yeah!!!
// -- we could extend this to tags too to fix abqcsl.org --
// -- but what would that break??? --
// . crap, but this is allowing "Dec 31, 8:00pm to 12:30am" to
// telescope to "Thursday" which is in a section that already has
// "Dec 10, 17, at 8:00pm" for santafeplayhouse.org
// . well, let's keep this rule then but set a flag, "inSame"
// and mask out all but a month and daynum then. that way if
// our biggest section has a month and daynum and so does his, then
// the dates are not compatible... that should fix
// santafeplayhouse.org and at the same time let the tods continue
// to pair up with each other where applicable.
bool inSame = ( dp->m_hardSection == di->m_hardSection );
//if ( dp->m_hardSection == di->m_hardSection ) return 1;
/*
this is not worth it to just get romy keegan on panjea.org to
use the storehours/tod header in the brother section above it
// . s1 or s2 is the smallest section that contains both dp and di
// . this should ultimately replace the acc1/acc2 algorithm
// . the idea is is that if two or more headers of consisting of
// the same basic date types (dow/tod/month/daynum/year) exist
// in s1/s2 then there is some ambguity and we do not pick either!
// . NO! messes up guildcinema which has multiple headers for the
// date header but you are supposed to pick the one right above you.
// . also messes up facebook.com and we lose the one event we had
// because for the same reason i guess
if ( ! s1 ) { char *xx=NULL;*xx=0; }
// only do this for dp being store hours
if ( storeHours ) {
// basic type mask
datetype_t mask1;
mask1=DT_DOW|DT_MONTH|DT_DAYNUM|DT_YEAR|DT_TOD|DT_RANGE_ANY;
// get a header date in the s1 section
int32_t slot = ht->getSlot(&s1);
// loop over all dates that telescoped up to that section
for ( ; slot >= 0 ; slot = ht->getNextSlot(slot,&s1 ) ) {
// breathe
QUICKPOLL(m_niceness);
// get it
int32_t pn = *(int32_t *)ht->getValueFromSlot(slot);
// get the date index
Date *hd = m_datePtrs[pn];
// skip if us
if ( hd == di ) continue;
if ( hd == dp ) continue;
// if its a dup then break, and return 0
if ( (hd->m_hasType & mask1)==(dp->m_hasType & mask1) )
break;
}
// if we had another date in s1 with the same basic date types
// as hd then let's stop!
if ( slot >= 0 ) return 0;
// otherwise, it's a unique store hours header
return 1;
}
*/
// double crap, texas drums has stuff like:
// "TUESDAYS: 5:30-7pm" in one row
// and "7:30-9pm" in the next row and it is supposed to refer
// to TUESDAYS...
// maybe try another approach?
//return 1;
Section *last1 = s1;
Section *last2 = s2;
Section *last3 = s3;
// blow up "s1" until we hit last section that does NOT contain "s2"
for ( ; s1 ; s1 = s1->m_parent ) {
// breathe
QUICKPOLL(m_niceness);
// if s1 contains s2 then stop
if ( s1->contains ( di->m_section ) ) break;
// . stop if contains a piece of our telescope too
// . fixes July ... [[]] noon - 5:00pm [[]] * Sunday *"
// for woodencow.com otherwise acc3 & acc1 == acc3 because
// acc1 grows to contain acc3 unless we stop it here
if ( s3 && s1->contains ( s3 ) ) break;
// assign this if doesn't contain it yet
last1 = s1;
// . do not telescope to date in a menu
// . fixes http://www.residentadvisor.net/event.aspx?221238
// which is telescoping a "no rentry after 4am" to an
// event name in the menu: "2nd Sunday Nyc wit..".
//if ( s1->m_flags & SEC_MENU )
// return false;
}
// blow up "s2" until we hit last section that does NOT contain "s1"
for ( ; s2 ; s2 = s2->m_parent ) {
// breathe
QUICKPOLL(m_niceness);
// if s2 contains header date then stop
if ( s2->contains ( dp->m_section ) ) break;
// assign this if doesn't contain it yet
last2 = s2;
// . do not telescope FROM date in a menu
// . fixes http://www.residentadvisor.net/event.aspx?221238
// which is telescoping a "no rentry after 4am" to an
// event name in the menu: "2nd Sunday Nyc wit..".
//if ( s2->m_flags & SEC_MENU )
// return false;
}
// blow up "s3" until we hit last section that does NOT contain "s1"
for ( ; s3 ; s3 = s3->m_parent ) {
// breathe
QUICKPOLL(m_niceness);
// if s3 contains header date then stop
if ( s3->contains ( dp->m_section ) ) break;
// assign this if doesn't contain it yet
last3 = s3;
}
// if you are brothers, check for SEC_TOD_EVENT bit
//if ( last1->m_parent == last2->m_parent &&
// // do not allow telescoping into a brother if we are basically
// // a list of events
// (last1->m_flags & SEC_TOD_EVENT) &&
// // allow brothers that are headings though
// !(last1->m_flags & SEC_HEADING_CONTAINER) &&
// // or would have been heading containers
// !(last1->m_flags & SEC_NIXED_HEADING_CONTAINER) )
// return 0;
// if one date's section contains the other
if ( last1->contains(last2) ) return 1;
if ( last2->contains(last1) ) return 1;
//
// . fix "Monday through Friday, 8 a.m.-4:30 p.m [[]] The Fall [[]] "
// Fall 2011" for www.advising.ufl.edu
// . do not telescope to seasons unless they are clear headers for us
//
// 1. season header must be in same sentence as di
if ( ( dp->m_hasType == (DT_SEASON|DT_YEAR|DT_COMPOUND) ||
dp->m_hasType == (DT_SEASON) ) &&
last1 != last2 )
return false;
// this allowed "July 19, 2010 [[]] noon - 5:00pm" to telescope
// further to "Wednesday - Saturday", which it shouldn't have
// because it already went to Sunday..
//if ( last3 && last3->contains(last1) ) return 1;
/*
// . try to fix www.woodencow.com
// . the times in its hour section where telscoping to
// "July 19, 2010" in the list of dates below it.
// . there is nothing too wrong with that if 1) each store hour
// date telescoped to each "calls for art" monthday date
// and 2) each monthday date telscoped to each hours date.
// . but for now rather than do multiple headers we just return
// false... TODO: fix
// . telescope header section thdr until it has a brother
// that has the same date types in it
// . if thdr has such a brother and they are in a container that
// does not contain di->m_section then do not telescope to them
// because they are probably not related to s2
// . and s2 must be outside this list
// . so the dates in the list can't be header dates then
// . WELL WE CAN indeed telescope to them, but we must telescope
// to EACH ONE separately... i.e. MULTIPLE HEADER ALGO
datetype_t dmask = DT_RANGE_TOD | DT_RANGE_DOW;
// get the list/container of headers, if any
Section *thdr = dp->m_section;
// telescope until hits header brother section, if any...
for ( ; thdr ; thdr = thdr->m_parent ) {
// breathe
QUICKPOLL(m_niceness);
// stop if contains di section already
if ( thdr->contains ( di->m_section ) ) break;
// check brothers
Section *bro = thdr->m_prevBrother;
// get header dates in there
int32_t slot ;
// come up here for "next" bro as well
subloop:
// get it
if ( bro ) slot = ht->getSlot(&bro);
else slot = -1;
// scan dates to see if one matches us
for (; slot >= 0 ; slot = ht->getNextSlot(slot,&bro) ) {
// breathe
QUICKPOLL(m_niceness);
// get it
int32_t pn = *(int32_t *)ht->getValueFromSlot(slot);
// get the date index
Date *dx = m_datePtrs[pn];
// sanity check
if ( dx == dp ) { char *xx=NULL;*xx=0; }
// mask our DT_RANGE_TOD and DT_RANGE_DOW
// same type as us? then break
if ( (dx->m_hasType & ~dmask) ==
(dp->m_hasType & ~dmask) ) break;
}
// try next if we need too
if ( slot < 0 && bro != thdr->m_nextBrother ) {
bro = thdr->m_nextBrother;
goto subloop;
}
// telescope up more if no hits
if ( slot < 0 ) continue;
// set this
*hasMultipleHeaders = true;
// ok, we got a container of headers
//return 0;
}
*/
//int32_t slot = -1;
/*
// now loop over all dates in each section and see what types we got
if ( thdr ) slot = ht->getSlot ( &thdr ); // last1 );
// loop over all dates that telescoped up to this sec.
for ( ; slot >= 0 ; slot = ht->getNextSlot(slot,&thdr)) { //last1) ) {
// breathe
QUICKPOLL(m_niceness);
// get it
int32_t pn = *(int32_t *)ht->getValueFromSlot(slot);
// get the date index
Date *dx = m_datePtrs[pn];
// skip if our guy
if ( dx == dp ) continue;
// skip if our section
if ( dx->m_section == dp->m_section ) continue;
// get the type
dateflags_t ht = dx->m_hasType;
// take out telescope and ranges
ht &= ~(DT_TELESCOPE|DT_RANGE_TOD|DT_RANGE_DOW);
// make exception if its a list of tods/dows because
// those could be store hours!!
// see www.thewoodencow.com
// which has two lists, one is store hours, the others is
// days of the month.
if ( dx->m_hasType & DT_TOD ) continue;
// for "July 19, 2010 [[]] Wednesday - Saturday [[]] 10am-6pm"
// for www.thewoodencow.com
if (dx->m_hasType == (DT_DOW) ) continue;
if (dx->m_hasType == (DT_DOW|DT_TOD) ) continue;
// otherwise, its a brother section, so we are a list
// separate from last2
return 0;
}
*/
// ----> break Dates.cpp:7710 if DD->m_numPtrs==2 && DD->m_ptrs[0]->m_a==1978 && dp->m_a==1574
/*
// convert a holiday like thanksgiving to a month and daynum
datetype_t ditype2 = di->m_hasType;
//if ( di->m_type==DT_HOLIDAY && (di->m_suppFlags&SF_NORMAL_HOLIDAY)){
// MDW: will this be good enough now that holiday is actually a holiday
if ( di->m_type & specialTypes ) { // di->m_type == DT_HOLIDAY
ditype2 &= ~specialTypes; // ~DT_HOLIDAY;
ditype2 |= DT_MONTH|DT_DAYNUM;
}
// furthermore we have to fix southgatehouse.com which has a band
// named "Sunday Valley" playing on a friday, and it was unable
// to telescope to that "Friday" because of the constraint below
// that since ditype (the last date ptr in DD) was "Sunday" and
// "Friday" was in the date header we were trying to telescope to,
// we ended up returning false, so, to fix that, ignore weak DOWs
// for this purpose
if ( (di->m_flags & DF_HAS_WEAK_DOW) &&
!(di->m_flags & DF_HAS_STRONG_DOW) )
ditype2 &= ~DT_DOW;
*/
// if last1 and last2 have the same tag hash then the header date
// must be ABOVE the base date. this doesn't fix anything i know of
// but it was originally intended for abqtango.org
//if( last1->m_tagHash == last2->m_tagHash && last1->m_a > last2->m_a )
// return 0;
dateflags_t acc1 = last1->m_dateBits;
dateflags_t acc2 = last2->m_dateBits;
dateflags_t acc3 = 0; if ( last3 ) acc3 = last3->m_dateBits;
/*
slot = -1;
// now loop over all dates in each section and see what types we got
if ( last1 ) slot = ht->getSlot ( &last1 );
// reset
dateflags_t acc1 = 0;
// loop over all dates that telescoped up to this sec.
for ( ; slot >= 0 ; slot = ht->getNextSlot(slot,&last1) ) {
// get it
int32_t pn = *(int32_t *)ht->getValueFromSlot(slot);
// get the date index
Date *dx = m_datePtrs[pn];
// get its type
datetype_t dxtype = dx->m_hasType ;
// convert a holiday like thanksgiving to a month and daynum
if ( dxtype & DT_HOLIDAY ) {
// take it out
dxtype &= ~DT_HOLIDAY;
// put something else in its place maybe
if ( dx->m_suppFlags & SF_NORMAL_HOLIDAY ) {
//dxtype |= DT_MONTH|DT_DAYNUM;
dxtype |= DT_DAYNUM;
}
// everything except generic "holiday" word
else if ( dx->m_num != HD_HOLIDAYS ) {
dxtype |= DT_DAYNUM;
}
}
// . if a date in the header date's blown up section contains
// a date, "dx", which has all the same date components as
// "di" then forget it... "dp" can't be a header for "di"
// . ultimately for st-margarets.com i put this in here to fix
// "thanksgiving" telescoping to a tod or tod range in
// another section which also contained a month/daynum pair
// but the thanksgiving's section contained additional
// date components that the header section did not and
// so the "acc1" algo below was not working since neither
// acc1 or acc2 was a proper subset of the other, BUT by
// looking at the individual dates in the header's section
// to see if they are similar to "di" we can be even more
// accurate
// . excpetion: do not do this if inSame is true
if ( (dxtype & ditype2) == ditype2 && ! inSame )
return 0;
// skip if isolated daynum - they are noisy and often wrong
// . no, this stops "JAN [[]] 8:00PM" from being
// "JAN [[]] 28 [[]] 8:00PM" for
// http://www.reverbnation.com/venue/448772
//if ( dx->m_type == DT_DAYNUM ) continue;
// skip if used in loop above! (in s1 contains s2)
//if ( dx->m_flags & DF_USED3 ) continue;
// accumulate the date types
acc1 |= dxtype;//dx->m_hasType;
}
slot = -1;
// now loop over all dates in each section and see what types we got
if ( last2 ) slot = ht->getSlot ( &last2 );
// reset
dateflags_t acc2 = 0;
// loop over all dates that telescoped up to this sec.
for ( ; slot >= 0 ; slot = ht->getNextSlot(slot,&last2) ) {
// get it
int32_t pn = *(int32_t *)ht->getValueFromSlot(slot);
// get the date index
Date *dx = m_datePtrs[pn];
// get its type
datetype_t dxtype = dx->m_hasType ;
// convert a holiday like thanksgiving to a month and daynum
if ( dxtype & DT_HOLIDAY ) {
// take it out
dxtype &= ~DT_HOLIDAY;
// put something else in its place maybe
if ( dx->m_suppFlags & SF_NORMAL_HOLIDAY ) {
//dxtype |= DT_MONTH|DT_DAYNUM;
dxtype |= DT_DAYNUM;
}
// everything except generic "holiday" word
else if ( dx->m_num != HD_HOLIDAYS ) {
dxtype |= DT_DAYNUM;
}
}
// skip if isolated daynum - they are noisy and often wrong
//if ( dx->m_type == DT_DAYNUM ) continue;
// accumulate the date types
acc2 |= dxtype;//dx->m_hasType;
// mark this date as used
//dx->m_flags |= DF_USED3;
}
slot = -1;
// now loop over all dates in each section and see what types we got
if ( last3 ) slot = ht->getSlot ( &last3 );
// reset
dateflags_t acc3 = 0;
// loop over all dates that telescoped up to this sec.
for ( ; slot >= 0 ; slot = ht->getNextSlot(slot,&last3) ) {
// get it
int32_t pn = *(int32_t *)ht->getValueFromSlot(slot);
// get the date index
Date *dx = m_datePtrs[pn];
// get its type
datetype_t dxtype = dx->m_hasType ;
// convert a holiday like thanksgiving to a month and daynum
if ( dxtype & DT_HOLIDAY ) {
// take it out
dxtype &= ~DT_HOLIDAY;
// put something else in its place maybe
if ( dx->m_suppFlags & SF_NORMAL_HOLIDAY ) {
//dxtype |= DT_MONTH|DT_DAYNUM;
dxtype |= DT_DAYNUM;
}
// everything except generic "holiday" word
else if ( dx->m_num != HD_HOLIDAYS ) {
dxtype |= DT_DAYNUM;
}
}
// accumulate the date types
acc3 |= dxtype;//dx->m_hasType;
}
*/
// . convert DT_DOW to DT_DAYNUM
// . this was stopping "Fridays & Saturdays at 8pm" from telescoping
// to "November 27 - December 19, 2009"
//if ( acc1 & DT_DOW ) { acc1 &= ~DT_DOW; acc1 |= DT_DAYNUM; }
//if ( acc2 & DT_DOW ) { acc2 &= ~DT_DOW; acc2 |= DT_DAYNUM; }
// . treat holidays as daynums too
// . fix so "valentine's day" does not telescope to tod range
// in above section that has a month/daynum for
// http://outside.in/places/albuquerque-mennonite-church-albuquerque
//if ( acc1 & DT_HOLIDAY ) { acc1 &= ~DT_HOLIDAY; acc1 |= DT_DAYNUM; }
//if ( acc2 & DT_HOLIDAY ) { acc2 &= ~DT_HOLIDAY; acc2 |= DT_DAYNUM; }
//if ( acc3 & DT_HOLIDAY ) { acc3 &= ~DT_HOLIDAY; acc3 |= DT_DAYNUM; }
// fix for santafeplayhouse.org from allowing one tod to telescope
// into another soft section that also has a month/daynum and use its
// dow... see above where inSame in set for more comments
if ( inSame ) {
// just look at month and daynums
if ( ! (acc1 & DT_DAYNUM ) ) return 1;
if ( ! (acc1 & DT_MONTH ) ) return 1;
if ( ! (acc2 & DT_DAYNUM ) ) return 1;
if ( ! (acc2 & DT_MONTH ) ) return 1;
}
// . if both have daynums, then do not allow
// . fixes peachpundit.com from telescoping its pubdate to the
// tod range in the article, which belongs to a month/daynum
// mentioned in the article.
// . crap this also breaks signmeup.com
// . this breaks thewoodencow.com because the store hours section
// contains a list of monthdays below it which is included in acc1
// . it no longer seems to be needed by peachpundit.com but taking
// it out caused a few anomalies. not sure if good or bad really,
// so i left this in and fixed thewoodencow.com
if ( (acc1 & DT_DAYNUM) && (acc2 & DT_DAYNUM) &&
// fix thewoodencow.com
! ( dp->m_flags & DF_STORE_HOURS) &&
// but this breaks santefeplayhouse.org from telescoping
// a daynum to a range "Dec x - Jan y", so allow range headers
// to be compatible
!(acc1 & (DT_RANGE|DT_RANGE_MONTHDAY|DT_RANGE_DAYNUM)) &&
// along the same lines allow lists
// breaks www.missioncvb.org which has
// "10:15 pm on both Friday and Saturday night" telescoping to
// "Friday, May 9 and Saturday, May 10, 2008"
!(acc1 & (DT_LIST_ANY)) )
return 0;
// if the header section we are trying to telescope to, completely
// contains the date types in our last date in the telscope, then
// do not allow it. fixes "April 2011 [[]] SUN [[]] 10 [[]] 3pm"
// where the "3pm" was actually in the same section as
// another daynum, "3", so the "10" daynum should not have been
// allowed to telescope to it! fix for url
// http://www.zvents.com/z/las-cruces-nm/classics-performance-by-
// terrence-wilson-piano--events--129171945
// like above rule but we use "DD" not acc2...
if ( (acc1 & DT_DAYNUM) && (DD->m_hasType & DT_DAYNUM) &&
// fix thewoodencow.com
! ( dp->m_flags & DF_STORE_HOURS) &&
// if its a range of daynums, probably ok
!(acc1 & (DT_RANGE|DT_RANGE_MONTHDAY|DT_RANGE_DAYNUM)) )
return 0;
// for blackbird buvette "first Thursday" telescopes up to
// the section right above and grabs the "December 3, 12:30PM" when
// it shouldn't all because our date is "December 3, 12:00-12:00AM"
// which is a range. so make a mask now to exclude ranges, lists,
// etc. and just focus on the basic types.
dateflags_t mask = DT_DOW|DT_TOD|DT_MONTH|DT_YEAR|DT_DAYNUM;
// now include a monthday range so the header can be like
// "April 19 - November 30, 2008" and all dates in its subsection can
// telescope up to it. fixes graffiti.org
mask |= DT_RANGE_MONTHDAY;
// allow "Monday, November 23, 2009" to go to "Mon | Tue | Wed" for
// mrmovietimes.com (and vice versa)
// no, because it hurts st-margarets.com, so put an exception for
// mrmovietimes.com up above specifically for this case
//mask |= DT_LIST_DOW;
// mask out
acc1 &= mask;
acc2 &= mask;
acc3 &= mask;
// if the section under the header section has an acc2 of zero that
// basically means all the dates stemmed each other off in a subsection
// and none were able to "blow out" of that subsection. this fixes
// santfeplayhouse tuna xmas from not getting the range:
// "dec 10 2009 - jan 3 2010"
if ( acc2 == 0 ) return 1;
// not sure if we should do this one though...
// yeah because acc1 only has DT_HOLIDAY set and the mask makes it zero
// so we lose "Thanksgiving", and acc2 has a single TOD, and this
// prevents them from being compatible. fixes signmeup.com.
if ( acc1 == 0 ) return 1;
// see if one is subset of the other
if ( (acc1 & acc2) == acc1 ) return 0;
// . fix folkmads.org so tods can telescope to the month/day/tod date
// . allows "5-6 pm[[]] Saturday, July 24, 2-5 pm"
// . allows "7-10:30 pm[[]] Saturday, July 24, 2-5 pm"
// . allow a tod/todrange to telescope to another date that has a tod
// provided of course they are not datebrothers
// . this changed caused a title change for abqcsl.org but that's it
// . crap this was causing the comment tod for piratecatradio.com
// to telescope to the play time and address, so until we somehow
// are sure the tod is not a comment tod we have to leave this out
//if ( ! s3 && ( acc2 == (DT_RANGE_TOD|DT_TOD) || acc2 == DT_TOD ) )
// return 1;
// likewise, a fix for villr.com so a monthday can telescope to
// a store hours thingy
// screws up http://www.glsc.org/visit/omnimax.php?id=45
// TODO: fix better by allowing to also telescope to the
// monthday range in the parent section, then it will be ok.
//if ( ! s3 && acc2 == (DT_MONTH|DT_DAYNUM) )
// return 1;
// fix "8:30 to midnight [[]] Wednesdays at 7:00 pm" for
// salsapower.com.
if ( di->m_hasType == (DT_TOD|DT_RANGE_TOD) &&
(dp->m_hasType & DT_TOD) &&
dp->m_minTod <= di->m_minTod )
return 1;
// this hurts culturemob.com by stopping:
// "9:00pm Wed [[]] Wednesday, November 25, 2009 9:00 PM"
// but without this constraint like 15 pages are bad! this rule
// functions like our email/phone number algo, by preventing one
// event from sharing datees with another...
if ( (acc1 & acc2) == acc2 ) return 0;
// . now to fix "December 2009 [[]] 12/01 [[]] 4pm" for 770kob.com
// . if the section that contains 4pm fully contains all types in
// s3, then we should not telescope to it
if ( s3 && (acc1 & (acc2|acc3)) == (acc2|acc3) ) return 0;
// stop Sep 1 - Sep 25 [[]] Wed - Sat [[]] noon - 5pm
// for thewoodencow.com.
// s3 is "Wed - Sat" section and acc1 is "noon - 5pm" section
// the "noon - 5pm" belongs with "Sunday" not "Wed - Sat". these two
// critters are in different sections i think, so this fixes that.
//if ( s3 && (acc1 & acc3) == acc1 ) return 0;
// no, header can completely contain the last date's section because
// for adobetheater we have "sunday 2pm [[]] July " and the needs to
// telescope to "July 9th - August 1st, 2010"
//if ( s3 && (acc1 & acc3) == acc3 ) return 0;
// fix burtstikilounge.com so once we telescope to the store hours
// tod range of 8pm - 2am then we do not go on to telescope to
// a dow in the calendar, but only to the "Monday - Saturday" in
// the s3 section. so if s3 fully contains all our header section
// types, do not allow it to be telescoped to.
// so we have
// "29 [[]] November 2009 [[]] 8pm - 2am [[]] Monday - Saturday"
// which is good, but we also have the bad:
// "29 [[]] November 2009 [[]] 8pm - 2am [[]] Sun" which is bad
// because we are closed sundays! and once you telescope to those
// store hours range of "8pm - 2am" you shouldn't be allowed to
// telescope to those dows back in the calendar since
// "Monday - Saturday" is your topologically nearest dow/dowrange
// from "8pm - 2am" in the store hours sections.
if ( s3 && (acc1 & acc3) == acc1 ) return 0;
// otherwise, they are not the same elements in a list per se, so
// we can pair them together
return 1;
}
// . return 0 if "s1" and "s2" are NOT compatible, 1 if they are, and -1 on
//   error (getSubfieldTable() failed; it sets g_errno)
// . each section is "blown up", i.e. walked up its m_parent chain, to the
//   last ancestor that does not yet contain the other section. those two
//   ancestors ("last1" and "last2") are what actually get compared.
// . if "useXors" is true and both blown-up sections have a non-zero
//   m_phoneXor, m_emailXor, m_addrXor or m_priceXor that differ from each
//   other, then they are not compatible (e.g. different events have
//   different prices)
// . otherwise they are not compatible only if they have a duplicated
//   subfield bit in common in m_bitTable, since then the would-be header
//   is more likely just another item in a list
int32_t Dates::isCompatible2 ( Section *s1 , Section *s2 , bool useXors ) {
	// the same section is always compatible with itself
	if ( s1 == s2 ) return 1;

	// blow up "s1" until we hit last section that does NOT contain "s2"
	Section *last1 = s1;
	for ( Section *si = s1 ; si ; si = si->m_parent ) {
		// breathe
		QUICKPOLL(m_niceness);
		// if si contains s2 then stop
		if ( si->contains ( s2 ) ) break;
		// assign this if doesn't contain it yet
		last1 = si;
	}

	// blow up "s2" until we hit last section that does NOT contain "s1"
	Section *last2 = s2;
	for ( Section *si = s2 ; si ; si = si->m_parent ) {
		// breathe
		QUICKPOLL(m_niceness);
		// if si contains s1 then stop
		if ( si->contains ( s1 ) ) break;
		// assign this if doesn't contain it yet
		last2 = si;
	}

	// . compare the phone/email/address/price xors of the two blown-up
	//   sections. a zero xor means "none", so only reject when both
	//   sides have one and they disagree.
	// . compare the members directly. this used to copy one side into
	//   int32_t locals and the other into int64_t locals, so unless the
	//   members are exactly int32_t the same stored value could compare
	//   as different after the conversions.
	if ( useXors && last1 && last2 ) {
		if ( last1->m_phoneXor && last2->m_phoneXor &&
		     last1->m_phoneXor != last2->m_phoneXor )
			return 0;
		if ( last1->m_emailXor && last2->m_emailXor &&
		     last1->m_emailXor != last2->m_emailXor )
			return 0;
		if ( last1->m_addrXor && last2->m_addrXor &&
		     last1->m_addrXor != last2->m_addrXor )
			return 0;
		if ( last1->m_priceXor && last2->m_priceXor &&
		     last1->m_priceXor != last2->m_priceXor )
			return 0;
	}

	//////////////////////////////////
	//
	// subfield detection
	//
	// . compare fields between last1 and last2, if they have some
	//   tags that have exactly the same text phrase in them
	//   those are probably fields.
	//
	//////////////////////////////////

	// nothing to compare if one blown-up section holds the other
	if ( last1 == last2 ) return 1;
	if ( last1->contains(last2) ) return 1;
	if ( last2->contains(last1) ) return 1;

	// . getSubfieldTable() is what fills in m_bitTable and m_numLongs
	// . it returns NULL with g_errno set on error, so pass that along
	//   instead of probing a table that was never built
	if ( ! getSubfieldTable() ) return -1;

	// . m_bitTable maps a section ptr to a bit array of m_numLongs
	//   32-bit words where each bit stands for some field that is
	//   repeated
	// . if last1 and last2 have a bit in common that means they have a
	//   field in common and are not compatible
	int32_t *bits1 = (int32_t *)m_bitTable.getValue(&last1);
	int32_t *bits2 = (int32_t *)m_bitTable.getValue(&last2);
	// a section with no entry has no repeated fields to clash on
	if ( ! bits1 || ! bits2 ) return 1;
	for ( int32_t i = 0 ; i < m_numLongs ; i++ ) {
		if ( bits1[i] & bits2[i] ) return 0;
	}
	// no field in common
	return 1;
}
#define MAXBYTES 1024
HashTableX *Dates::getSubfieldTable ( ) {
// return it if we got it
if ( m_sftValid ) return &m_sft;
// scan the sections
int32_t ns = m_sections->m_numSections ;
// for log
//log("dates: subfield start");
// just init it, fast and does not allocate
//m_sft.set(4,4,128,NULL,0,true,m_niceness);
// count what we need
int32_t needSlots = 0;
// loop it
for ( int32_t k = 0 ; k < ns ; k++ ) {
// breathe
QUICKPOLL(m_niceness);
// get section
Section *sk = &m_sections->m_sections[k];
// skip if has no text itself
if ( sk->m_flags & SEC_NOTEXT ) continue;
// get its depth
needSlots += sk->m_depth;
}
// double it for speed
needSlots *= 4;
// alloc it. return NULL with g_errno set on error
if ( ! m_sft.set(4,4,needSlots,NULL,0,true,m_niceness,"m_sft") )
return NULL;
// dup field table
HashTableX dt;
if (!dt.set(4,4,5000,NULL,0,false,m_niceness,"dupfields")) return NULL;
// maps 32bit field name hash to sections that have it directly
HashTableX hts;
if(!hts.set(4,4,5000,NULL,0,true,m_niceness,"sec-fields")) return NULL;
// loop it
for ( int32_t k = 0 ; k < ns ; k++ ) {
// breathe
QUICKPOLL(m_niceness);
// get section
Section *sk = &m_sections->m_sections[k];
// skip if has no text itself
if ( sk->m_flags & SEC_NOTEXT ) continue;
// how is this?
if ( sk->m_contentHash64 == 0 ) { char *xx=NULL;*xx=0; }
// get the tag id the delimits section if any
//int32_t a = sk->m_a;
// might not be there
//nodeid_t tid = m_tids[a];
// if it is bold, ignore it
//if ( tid == TAG_B ) continue;
// hash tag id and its content hash together
int32_t h = sk->m_contentHash64 ^ sk->m_tagHash;
// 0 is bad
if ( h == 0 ) { char *xx=NULL;*xx=0; }
// debug point
//if ( h == -508009735 ) { char *xx=NULL;*xx=0; }
// just one section now
if ( ! m_sft.addKey ( &sk , &h ) ) return NULL;
// sanity
if ( ! sk ) { char *xx=NULL;*xx=0; }
// find duplicated subfields
if ( ! dt.addTerm32 ( &h ) ) return NULL;
// map hash to section as well now for new loop below
if ( ! hts.addKey ( &h , &sk ) ) return NULL;
// test this
//continue;
// gotta add to all parents!
//for ( ; sk ; sk = sk->m_parent ) {
// // breathe
// QUICKPOLL(m_niceness);
// // . key is the section ptr!
// // . return NULL with g_errno set on error
// if ( ! m_sft.addKey ( &sk , &h ) ) return NULL;
//}
}
// now scan the words for fields preceeding colons, like "At:"
// or "Squares:"
for ( int32_t i = 0 ; i < m_nw ; i++ ) {
// breathe
QUICKPOLL(m_niceness);
// skip if not alnum
if ( ! m_wids[i] ) continue;
// colon must follow
if ( i+1>= m_nw || m_wptrs[i+1][0] != ':' ) continue;
// must not be a digit (stop 4:30pm)
if ( is_digit(m_wptrs[i][0]) ) continue;
// back up up to 4 alnum words to look for tag
int32_t kmin = i - 8;
if ( kmin < 0 ) kmin = 0;
// make hash of alnum words
int32_t h = 0;
// loop
int32_t k ; for ( k = i ; k >= kmin ; k-- ) {
// hash wids together
if ( m_wids[k] ) {
// see if its zero
int32_t newh = h ^ (uint32_t)m_wids[k];
// use that if not zero
if ( newh ) h = newh;
}
// skip if not tid
if ( ! m_tids[k] ) continue;
// got it
break;
}
// skip word if not good
if ( k < kmin ) continue;
// 0 is bad
if ( h == 0 ) { char *xx=NULL;*xx=0; }
// debug point
//if ( h == -508009735 ) { char *xx=NULL;*xx=0; }
// get section
Section *sk = m_sections->m_sectionPtrs[i];
// just one section now
if ( ! m_sft.addKey ( &sk , &h ) ) return NULL;
// sanity
if ( ! sk ) { char *xx=NULL;*xx=0; }
// find duplicated subfields
if ( ! dt.addTerm32 ( &h ) ) return NULL;
// map hash to section as well now for new loop below
if ( ! hts.addKey ( &h , &sk ) ) return NULL;
// test this
//continue;
// gotta add to all parents!
//for ( ; sk ; sk = sk->m_parent ) {
// // breathe
// QUICKPOLL(m_niceness);
// // . key is the section ptr!
// // . return NULL with g_errno set on error
// if ( ! m_sft.addKey ( &sk , &h ) ) return NULL;
//}
}
// no longer use bitnum, use a list of 32-bit hashes for the fields
// we contain. really just using sth (section to hash) table would
// be nice. or better yet just make a buffer and store a ptr into
// the section class that points into this buffer into a list of
// "bit #'s" that are on. so a list like "5,33,99" or something.
//uint64_t bitNum = 1LL;
int32_t numBits = 0;
// scan for the duplicated subfields, those are the only important
// ones. then map them to an array of bits, up to 32 bits.
for ( int32_t i = 0 ; i < dt.m_numSlots ; i++ ) {
// breathe
QUICKPOLL(m_niceness);
// skip empty slots
if ( dt.m_flags[i] == 0 ) continue;
// skip if not duplicated
if ( dt.getScoreFromSlot(i) <= 1 ) continue;
// count them
numBits++;
}
// reset
m_numLongs = 0;
// do not redo this logic
m_sftValid = true;
// if none, bail
if ( numBits <= 0 ) return &m_sft;
int32_t bitNum = 0;
if ( numBits > MAXBYTES*8 ) numBits = MAXBYTES*8;
int32_t numLongs = (numBits+31)/32;
// make it int32_t aligned for speed in checking intersections of
// two different bitBufs in m_bitTable
char bitBuf[MAXBYTES];
if ( numLongs*4 > MAXBYTES ) { char *xx=NULL;*xx=0; }
memset(bitBuf,0,numLongs*4);
// init this now
if ( ! m_bitTable.set(4,numLongs*4,256,NULL,0,false,m_niceness,
"subfields") )
return NULL;
// save this for checking bittable above
m_numLongs = numLongs;
for ( int32_t i = 0 ; i < dt.m_numSlots ; i++ ) {
// breathe
QUICKPOLL(m_niceness);
// skip empty slots
if ( dt.m_flags[i] == 0 ) continue;
// skip if not duplicated
if ( dt.getScoreFromSlot(i) <= 1 ) continue;
// get the hash (wordId limited to 32 bits)
uint32_t *h = (uint32_t *)dt.getKeyFromSlot(i);
// now what sections have that hash?
int32_t slot = hts.getSlot(h);
// must be there! dup table, dt, says so!
if ( slot < 0 ) { char *xx=NULL;*xx=0; }
// scan all sections that had this field name and make sure
// their bit table entry has the bit for this field name
for ( ; slot >= 0 ; slot = hts.getNextSlot(slot,h) ) {
// breathe
QUICKPOLL(m_niceness);
// get that section ptr
Section **skp = (Section **)hts.getValueFromSlot(slot);
// must be valid
if ( ! skp || ! *skp ) { char *xx=NULL;*xx=0;}
// get his bit array from the section ptr
char *bits = (char *)m_bitTable.getValue(skp);
// if not there add it
if ( ! bits ) {
// store it,return NULL if failed
if ( ! m_bitTable.addKey(skp,bitBuf) )
return NULL;
// get it
bits = (char *)m_bitTable.getValue(skp);
// must be there now since we added it
if ( ! bits ) { char *xx=NULL;*xx=0; }
}
// make a bitvec for this
int32_t byteOff = bitNum / 8;
char bitOff = bitNum % 8;
// set that
bits[byteOff] |= (1<= numBits ) break;
}
// . now propagate your section's bits to all your parents
// . use int32_t ptrs for speed
for ( Section *si = m_sections->m_rootSection ; si ; si = si->m_next) {
// breathe
QUICKPOLL(m_niceness);
// get our bits
char *bits = (char *)m_bitTable.getValue(&si);
// skip if none
if ( ! bits ) continue;
// otherwise telescope up
Section *sp = si->m_parent;
for ( ; sp ; sp = sp->m_parent ) {
// breathe
QUICKPOLL(m_niceness);
// get parent bits
char *pbits = (char *)m_bitTable.getValue(&sp);
// if not there add it
if ( ! pbits ) {
// store it,return NULL if failed
if ( ! m_bitTable.addKey(&sp,bitBuf) )
return NULL;
// get it
pbits = (char *)m_bitTable.getValue(&sp);
// must be there now since we added it
if ( ! pbits ) { char *xx=NULL;*xx=0; }
// re-get this since hashtable might have
// and moved all the data around
bits = (char *)m_bitTable.getValue(&si);
}
// or in each int32_t
int32_t *dst = (int32_t *)pbits;
int32_t *src = (int32_t *) bits;
int32_t count = 0;
for ( ; count < numLongs ; count++ )
// or in each int32_t
*dst++ |= *src++;
}
}
// for log
//log("dates: subfield end");
return &m_sft;
}
// set Section::phoneXor member
void Dates::setPhoneXors ( ) {
if ( m_phoneXorsValid ) return;
m_phoneXorsValid = true;
// set it
for ( int32_t k = 0 ; k < m_nw ; k++ ) {
// breathe
QUICKPOLL(m_niceness);
// use this so we can inc it
int32_t i = k;
// skip if tag or punct word
if ( ! m_wids[i] ) continue;
// need 3 digit number followed by hyphen then 4 digits
if ( ! is_digit(m_wptrs[i][0]) ) continue;
// must be 3 int32_t
if ( m_wlens[i] != 3 ) continue;
// skip that
if ( ++i >= m_nw ) break;
// this must have a hyphen
if ( ! m_words->hasChar( i, '-' ) &&
// or could be 505.866.0715
! m_words->hasChar( i, '.' ) )
continue;
// skip that
if ( ++i >= m_nw ) break;
// need 3 digit number followed by hyphen then 4 digits
if ( ! is_digit(m_wptrs[i][0]) ) continue;
// must be 3 int32_t
if ( m_wlens[i] != 4 ) continue;
// we got one!
int64_t h64 = m_wids[i-2] ^ m_wids[i];
// only need 32 bits
int32_t h32 = (int32_t)h64;
// get section
Section *sp = m_sections->m_sectionPtrs[k];
// telescope up!
for ( ; sp ; sp = sp->m_parent ) {
// breathe
QUICKPOLL ( m_niceness );
// propagate
sp->m_phoneXor ^= h32;
if ( ! sp->m_phoneXor ) sp->m_phoneXor = h32;
}
}
}
// . set Section::m_emailXor for every section containing an email address
// . looks for a punctuation word that is exactly "@", then scans up to
//   30 chars to the left for the name part and up to 30 chars to the
//   right for the domain part
// . the lowercased hash of "name@domain" is xor'ed into the section
//   holding the "@" word and into all of that section's parents
// . runs at most once; m_emailXorsValid guards re-entry
void Dates::setEmailXors ( ) {
	// already computed?
	if ( m_emailXorsValid ) return;
	m_emailXorsValid = true;
	// scan every word
	for ( int32_t i = 0 ; i < m_nw ; i++ ) {
		// breathe
		QUICKPOLL(m_niceness);
		// we want a punct word: skip alnum words and tags
		if ( m_wids[i] ) continue;
		if ( m_tids[i] ) continue;
		// shortcut
		char *p = m_wptrs[i];
		// skip if not @ sign
		if ( *p != '@' ) continue;
		// must be single char
		if ( m_wlens[i] != 1 ) continue;
		// limit the scan to 30 chars on either side, clamped to
		// the text spanned by the first and last words
		char *pmin = p - 30;
		char *pmax = p + 30;
		if ( pmin < m_wptrs[0] ) pmin = m_wptrs[0];
		char *wend = m_wptrs[m_nw-1] + m_wlens[m_nw-1];
		if ( pmax > wend ) pmax = wend;
		// scan left over the chars allowed in the name part
		char *left = p - 1 ;
		for ( ; left >= pmin ; left-- ) {
			// stop if we hit non name char
			if ( is_alnum_a (*left) ) continue;
			if ( *left == '.' ) continue;
			if ( *left == '-' ) continue;
			if ( *left == '_' ) continue;
			break;
		}
		// now the right for the subdomain
		char *right = p + 1;
		for ( ; right < pmax ; right++ ) {
			// stop if we hit non domain char
			if ( is_alnum_a (*right) ) continue;
			if ( *right == '.' ) continue;
			if ( *right == '-' ) continue;
			break;
		}
		// . "left" normally rests on the punct char that stopped
		//   the scan, so step forward onto the first name char
		// . if the scan ran all the way down to the start of the
		//   word buffer then "left" is one byte BEFORE the buffer
		//   and must not be dereferenced, just step forward
		// . if the scan was stopped by the 30 char limit instead,
		//   "left" is at pmin-1 and that char is kept when it is
		//   alnum (unchanged from before)
		if ( left < m_wptrs[0] || ! is_alnum_a(*left) ) left++;
		// need at least 2 chars after the @ and 1 char before it
		if ( right - p < 3 ) continue;
		if ( p - left < 1 ) continue;
		// hash it up
		int64_t h = hash32Lower_utf8 ( left , right - left );
		// a zero hash is of no use, keep scanning
		if ( h == 0LL ) continue;
		// get section holding the "@"
		Section *sp = m_sections->m_sectionPtrs[i];
		// telescope up!
		for ( ; sp ; sp = sp->m_parent ) {
			// breathe
			QUICKPOLL ( m_niceness );
			// propagate
			sp->m_emailXor ^= h;
			// if the xor cancelled out to zero, store the hash
			if ( ! sp->m_emailXor ) sp->m_emailXor = h;
		}
	}
}
// . set Section::m_priceXor for every section containing a price
// . use this to determine when two different sections are talking about
//   different events - the idea being they might list two different prices
// . a price is either the word "free" with no whitespace-only word on
//   either side of it, or a word starting with a digit that is
//   immediately preceded by a '$' char
// . the low 32 bits of the price word's id are xor'ed into the section
//   holding the word and into all of that section's parents
// . runs at most once; m_priceXorsValid guards re-entry
void Dates::setPriceXors ( ) {
	// already computed?
	if ( m_priceXorsValid ) return;
	m_priceXorsValid = true;
	// numeric value of the price. it is set below but nothing in this
	// function reads it back.
	int32_t price;
	// hash of "free", computed the first time through
	static bool s_init56 = false;
	static int64_t h_free;
	if ( ! s_init56 ) {
		s_init56 = true;
		h_free = hash64n("free");
	}
	// start at word #1 since we always look at the word/char before
	for ( int32_t k = 1 ; k < m_nw ; k++ ) {
		// breathe
		QUICKPOLL(m_niceness);
		// skip if tag or punct word
		if ( ! m_wids[k] ) continue;
		// might be free
		if ( m_wids[k] == h_free ) {
			// but we need hard punct on either side, so skip it
			// if the next word (if any) or the previous word is
			// just spaces
			if ( k+1 < m_nw && m_words->isSpaces(k+1) ) continue;
			if ( m_words->isSpaces(k-1) ) continue;
			price = 99999999;
		}
		else {
			// otherwise we need a number...
			if ( ! is_digit(m_wptrs[k][0]) ) continue;
			// ... with a dollar sign right before it
			if ( m_wptrs[k][-1] != '$' ) continue;
			// get as number, ignore after floating point
			price = m_words->getAsLong(k);
		}
		// hash that price, using the word id itself
		int32_t h32 = (int32_t)m_wids[k];
		// get section
		Section *sp = m_sections->m_sectionPtrs[k];
		// telescope up!
		for ( ; sp ; sp = sp->m_parent ) {
			// breathe
			QUICKPOLL ( m_niceness );
			// propagate
			sp->m_priceXor ^= h32;
			// if the xor cancelled out to zero, store the hash
			if ( ! sp->m_priceXor ) sp->m_priceXor = h32;
		}
	}
}
// set Section::m_todXor
void Dates::setTODXors ( ) {
if ( m_todXorsValid ) return;
m_todXorsValid = true;
// set it
for ( int32_t i = 0 ; i < m_numDatePtrs ; i++ ) {
// breathe
QUICKPOLL ( m_niceness );
// int16_tcut
Date *di = m_datePtrs[i];
// skip if was incorporated into a compound date, range or list
if ( ! di ) continue;
// skip if not in body
if ( di->m_a < 0 ) continue;
// . skip if pub date
// . fixes trumba.com so pub date does not count as a todxor
// and cause eventbrothers in an rss item which makes us
// lose a lot of the event description
if ( di->m_flags & DF_PUB_DATE ) continue;
// skip if registration date
if ( di->m_flags & DF_REGISTRATION ) continue;
if ( di->m_flags & DF_NONEVENT_DATE ) continue;
if ( di->m_flags5 & DF5_IGNORE ) continue;
// require a tod
if ( ! (di->m_hasType & DT_TOD) ) continue;
// skip if like in format:
// "Feb 22, 2011 9:00 AM - Feb 24, 2011 4:00 PM"
// call that DF_RANGE_DAYNUMTOD format
if ( di->m_hasType & DT_RANGE_TIMEPOINT ) continue;
// do not venture into telescope section
//if ( di->m_type == DT_TELESCOPE ) break;
// if we are a telescope, get the first date in telescope
if ( di->m_type == DT_TELESCOPE ) {
// must telescope TO a tod in this case like
// burtstikilounge.com
//if ( di->m_ptrs[0] &
// "24 [[]] November 2009 [[]] 8pm - 2am [[]]
// Monday - Saturday" for burtstikilounge.com
di = di->m_ptrs[0];
}
// telescopes are not fuzzy
else {
// skip if fuzzy
if ( di->m_flags & DF_FUZZY ) continue;
}
/*
// get its hash
int32_t a = di->m_a;
int32_t b = di->m_b;
// skip if not in body
if ( a < 0 ) continue;
char *sa = m_wptrs[a];
char *sb = m_wptrs[b-1] + m_wlens[b];
int64_t h = hash64 ( sa , sb - sa );
*/
// try this now
uint64_t h = di->m_dateHash64;
// make sure not zero
if ( h == 0LL ) { char *xx=NULL;*xx=0; }
// set section::todxor
Section *sp = di->m_section;
// telescope up!
for ( ; sp ; sp = sp->m_parent ) {
// breathe
QUICKPOLL ( m_niceness );
// propagate
sp->m_todXor ^= h;
if ( sp->m_todXor == 0LL ) sp->m_todXor = h;
}
}
}
// . set Section::m_dayXor
// . for dates that telescope to a TOD but do not have a tod in their
//   first date...
// . really JUST to fix calendar formats like burtstikilounge.com which have
//   a monthdaynum in the table cell which we need to call an eventbrother,
//   but it contains no tod per se
// . "24 [[]] November 2009 [[]] 8pm - 2am [[]]
//    Monday - Saturday" for burtstikilounge.com
// . the hash of the text of the telescope's first date is xor'ed into that
//   date's section and into all of that section's parents
// . runs at most once; m_dayXorsValid guards re-entry
void Dates::setDayXors ( ) {
	// already computed?
	if ( m_dayXorsValid ) return;
	m_dayXorsValid = true;
	// scan every date
	for ( int32_t i = 0 ; i < m_numDatePtrs ; i++ ) {
		// breathe
		QUICKPOLL ( m_niceness );
		// shortcut
		Date *di = m_datePtrs[i];
		// skip if was incorporated into a compound date, range or list
		if ( ! di ) continue;
		// only telescoped dates are considered here
		if ( di->m_type != DT_TELESCOPE ) continue;
		// skip if pub date
		if ( di->m_flags & DF_PUB_DATE ) continue;
		// skip if registration date
		if ( di->m_flags & DF_REGISTRATION ) continue;
		if ( di->m_flags & DF_NONEVENT_DATE ) continue;
		if ( di->m_flags5 & DF5_IGNORE ) continue;
		// require a tod somewhere in the telescope
		if ( ! (di->m_hasType & DT_TOD) ) continue;
		// get first date in telescope
		Date *fd = di->m_ptrs[0];
		// tod not allowed as first date, it should be covered
		// by the todxor logic
		if ( fd->m_hasType & DT_TOD ) continue;
		// . and base date must be daynum
		// . this means this is really only meant for calendars
		//   like you see on burtstikilounge.com...
		if ( fd->m_hasType != DT_DAYNUM ) continue;
		// word range [a,b) of the first date
		int32_t a = fd->m_a;
		int32_t b = fd->m_b;
		// skip if not in body
		if ( a < 0 ) continue;
		// . hash the text from the start of word #a to the end of
		//   word #(b-1)
		// . the end is the start of word b-1 plus the length of
		//   that SAME word. using m_wlens[b] here took the length
		//   of the following word and could index one past the
		//   last word.
		char *sa = m_wptrs[a];
		char *sb = m_wptrs[b-1] + m_wlens[b-1];
		int64_t h = hash64 ( sa , sb - sa );
		// set section::m_dayXor
		Section *sp = fd->m_section;
		// telescope up!
		for ( ; sp ; sp = sp->m_parent ) {
			// breathe
			QUICKPOLL ( m_niceness );
			// propagate
			sp->m_dayXor ^= h;
			// if the xor cancelled out to zero, store the hash
			if ( sp->m_dayXor == 0LL ) sp->m_dayXor = h;
		}
	}
}
// . hash all possible fields owned by a section
// . just hash all the fragments
// . a fragment is a string of words delimited by a tag or . : ( ) [ ]
// or a date!
// . the period must be ending a sentence...
/*
HashTableX *Dates::getFieldTable ( ) {
// . return it if we already computed it
// . useful because Events.cpp needs us as well as Dates.cpp when
// calling Dates::isCompatible(), which Events.cpp also calls
if ( m_ftValid ) return &m_ft;
// just init it, fast and does not allocate
m_ft.set(4,4,0,NULL,0,false,m_niceness);
// declare this
HashTableX *tt = NULL;
int64_t h = 0LL;
// set it
for ( int32_t i = 0 ; i < m_nw ; i++ ) {
// breathe
QUICKPOLL(m_niceness);
// skip if NOT punct word
if ( ! m_wids[i] ) continue;
// scan the words for fields...
// hash each field's hash into this section and all
// parent sections until we hit a section containing a TOD
}
return &m_ft;
}
*/
//
// i would like to use this for the santefeplayhouse problem instead of
// using getFirstParentOfType() function.
//
//
// wednesday: dec 30 at 8pm
// Dec 11th
//
// Where "Dec 11th" telescopes to "8pm" when it shouldn't, but since
// it is in a section that contains then isCompatible() does not apply.
// we could probably fix it so that "dp" will "blow up" until it reaches
// the section containing it, "div1" in the example.
// telescope up date "dd" until it hits a section containing a date equal to dt
/*
Date *Dates::getFirstParentOfType ( Date *di, Date *last , HashTableX *ht) {
// get our section
Section *pp = m_sections->m_sectionPtrs[di->m_a];
Date *best = NULL;
// loop over "pp" and all its parents
for ( ; pp ; pp = pp->m_parent ) {
// pick the one *right above* us
int32_t slot = ht->getSlot ( &pp );
// loop over all dates that telescoped up to this sec.
for ( ; slot >= 0 ; slot = ht->getNextSlot(slot,&pp) ) {
// get it
int32_t pn = *(int32_t *)ht->getValueFromSlot(slot);
// get the date index
Date *dp = m_datePtrs[pn];
// skip if me
if ( dp == di ) continue;
// if us, return NULL
if ( dp == last ) return NULL;
// get the best
if ( dp->m_hasType != last->m_hasType ) continue;
// set best
best = dp;
}
if ( best ) return best;
}
return best;
}
*/
// . scan the dates in m_datePtrs[] and join neighboring dates into ranges
// . does two things for each in-body date "di":
//   1. looks at the one or two alnum words before it for an open-ended
//      range indicator ("through", "until", "after", "begins", "ends on",
//      ...) and if found sets DF_ONGOING (and DF_BEFORE_TOD/DF_AFTER_TOD
//      for tods) and backs di->m_a up over the indicator word
//   2. pairs it with the next non-NULL date "dj" and, if the text between
//      the two looks like a range separator ("-", "to", "thru", ...),
//      adds a new range date via addDate() pointing to both
// . "words" supplies the word/tag arrays that the dates' m_a/m_b index into
// . "allowOpenEndedRanges" false disables step 1
// . returns false if addDate() failed (returned NULL), true otherwise
bool Dates::addRanges ( Words *words , bool allowOpenEndedRanges ) {

	char     **wptrs = words->getWords    ();
	int32_t   *wlens = words->getWordLens ();
	int64_t   *wids  = words->getWordIds  ();
	nodeid_t  *tids  = words->getTagIds   ();

	// do not create ranges of ranges or lists
	dateflags_t skipFlags = DT_LIST_ANY | DT_RANGE_ANY;

	datetype_t specialTypes =
		DT_HOLIDAY      | // thanksgiving
		DT_SUBDAY       | // mornings
		DT_SUBWEEK      | // weekends
		DT_SUBMONTH     | // last day of month
		DT_EVERY_DAY    | // 7 days a week
		DT_SEASON       | // summers
		DT_ALL_HOLIDAYS ; // "holidays"

	for ( int32_t i = 0 ; i < m_numDatePtrs - 1 ; i++ ) {
		// breathe
		QUICKPOLL ( m_niceness );
		// shortcut
		Date *di = m_datePtrs[i];
		// skip if none
		if ( ! di ) continue;
		// skip if url date
		if ( di->m_a < 0 ) continue;

		//
		// first see if this is an open-ended range
		//

		// the word right before the date
		int32_t pre = di->m_a - 1;
		// do not scan back more than 10 words
		int32_t min = di->m_a - 10;
		if ( min < 0 ) min = 0;
		// . previous word must be in same sentence otherwise
		//   "No cover before 9PM - $5 After" "Every Sunday:..."
		//   puts the "after" with the "every sunday" for
		//   http://www.rateclubs.com/clubs/catch-one_14445.html
		// . we do not have sections at this point unfortunately so we
		//   can not check the sentence section...
		// . backup over tags and punct, but give up after crossing
		//   two breaking tags
		char brcount = 0;
		for ( ; pre > min ; pre-- ) {
			QUICKPOLL(m_niceness);
			if ( wids[pre] ) break;
			if ( ! tids[pre] ) continue;
			if ( ! isBreakingTagId(tids[pre]) ) continue;
			if ( ++brcount >= 2 ) break;
		}
		// get the alnum word before that (brcount carries over)
		int32_t pre2 = pre - 1;
		for ( ; pre2 > min ; pre2-- ) {
			QUICKPOLL(m_niceness);
			if ( wids[pre2] ) break;
			if ( ! tids[pre2] ) continue;
			if ( ! isBreakingTagId(tids[pre2]) ) continue;
			if ( ++brcount >= 2 ) break;
		}
		// zero it out if not there
		if ( pre2 <= min ) pre2 = -1;
		// do not check for open ended ranges if we shouldn't
		if ( ! allowOpenEndedRanges ) pre = min;

		// do we have an alnum word before the date?
		if ( pre > min && wids[pre] ) {
			// flag to set on the date, if any
			dateflags_t of = 0;
			// true if the date is the LEFT endpoint ("after X")
			bool leftpt = false;
			if ( wids[pre] == h_thru    ) of = DF_ONGOING ;
			if ( wids[pre] == h_through ) of = DF_ONGOING ;
			if ( wids[pre] == h_though  ) of = DF_ONGOING ; //misp
			if ( wids[pre] == h_before  ) of = DF_ONGOING ;
			if ( wids[pre] == h_until   ) of = DF_ONGOING ;
			if ( wids[pre] == h_til     ) of = DF_ONGOING ;
			if ( wids[pre] == h_till    ) of = DF_ONGOING ;
			if ( wids[pre] == h_after   ) {
				leftpt = true; of = DF_ONGOING ; }
			// "begins|starts Nov 1" for unm.edu
			if ( di->m_hasType==(DT_MONTH|DT_DAYNUM|DT_COMPOUND)&&
			     ( wids[pre] == h_starts ||
			       wids[pre] == h_begins ) ){
				leftpt = false; of = DF_ONGOING ; }
			// "begins|starts on Nov 1"
			if ( di->m_hasType==(DT_MONTH|DT_DAYNUM|DT_COMPOUND)&&
			     wids[pre] == h_on &&
			     pre2 >= 0 &&
			     ( wids[pre2] == h_starts ||
			       wids[pre2] == h_begins ) ){
				leftpt = false; of = DF_ONGOING ; }
			if ( of ) {
				// through the summer means in the summer
				// TODO: use a subset of specialTypes here
				if ( ! ( di->m_type & specialTypes ) )
					di->m_flags |= of;
				// special flag for tods
				if ( di->m_type == DT_TOD ) {
					if ( leftpt )
						di->m_flags |= DF_AFTER_TOD;
					else
						di->m_flags |= DF_BEFORE_TOD;
					// remove exact flag
					di->m_flags &= ~DF_EXACT_TOD;
				}
				// include the indicator word in the date
				di->m_a = pre;
				// might have
				// "begins nov 1 and runs through dec 12" so
				// we can't give up on the connection now, we
				// might hit "through" next. only a left
				// endpoint ("after X") is done here.
				if ( leftpt ) continue;
			}
			// "ends at june 20th" "ends on june 20th"
			if ( (wids[pre] == h_at ||
			      wids[pre] == h_on ) &&
			     pre2 >= 0 &&
			     (wids[pre2] == h_ends ||
			      wids[pre2] == h_concludes ||
			      wids[pre2] == h_conclude) ) {
				di->m_flags |= DF_ONGOING;
				di->m_a = pre2;
				if ( di->m_type == DT_TOD )
					di->m_flags |= DF_BEFORE_TOD;
				// remove exact flag
				di->m_flags &= ~DF_EXACT_TOD;
				continue;
			}
		}

		// fix "schedule varies from weekday to weekend" because
		// it was causing a core in the addInterval logic because
		// the weekend is like endpoints for the weekday
		if ( di->m_type == DT_SUBWEEK ) continue;

		//
		// now try to pair di with the next date to its right
		//

		int32_t j = i + 1;
		Date *dj = NULL;
		// advance while ptr is NULL
		for ( ; j < m_numDatePtrs ; j++ ) {
			// assign
			dj = m_datePtrs[j];
			// stop on first non-empty
			if ( dj ) break;
		}
		// no more dates at all, we are done
		if ( ! dj ) break;
		// skip if url date, try another di then
		if ( dj->m_a < 0 ) continue;

		// . must be the same type
		// . exception: jan 3 to dec 4 2009 (ignore year)
		bool good = false;
		if ( (di->m_hasType & ~DT_YEAR) == (dj->m_hasType & ~DT_YEAR) )
			good = true;
		// also allow "Nov - Dec 5, 2008" (graffiti.org)
		if ( di->m_hasType == DT_MONTH &&
		     dj->m_hasType == (DT_MONTH|DT_DAYNUM|DT_YEAR|DT_COMPOUND))
			good = true;
		// also allow "Nov - Dec 5" (???)
		if ( di->m_hasType == DT_MONTH &&
		     dj->m_hasType == (DT_MONTH|DT_DAYNUM|DT_COMPOUND))
			good = true;
		// if not a good fit, skip this potential range right now
		if ( ! good ) continue;
		// skip if either is a list or range already
		if ( di->m_hasType & skipFlags ) continue;
		if ( dj->m_hasType & skipFlags ) continue;

		// get the word range between the two atomic dates
		int32_t a = di->m_b;
		int32_t b = dj->m_a;
		// too many words in between? forget it!
		if ( b - a > 20 ) continue;
		// count the associated alnums
		int32_t alnumcount = 0;
		// . get word before us. looking for "between X and Y" phrase
		// . otherwise "pre" becomes the start of the range date
		if ( pre > min && wids[pre] == h_between )
			alnumcount++;
		else
			pre = di->m_a;

		// init k for the scan of the junk between the two dates
		int32_t k = a;
		int32_t andcount    = 0;
		bool    hyphen      = false;
		int64_t prevWid     = 0LL;
		bool    brokenRange = false;
		int32_t badWords    = 0;
		// scan what is between them to determine if is a range!
		for ( ; k < b ; k++ ) {
			// count em
			if ( wids[k] ) alnumcount++;
			// allow "to" for like "5 to 6pm" or
			// "aug 9 to aug 12"
			if ( wids[k] == h_to      ||
			     wids[k] == h_through ||
			     wids[k] == h_though  || // misspelling
			     wids[k] == h_before  ||
			     wids[k] == h_thru    ||
			     // "from nov 6 until nov 9"
			     wids[k] == h_until   ||
			     wids[k] == h_til     ||
			     wids[k] == h_till    ||
			     // "starting nov 6 and ongoing until dec 7"
			     wids[k] == h_ongoing ) {
				hyphen = true;
				continue;
			}
			// ends at: y
			if ( wids[k] == h_ends ) {
				hyphen      = true;
				prevWid     = h_ends;
				brokenRange = true;
				continue;
			}
			// end: y
			if ( wids[k] == h_end ) {
				hyphen      = true;
				prevWid     = h_end;
				brokenRange = true;
				continue;
			}
			// "ends at:" OR "end at:". (the second test used to
			// be a duplicate of the first.)
			if ( wids[k] == h_at && prevWid == h_ends ) continue;
			if ( wids[k] == h_at && prevWid == h_end  ) continue;
			// fix activedatax.com:
			// "start date: 10/7/2011 start time: 6:00pm
			//  end date: 10/7/2011 end time: 11:59pm"
			if ( wids[k] == h_date && prevWid == h_end ) continue;
			// facebook's start/end time tags
			if ( tids[k] == (TAG_FBSTARTTIME | BACKBIT) )
				continue;
			if ( tids[k] == TAG_FBENDTIME ) {
				hyphen = true;
				continue;
			}
			// . these are not hyphens but they are transparent
			// . "starting nov 6 and continuing through dec 7"
			if ( wids[k] == h_continuing ) continue;
			if ( wids[k] == h_lasting    ) continue;
			if ( wids[k] == h_runs       ) continue;
			if ( wids[k] == h_lasts      ) continue;
			if ( wids[k] == h_and ) {
				andcount++;
				continue;
			}
			// any other word does not break the scan, since it
			// might be part of an event end time field, but
			// count it
			if ( wids[k] ) { badWords++; continue; }
			// stop on a tag unless it is one of the allowed
			// ones. i've seen "8:00 - 10:30pm" with tags in it
			// for abqtango.org
			if ( tids[k] ) {
				// ok if not breaking
				if ( ! isBreakingTagId(tids[k]) ) continue;
				// br is ok for microsoft front page
				if ( tids[k] == TAG_BR ) continue;
				// td/tr is ok for ci.tualatin.or.us which
				// has "starts at: x ends at: y"
				if ((tids[k]&BACKBITCOMP) == TAG_TD ) continue;
				if ((tids[k]&BACKBITCOMP) == TAG_TR ) continue;
				// sometimes they use xml-ish tags
				// like mcachicago.org
				if ((tids[k]&BACKBITCOMP) == TAG_XMLTAG )
					continue;
				// break on all else
				break;
			}
			// it is a punct word: only allow certain chars in it
			char *p    = wptrs[k];
			char *pend = p + wlens[k];
			for ( ; p < pend ; p++ ) {
				// space is ok
				if ( is_wspace_utf8(p) ) continue;
				// hyphen is ok
				if ( *p == '-' ) {
					hyphen = true;
					continue;
				}
				// : is ok to fix "ends at:"
				if ( *p == ':' && brokenRange ) continue;
				// period is ok ("sun. thru thur.")
				if ( *p == '.' ) continue;
				// allow comma to fix carnegieconcerts.com
				// type "12pm, - 5pm"
				if ( *p == ',' ) continue;
				// all others fail
				break;
			}
			// stop the scan if we hit a bad punct char
			if ( p < pend ) break;
		}
		// just "and" by itself is not a range indicator
		if ( andcount && andcount == alnumcount ) continue;
		// skip if did not make it. we are not a range then.
		if ( k < b ) continue;
		// need a hyphen or equivalent to be a range
		if ( ! hyphen ) continue;
		// stop if had bad words
		if ( badWords && ! brokenRange ) continue;

		// if we are adding a range of daynums like "3-12" then
		// scan to the left of that to see if "age" or "children"
		// is before that and after the previous date...
		if ( di->m_type == DT_DAYNUM && di->m_a >= 0 ) {
			// get prev date
			Date *prev = NULL;
			for ( int32_t pi = i - 1 ; pi >= 0 ; pi-- ) {
				// breathe
				QUICKPOLL(m_niceness);
				prev = m_datePtrs[pi];
				if ( prev ) break;
			}
			// scan before us but do not go past "lo"
			int32_t lo = di->m_a - 20;
			// or the prev date
			if ( prev && prev->m_b > lo ) lo = prev->m_b;
			// never index before the first word. this used to
			// be able to go negative when the date was within
			// the first 20 words.
			if ( lo < 0 ) lo = 0;
			// sanity. "lo" can equal di->m_a... i've seen that
			// for some asian language page.
			// http://www.zoneuse.com/
			if ( lo > di->m_a ) { char *xx=NULL;*xx=0; }
			// are we an age range?
			bool age = false;
			// scan before us and after the previous date...
			for ( int32_t w = di->m_a - 1 ; w >= lo ; w-- ) {
				// breathe
				QUICKPOLL(m_niceness);
				if ( wids[w] == h_children ) age = true;
				if ( wids[w] == h_age      ) age = true;
				if ( wids[w] == h_ages     ) age = true;
			}
			// do not add this as a daynum range if its age range
			if ( age ) continue;
			// set a max now
			int32_t max = di->m_b + 20;
			// do not breach our words array
			if ( max > m_nw ) max = m_nw;
			// scan right for "years" "youngster 2-12 years"
			for ( int32_t w = di->m_b ; w < max ; w++ ) {
				QUICKPOLL(m_niceness);
				// 2-12 years old
				if ( wids[w] == h_years ) age = true;
				// 2-12 year olds
				if ( wids[w] == h_year  ) age = true;
			}
			// do not add this as a daynum range if its age range
			// or year range
			if ( age ) continue;
		}

		// . fix "November - December 5, 2008" (graffiti.org)
		// . if a daynum follows 2nd month in range, skip it
		if ( di->m_type == DT_MONTH && dj->m_type == DT_MONTH ) {
			// get next date after dj
			Date *next = NULL;
			// scan to right of dj
			for ( int32_t nj = j+1 ; nj < m_numDatePtrs ; nj++ ) {
				QUICKPOLL(m_niceness);
				next = m_datePtrs[nj];
				if ( next ) break;
			}
			// is it a daynum after us?
			bool isdaynum = false;
			if ( next && next->m_type == DT_DAYNUM )
				isdaynum = true;
			// set max to scan
			int32_t kmax = dj->m_b + 10;
			// do not hit daynum
			if ( next && kmax > next->m_a ) kmax = next->m_a;
			// no scanning if not daynum
			if ( ! isdaynum ) kmax = -1;
			// and no words in between
			for ( int32_t w = dj->m_b ; w < kmax ; w++ ) {
				if ( ! wids[w] ) continue;
				// if we hit another alnum word in between
				// the dj month and daynum, then do not
				// count it as a daynum for the dj month
				isdaynum = false;
				break;
			}
			// skip if it is
			if ( isdaynum ) continue;
		}

		//
		// add the range date. its type depends on the endpoints.
		//
		Date *DD = NULL;
		if ( di->m_type == DT_DOW ) {
			DD = addDate (DT_RANGE_DOW,0,pre,dj->m_b,0);
			if ( DD ) {
				DD->m_dow = -1;
				// set all the m_dowBits now
				int32_t dow1 = di->m_num;
				int32_t dow2 = dj->m_num;
				if ( dow1 < 0 ) { char *xx=NULL;*xx=0; }
				if ( dow2 < 0 ) { char *xx=NULL;*xx=0; }
				for ( int32_t dow = 1 ; dow <= 7 ; dow++ ) {
					// skip if not in range
					if ( dow1 <= dow2 ) {
						if ( dow < dow1 ) continue;
						if ( dow > dow2 ) continue;
					}
					// . wrap-around range
					// . i.e. "Tues through Sun"
					// . i.e. "Friday thru Monday"
					else {
						if ( dow < dow1 && dow > dow2 )
							continue;
					}
					DD->m_dowBits |= (1<<(dow-1));
				}
			}
		}
		else if ( di->m_type == DT_TOD ) {
			DD = addDate (DT_RANGE_TOD,0,pre,dj->m_b,0);
			if ( DD ) DD->m_tod = -1;
		}
		else if ( di->m_type == DT_YEAR ) {
			DD = addDate (DT_RANGE_YEAR,0,pre,dj->m_b,0);
		}
		else if ( di->m_hasType == (DT_MONTH|DT_DAYNUM|DT_COMPOUND) ) {
			DD = addDate (DT_RANGE_MONTHDAY,0,pre,dj->m_b,0);
		}
		// "Nov - Dec 5"
		else if ( di->m_type == DT_MONTH &&
			  dj->m_hasType == (DT_MONTH|DT_DAYNUM|DT_COMPOUND) ) {
			DD = addDate (DT_RANGE_MONTHDAY,0,pre,dj->m_b,0);
		}
		// "Nov - Dec 5 2008"
		else if ( di->m_type == DT_MONTH &&
			  dj->m_hasType==(DT_MONTH|DT_DAYNUM|DT_YEAR|
					  DT_COMPOUND) ) {
			DD = addDate (DT_RANGE_MONTHDAY,0,pre,dj->m_b,0);
		}
		// nov - dec
		else if ( di->m_hasType == DT_MONTH ) {
			DD = addDate (DT_RANGE_MONTH,0,pre,dj->m_b,0);
		}
		else if ( di->m_type == DT_DAYNUM ) {
			DD = addDate (DT_RANGE_DAYNUM,0,pre,dj->m_b,0);
			if ( DD ) DD->m_dayNum = -1;
		}
		// trumba.com's
		// "Friday, December 4, 1pm - Saturday, December 5, 2009, 4pm"
		else if ( di->m_hasType == (DT_TOD|DT_DAYNUM|DT_MONTH|
					    DT_DOW|DT_COMPOUND) ) {
			DD = addDate (DT_RANGE_TIMEPOINT,0,pre,dj->m_b,0);
			if ( DD ) DD->m_dayNum = -1;
		}
		// cfa.aiany.org has
		// "Feb 22, 2011 9:00 AM - Feb 24, 2011 4:00 PM" and we
		// need that to NOT be an event brother so DT_RANGE_TIMEPOINT
		// needs to be set
		else if ( di->m_hasType == (DT_TOD|DT_DAYNUM|DT_MONTH|
					    DT_YEAR|DT_COMPOUND) ) {
			DD = addDate (DT_RANGE_TIMEPOINT,0,pre,dj->m_b,0);
			if ( DD ) DD->m_dayNum = -1;
		}
		// and another one just in case
		else if ( di->m_hasType == (DT_TOD|DT_DAYNUM|DT_MONTH|
					    DT_COMPOUND) ) {
			DD = addDate (DT_RANGE_TIMEPOINT,0,pre,dj->m_b,0);
			if ( DD ) DD->m_dayNum = -1;
		}
		else {
			DD = addDate (DT_RANGE,0,pre,dj->m_b,0);
		}
		// return false on error
		if ( ! DD ) return false;

		// 1pm-2am (need to add 24 hours to 2am)
		if ( di->m_type == DT_TOD &&
		     dj->m_num < di->m_num &&
		     dj->m_num < 12*3600 ) {
			// save the original value... for computing duration
			dj->m_truncated = dj->m_num ;
			// . push the end tod into the next day
			// . do not truncate to midnight, might be 9pm-3am
			dj->m_num    += 24*3600;
			dj->m_tod    += 24*3600;
			dj->m_minTod += 24*3600;
			dj->m_maxTod += 24*3600;
			// note it. shift DEFINITION of day up by 2 hours
			// if "num" was like 2am... keep the largest shift
			int32_t shiftDay = dj->m_num - 24*3600;
			if ( m_shiftDay && shiftDay > m_shiftDay )
				m_shiftDay = shiftDay;
			else if ( m_shiftDay == 0 )
				m_shiftDay = shiftDay;
			// dj's am/pm is implied then
			dj->m_suppFlags |= SF_IMPLIED_AMPM;
		}
		// a quick fix for 12:00-12:00am, set di to noon then
		if ( di->m_num == 86400 && dj->m_num == 86400 ) {
			di->m_num    = 12*3600;
			di->m_tod    = 12*3600;
			di->m_minTod = 12*3600;
			di->m_maxTod = 12*3600;
			di->m_suppFlags |= SF_IMPLIED_AMPM;
		}
		// fix 12am-6pm (12am should be 0 not 86400)
		if ( di->m_num == 86400 && dj->m_num < 86400 ) {
			di->m_num    = 0;
			di->m_tod    = 0;
			di->m_maxTod = 0;
			di->m_minTod = 0;
		}
		// and set the ptrs
		DD->addPtr ( di , i , this );
		DD->addPtr ( dj , j , this );
		// force start back since first call to addPtr() sets it
		DD->m_a = pre;
	}
	return true;
}
// . now set the m_min* dates after "until", etc.
// . we could make a dummy date, and call addPtrs on it with all
// the dates after the "until"
// . first make a dummy date based on spidered time and use that
// as the range's first endpoint
// . also check for any date with a "through" or "ongoing" before
// it and make that into a range as well
/*
void Dates::addOpenEndedRanges ( ) {
// do not create ranges of ranges or lists
dateflags_t skipFlags = DF_LIST | DF_RANGE;
//
// now look for "ongoing through Saturday, January 2, 2010" ...
// and other open ended ranges.
//
for ( int32_t i = 0 ; i < m_numDatePtrs - 1 ; i++ ) {
// breathe
QUICKPOLL ( m_niceness );
// int16_tcut
Date *di = m_datePtrs[i];
// skip if none
if ( ! di ) continue;
// skip if is a list or range already
if ( di->m_flags & skipFlags ) continue;
// get previous two words
int32_t pre = di->m_a - 2;
// set a min
int32_t min = di->m_a - 10;
if ( min < 0 ) min = 0;
// backup over tags and punct
for ( ; pre > min && ! m_wids[pre] ; pre-- ) ;
// skip if nothing
if ( pre == min ) continue;
// skip if not certain word
if ( m_wids[pre] != h_thru &&
m_wids[pre] != h_through &&
m_wids[pre] != h_until ) continue;
// update its start
di->m_a = pre;
// flag it as an open ended range
di->m_flags |= DF_ONGOING;
// now update the mins if min is valid
if ( di->m_minDayNum != 32
}
// must have valid spider time
if ( m_spideredTime <= 0 ) return;
// or set ongoing flag in addPtrs() addDate() and when it is set
// set the min
// parse that up
struct tm *timeStruct ;
timeStruct = localtime ( &m_spideredTime );
// now loop over all dates with DF_ONGOING set either in the above
// loop or in makeCompoundLists() and adjust the min endpoint.
// similar to addPtr()
for ( int32_t i = 0 ; i < m_numDatePtrs - 1 ; i++ ) {
// breathe
QUICKPOLL ( m_niceness );
// int16_tcut
Date *di = m_datePtrs[i];
// skip if none
if ( ! di ) continue;
// skip if not part of an open-ended range
if ( ! ( di->m_flags & DF_ONGOING ) ) continue;
// scan through each ptr looking for the ongoing flag
for ( int32_t j = 0 ; j < di->m_numPtrs ; j++ ) {
// int16_tcut
Date *dj = di->m_ptrs[j];
// skip if not part of an open-ended range
if ( ! ( dj->m_flags & DF_ONGOING ) ) continue;
//
// set the mins to the spidered time
// set m_minTime
tm ts;
memset(&ts, 0, sizeof(tm));
ts.tm_mon = m_minMonth - 1;
ts.tm_mday = m_minDay;
ts.tm_year = m_minYear - 1900;
m_minTime = mktime(&ts);
// integrate time of day
m_minTime += m_minTOD;
mdw left off here
}
*/
// . scan m_datePtrs[] and bond runs of neighboring dates of compatible types
//   into a single list date (DT_LIST_DAYNUM, DT_LIST_MONTH, DT_LIST_MONTHDAY,
//   DT_LIST_TOD, DT_LIST_DOW or DT_LIST_OTHER)
// . "words" supplies the word ptrs, lengths, ids and tag ids that are scanned
//   between two candidate dates
// . only "and", "or", "on", non-breaking tags and a small set of punctuation
//   chars may separate two list items, unless "ignoreBreakingTags" is true in
//   which case breaking tags and arbitrary punctuation are allowed under the
//   extra conditions checked below
// . returns false if addDate() fails to make the list date, true otherwise
bool Dates::addLists ( Words *words , bool ignoreBreakingTags ) {
// for debug set this on MDWMDWMDW
//ignoreBreakingTags = true;
char **wptrs = words->getWords ();
int32_t *wlens = words->getWordLens ();
int64_t *wids = words->getWordIds ();
nodeid_t *tids = words->getTagIds ();
// shortcut
// NOTE(review): "sp" stays NULL when m_sections is NULL, but it is indexed
// without a NULL check in the ignoreBreakingTags punctuation path below
Section **sp = NULL;
if ( m_sections ) sp = m_sections->m_sectionPtrs;
for ( int32_t i = 0 ; i < m_numDatePtrs ; i++ ) {
// breathe
QUICKPOLL ( m_niceness );
// shortcut
Date *di = m_datePtrs[i];
// skip if nuked from range logic above
if ( ! di ) continue;
// skip if from url or not otherwise in the body
if ( di->m_a < 0 ) continue;
// don't make a list of DOW TODs like:
// "Sun. - Thur. until 10pm Fri. + Sat. until Midnight"
// http://blackbirdbuvette.com/
// why not????
//if ( di->m_hasType == (DT_TOD | DT_DOW) ) continue;
// don't make a list of holidays because
// "Thanksgiving, Christmas and New Year's Days"
// needs to telescope the individual holidays to
// "every day of the week, 9am-5pm" and
// "Saturdays and Sundays in summer: 9am-6pm"
// basically, each list item may need to telescope to
// a different date header...
if ( di->m_type == DT_HOLIDAY ) continue;
if ( di->m_type == DT_SUBDAY ) continue;
if ( di->m_type == DT_SUBWEEK ) continue;
if ( di->m_type == DT_SUBMONTH ) continue;
if ( di->m_type == DT_EVERY_DAY ) continue;
if ( di->m_type == DT_SEASON ) continue;
if ( di->m_type == DT_ALL_HOLIDAYS ) continue;
// never make a list of calendar daynums
if ( di->m_type == DT_DAYNUM && (di->m_flags & DF_IN_CALENDAR))
continue;
// reset this
// NOTE(review): "np" is never modified after this point in this
// function, it is only read by the "np <= 1" check after the j loop
int32_t np = 0;
// the list date we are building, made lazily on the first bonded pair
Date *DD = NULL;
// init the "j" loop
int32_t j = i + 1 ;
// shortcut
Date *dj = NULL;
// used for list of TODs
bool lastHadAMPM = false;
// record if we had am/pm
if ( di->m_type==DT_TOD && (di->m_suppFlags & SF_HAD_AMPM) )
// was it am or pm
lastHadAMPM = true;
// make date type for di
datetype_t diHasType = di->m_hasType;
// if we have a monthday range take out DT_EVERYDAY
// so "Nov 11-12 daily 1pm" pairs with "Nov 13 2pm" in list
if ( diHasType & DT_RANGE_MONTHDAY )
diHasType &= ~DT_EVERY_DAY;
if ( diHasType & DT_RANGE_DAYNUM )
diHasType &= ~DT_EVERY_DAY;
// list of tods is same as one tod or tod range. same
// for daynums, months, etc.
diHasType &= ~DT_LIST_ANY;
diHasType &= ~DT_RANGE_ANY;
// scan over atomic dates after the month
for ( ; j < m_numDatePtrs ; j++ ) {
// get the jth atomic date
dj = m_datePtrs[j];
// skip if nuked (was endpoint of a range i guess)
if ( ! dj ) continue;
// skip if from url or not otherwise in the body
if ( dj->m_a < 0 ) continue;
// never make a list with calendar daynums
if ( dj->m_type == DT_DAYNUM &&
(dj->m_flags & DF_IN_CALENDAR))
break;
datetype_t djHasType = dj->m_hasType;
// like above, if monthday range take out DT_EVERYDAY
if ( djHasType & DT_RANGE_MONTHDAY )
djHasType &= ~DT_EVERY_DAY;
if ( djHasType & DT_RANGE_DAYNUM )
djHasType &= ~DT_EVERY_DAY;
// list of tods is same as one tod or tod range. same
// for daynums, months, etc.
djHasType &= ~DT_LIST_ANY;
djHasType &= ~DT_RANGE_ANY;
// does it match?
bool match = (djHasType == diHasType );
/*
datetype_t dt1 =
DT_TOD|
DT_DAYNUM|
DT_MONTH|
DT_COMPOUND|
DT_LISTDAYNUM;
*/
// but do allow "Dec 13,20" to match "Jan 3"
if ( ! match &&
(diHasType|DT_LIST_DAYNUM) ==
(djHasType|DT_LIST_DAYNUM) )
match = true;
// allow "Friday, May 9" + "and Saturday, May 10, 2008"
// www.missioncvb.org
if ( ! match &&
(diHasType|DT_YEAR) ==
(djHasType) )
match = true;
// allow for a list of
// "Sundays: Dec 13, 20, 27, Jan 3, at 2:00pm" and
// "Mondays: Dec 21, 28, at 8:00pm"
/*
datetype_t dt2 =
DT_TOD|
DT_DAYNUM|
DT_MONTH|
DT_COMPOUND|
DT_LISTDAYNUM;
*/
if ( ! match &&
(diHasType | DT_LIST_OTHER) ==
(djHasType | DT_LIST_OTHER) )
match = true;
// allow for a list of:
// "Tues., Wed., Thurs., 12 noon - 5 pm.;" and
// "Fri. 9 am. - 2:30 pm."
if ( ! match &&
(diHasType | DT_LIST_DOW) ==
(djHasType | DT_LIST_DOW) )
match = true;
// allow for a list of:
// 8 am. Mon - Fri, 7:30 am - 10 am Sun.
// for unm.edu
if ( ! match &&
(diHasType | DT_RANGE_TOD|DT_RANGE_DOW) ==
(djHasType | DT_RANGE_TOD|DT_RANGE_DOW) )
match = true;
// allow blackbirdbuvetter.com to do
// "Mon. - Fri. 11am - 2am" && "Sat. 12pm - 2am"
// that way the kitchen hours which are
// "Sun. - Thur. until 10pm" can telescope to that
// list and get all the hours correct!
if ( ! match ) {
datetype_t d1 = diHasType;
datetype_t d2 = djHasType;
if ( d1 & DT_RANGE_DOW )
d1 &= ~DT_RANGE_DOW;
if ( d2 & DT_RANGE_DOW )
d2 &= ~DT_RANGE_DOW;
if ( d1 == d2 )
match = true;
}
// stop if the types are not compatible
if ( ! match ) break;
// TODO: possibly fix unm.edu which has:
// 9 am. - 6 pm. Mon. - Sat.
// Thur. 9 am. - 7 pm. Sun. 10 am - 4 pm. "
// and fails on gotBreak2 because dj->m_b hits the
// "Sun. 10 am..." date and not a breaking tag.
// "dx" is the next non-NULL date ptr after dj, or NULL if none
Date *dx = NULL;
for ( int32_t x = j + 1 ;
x < m_numDatePtrs;x++) {
QUICKPOLL(m_niceness);
dx = m_datePtrs[x];
if ( ! dx ) continue;
break;
}
// declare outside for loop
int32_t a;
// scan words in between date "di" and date "dj". if this loop
// stops early then "a < dj->m_a" and the list is broken below.
for ( a = di->m_b ; a < dj->m_a ; a++ ) {
// "and" is ok
if ( wids[a] == h_and ) continue;
// Monday OR Friday at 1pm
if ( wids[a] == h_or ) continue;
// "and on" is ok or "on tuesday, on wed ..."
// fixes http://www.law.berkeley.edu/140.htm
// "Monday to Saturday at 10 a.m. and on
// Sunday at 1 p.m"
if ( wids[a] == h_on ) continue;
// no other alnum words are ok
if ( wids[a] ) break;
// no tids
if ( tids[a] ) {
// anchor tag is ok though.
// fixes mrmovies.com's
// "Mon|Tue|..." menu thing
if ( !isBreakingTagId(tids[a]) )
continue;
// fix dj johnny b, has list of
// month/daynum/years one per line
///if ( skipBreakingTags ) continue;
// now allow any tag if di and dj are
// in the same sentence!
//if(sp[dj->m_a]->m_sentenceSection ==
// sp[di->m_a]->m_sentenceSection )
// continue;
// break the list
if ( ! ignoreBreakingTags ) break;
// do not string together
// "monday" "tuesday" ... when they
// are in a table heading row that
// is for a weekly schedule!!!! let's
// apply this to all date types
// though in case they are indeed
// headers in a table row.
if( //di->m_type == DT_DOW &&
//dj->m_type == DT_DOW &&
((tids[a] & BACKBITCOMP)==TAG_TD ||
(tids[a] & BACKBITCOMP)==TAG_TH )&&
di->m_section &&
dj->m_section &&
di->m_section->m_tableSec ==
dj->m_section->m_tableSec &&
di->m_section->m_rowNum >= 1 &&
di->m_section->m_rowNum ==
dj->m_section->m_rowNum )
break;
// otherwise, do not break it unless
// the list item shares the line with
// other text... like how we check
// in makeCompounds()
int32_t k;
/*
// scan to left of di
k = di->m_a - 1;
bool gotBreak1 = false;
// need a breaking tag to follow
for ( ; k >= 0 ; k-- ) {
QUICKPOLL(m_niceness);
if ( wids[k] ) break;
if ( ! tids[k] ) continue;
if (!isBreakingTagId(tids[k]))
continue;
gotBreak1 = true;
break;
}
// need a breaking tag after it before
// hitting another alnum word in order
// for it to be isolated
if ( ! gotBreak1 ) break;
*/
// same for dj, but check word after it
k = dj->m_b;
bool gotBreak2 = false;
// need a breaking tag to follow
for ( ; k < m_nw ; k++ ) {
QUICKPOLL(m_niceness);
// if we hit another date
// consider that like a
// breaking tag to fix unm.edu
if ( dx && k == dx->m_a ) {
gotBreak2 = true;
break;
}
if ( wids[k] ) break;
if ( ! tids[k] ) continue;
if (!isBreakingTagId(tids[k]))
continue;
gotBreak2 = true;
break;
}
// need a breaking tag after it before
// hitting another alnum word in order
// for it to be isolated
if ( ! gotBreak2 )
break;
// otherwise, they are both their
// own line so let them bond.
continue;
}
// allow any punct now to fix unm.edu which
// uses a ';' between
// "Tues., Wed., Thurs., 12 noon - 5 pm" and
// "Fri. 9 am. - 2:30 pm"
// i think this is even weaker than allowing
// breaking tags in, so we should be ok.
if ( ignoreBreakingTags ) {
// get sentence containing di
// NOTE(review): "sp" is NULL here if m_sections
// was NULL, confirm callers only pass
// ignoreBreakingTags=true when sections are set
Section *n1 = sp[di->m_a];
Section *n2 = sp[dj->m_a];
n1 = n1->m_sentenceSection;
n2 = n2->m_sentenceSection;
// if either not in sentence, forget it
// Happens if in javascript i guess
// and we got no sections
if ( ! n1 ) break;
if ( ! n2 ) break;
// get sections
Section *s1 = sp[di->m_a];
Section *s2 = sp[dj->m_a];
// blow up: grow s1 until its parent is missing
// or contains s2, then grow s2 the same way
// against the grown s1
for ( ; s1 ; s1 = s1->m_parent ) {
if ( ! s1->m_parent ) break;
if (s1->m_parent->contains(s2))
break;
}
for ( ; s2 ; s2 = s2->m_parent ) {
if ( ! s2->m_parent ) break;
if (s2->m_parent->contains(s1))
break;
}
// crap, this breaks cabq.gov libraries
// page because they often have
// the Sunday hours in its own tag!
// so i commented it out.
//
// also the oppenheimer zvents.com url
// now blends the Sep date block
// with the Oct date block into one
// list again.
//
// BUT combines "... First Thursdays"
// with "Friday and Saturday" for
// rialtopool.com which is bad. so
// use an AND operator on these.
//
// if *BOTH* contain additional
// sentences then i wouldn't connect
// them together
// (compares the start position of s1 and the
// end position of s2 against their sentences)
if(s1->m_alnumPosA!=n1->m_alnumPosA&&
s2->m_alnumPosB!=n2->m_alnumPosB )
break;
// ok, connect them together!
continue;
}
// scan the punct word otherwise
char *p = wptrs[a];
char *pend = p + wlens[a];
for ( ; p < pend ; p += getUtf8CharSize(p) ) {
// space is ok
if ( *p == ' ' ) continue;
// other whitespace is ok
if ( is_wspace_utf8(p) ) continue;
// comma is ok
if ( *p == ',' ) continue;
// "Mon|Tue|..." menu for mrmovies.com
// so it being a header does not
// hurt us!
if ( *p == '|' ) continue;
// "Fri. + Sat. until Midnight"
if ( *p == '.' ) continue;
if ( *p == '+' ) continue;
// Fridays & Saturdays
if ( *p == '&' ) continue;
// but fix "24/7" for guynndollsllc.com
// page4.html
if ( *p=='/' && di->m_type==DT_DAYNUM )
break;
// panjea.org has "... guest instructor
// in March/April..."
if ( *p == '/' ) continue;
// otherwise stop
break;
}
// continue if every char of the punct word was ok
if ( p >= pend ) continue;
// otherwise, stop
break;
}
// if the scan above stopped before reaching dj then something
// not allowed separates di and dj, so end the list here
if ( a < dj->m_a ) break;
// must be ascending order!
//if ( np > 0 && ptrs[np-1]->m_num >= dj->m_num ) {
// // that is a deal killer
// np = 0;
// // stop
// break;
//}
// fix t & t or m & m because those are not true dows!
// also fix like m w wells too
if ( wlens[di->m_a] == 1 &&
wlens[dj->m_a] == 1 &&
di->m_type == DT_DOW &&
dj->m_type == DT_DOW )
break;
// get the type
datetype_t tt ;//= DT_LIST;
// and the subtype
if ( di->m_type == DT_DAYNUM )
tt = DT_LIST_DAYNUM;
else if ( di->m_type == DT_MONTH )
tt = DT_LIST_MONTH;
else if ( di->m_type == (DT_DAYNUM|DT_MONTH) )
tt = DT_LIST_MONTHDAY;
else if ( di->m_type == DT_TOD )
tt = DT_LIST_TOD;
else if ( di->m_type == DT_DOW )
tt = DT_LIST_DOW;
else
tt = DT_LIST_OTHER;
// record if we had am/pm
if ( dj->m_type==DT_TOD &&
(dj->m_suppFlags & SF_HAD_AMPM) )
// was it am or pm
lastHadAMPM = true;
// . fix for mrmovietimes.com
// . "10:20am, 5:10" (list of tods, only "am" is given)
// . force it to pm for addIntervals() function
if ( dj->m_type == DT_TOD &&
!(dj->m_suppFlags & SF_HAD_AMPM) &&
lastHadAMPM )
dj->m_suppFlags |= SF_PM_BY_LIST;
// make it
if ( ! DD ) DD = addDate(tt,0,di->m_a,di->m_b,0);
// return false on error
if ( ! DD ) return false;
// start it
if ( DD->m_numPtrs == 0 ) DD->addPtr ( di , i , this );
// . add to our list of things
// . this NULLs out anything we add to it!
DD->addPtr ( dj , j , this );
// record last
//last = dj;
// point to next date atom
//j++;
// and switch, so the next candidate is compared against dj
di = dj;
// stop if too many!
if ( DD->m_numPtrs >= 100 ) break;
}
// must have at least TWO things to be a list
// NOTE(review): np is still 0 here since nothing above changes it, so
// this always continues and the two statements below never execute
if ( np <= 1 ) continue;
// sanity check
if ( DD->m_numPtrs > 100 ) { char *xx=NULL;*xx=0; }
// advance i over the list we just made
i = j - 1;
}
return true;
}
// . bond neighboring dates of *different* simple types (tod, dow, daynum,
//   month, year) into a single DT_COMPOUND date
// . if monthDayOnly is true then we want to combine Month and Day date types so
//   that addLists() can fix "Dec 11, 18 Jan 1" by making sure that is a list
// . if linkDatesInSameSentence is true, two non-fuzzy dates whose sections
//   have the same non-negative m_senta are linked across tags, punctuation
//   and most words (but never across "closed", "closes", "closure", "except")
// . if ignoreBreakingTags is true, a breaking tag between two dates does not
//   stop the compound provided each date is isolated in its own sentence
// . "words" supplies the word ptrs, lengths, ids and tag ids scanned between
//   two candidate dates
// . returns false if addDate() fails, true otherwise
bool Dates::makeCompounds ( Words *words ,
bool monthDayOnly ,
bool linkDatesInSameSentence ,
//bool dowTodOnly ,
bool ignoreBreakingTags ) {
char **wptrs = words->getWords ();
int32_t *wlens = words->getWordLens ();
int64_t *wids = words->getWordIds ();
nodeid_t *tids = words->getTagIds ();
// this range algo only works on simple date types for now
datetype_t simpleFlags = DT_TOD|DT_DOW|DT_DAYNUM|DT_MONTH|DT_YEAR;
datetype_t specialTypes =
DT_HOLIDAY | // thanksgiving
DT_SUBDAY | // mornings
DT_SUBWEEK | // weekends
DT_SUBMONTH | // last day of month
DT_EVERY_DAY | // 7 days a week
DT_SEASON | // summers
DT_ALL_HOLIDAYS ; // "holidays"
// shortcut. stays NULL if we have no sections, so every use of "sp"
// below must be guarded
Section **sp = NULL;
//if ( linkDatesInSameSentence ) sp = m_sections->m_sectionPtrs;
if ( m_sections ) sp = m_sections->m_sectionPtrs;
for ( int32_t i = 0 ; i < m_numDatePtrs ; i++ ) {
// breathe
QUICKPOLL ( m_niceness );
// shortcut
Date *di = m_datePtrs[i];
// skip if none
if ( ! di ) continue;
// skip if in url
if ( ! ( di->m_flags & DF_FROM_BODY ) ) continue;
// must be a simple month
if ( monthDayOnly && di->m_type != DT_MONTH ) continue;
// and non-numeric
if ( monthDayOnly && di->m_flags & DF_MONTH_NUMERIC ) continue;
// isolated daynums can never start a compound.
// fixes "1st & 3rd Saturdays 7:30-10:30PM" for abqfolkfest
if ( di->m_hasType == DT_DAYNUM ) continue;
if ( di->m_hasType == (DT_LIST_DAYNUM|DT_DAYNUM) ) continue;
// never make a list of calendar daynums
if ( di->m_type == DT_DAYNUM && (di->m_flags & DF_IN_CALENDAR))
continue;
// reset
datetype_t lastType = di->m_hasType;//0
// count them
//int32_t np = 0;
// init j loop
int32_t j = i + 1;
// shortcut
//Date *DD = NULL;
// mark as gotten
datetype_t got = di->m_hasType;
// make a list of ptrs, the compound is only built from these
// after the j loop if we collected at least two
Date *ptrs[100];
int32_t index[100];
int32_t np = 0;
// add di
ptrs[np] = di;
// save index
index[np] = i;
// advance
np++;
// mark it
Date *prev = di;
Date *lastNonNull = di;
// declare up here
Date *dj;
// loop over all dates starting with this word number
for ( ; j < m_numDatePtrs ; j++ , prev = lastNonNull ) {
// get it from that
dj = m_datePtrs[j];
// skip if ignored, part of a range or list?
if ( ! dj ) continue;
// skip if in url
if ( ! ( dj->m_flags & DF_FROM_BODY ) ) continue;
// never make a list with calendar daynums
if ( dj->m_type == DT_DAYNUM &&
(dj->m_flags & DF_IN_CALENDAR))
break;
// update it
lastNonNull = dj;
// only one holiday!
// stop "Thanksgiving, Christmas"
// no, why? i can see if we have two other compounds
// dates that give store hours for the weekdays and
// weekends respectively, you might need to telescope
// the christmas holiday to a different header than
// the thanksgiving holiday...
if ( (prev->m_type & specialTypes ) &&
(dj->m_type & specialTypes ) )
break;
// isn't this what you meant:
if ( (got & specialTypes ) &&
(dj->m_type & specialTypes ) )
break;
// fix "[Ages] 12-18" being a tod daynum compound where
// we thought 12 was a tod and 18 was a daynum for
// meetup.com
if ( prev->m_hasType == DT_TOD &&
np == 1 && // ! DD &&
!(prev->m_suppFlags & SF_HAD_AMPM) &&
dj->m_hasType == DT_DAYNUM )
break;
// must be a daynum?
if ( monthDayOnly && ! ( dj->m_hasType & DT_DAYNUM ) )
//continue;
// this was causing "may" in
// "may ..... march 3-4" to make may pair
// up with 3-4 so we need to break!
break;
/*
if ( dowTodOnly ) {
// one and only one must have dow or tod
if ( (di->m_hasType & DT_DOW) &&
(dj->m_hasType & DT_DOW) )
break;
if ( (di->m_hasType & DT_TOD) &&
(dj->m_hasType & DT_TOD) )
break;
if ( !(di->m_hasType & DT_DOW) &&
!(dj->m_hasType & DT_DOW) )
break;
if ( !(di->m_hasType & DT_TOD) &&
!(dj->m_hasType & DT_TOD) )
break;
}
*/
// in different sentence and we are not ignoring
// breaking tags, forget it! basically breaking tags
// and different sentences should be treated as
// equivalent
//bool sameSent = true;
//if ( sp )
// sameSent = ( sp[di->m_a]->m_senta ==
// sp[dj->m_a]->m_senta );
//if ( ! ignoreBreakingTags && ! sameSent )
// break;
// stop if we already had this date type in sequence
bool stop = false;
if ( (dj->m_hasType & simpleFlags) &
(got & simpleFlags) )
stop = true;
// . allow back to back lists of days of month though
// . i.e. dec 1,2-4 and jan 3-5 2009
if ( dj->m_hasType == DT_DAYNUM &&
lastType == DT_DAYNUM )
// allow it through, its a list!
stop = false;
// stop if a type we already got
if ( stop ) break;
// set this
lastType = dj->m_hasType;
// get word range
int32_t a = prev->m_b;
int32_t b = dj->m_a;
// can't include a date in the url now
if ( prev->m_flags & DF_FROM_URL ) continue;
if ( dj ->m_flags & DF_FROM_URL ) continue;
if ( a < 0 ) continue;
if ( b < 0 ) continue;
// allow pairing up across a br tag if there was a ':'
bool hadColon = false;
bool sameSentLink = false;
// . if in same sentence, always link them i guess
// . we need the section ptrs to know what sentence a date is in,
//   so without them we never set sameSentLink
if ( linkDatesInSameSentence && sp ) {
// assume so
sameSentLink = true;
// must be in same sentence
if (sp[di->m_a]->m_senta!=sp[dj->m_a]->m_senta)
sameSentLink = false;
// these are split sentences? whadup?
if ( sp[di->m_a]->m_senta < 0 )
sameSentLink = false;
// ignore fuzzies if not already linked
if ( di->m_flags & DF_FUZZY )
sameSentLink = false;
// ignore fuzzies if not already linked
if ( dj->m_flags & DF_FUZZY )
sameSentLink = false;
// . right now only support linking of a tod
// or tod range to a dow
// . fixes "The Saturday market is open from
// 10 a.m.-3 p.m" for santafe.org
// . if the date types are too complicated it
// like "every wed" and "every friday" for
// hardwoodmuseum.org it fails...
/*
bool ok = false;
if ( di->m_hasType == DT_DOW &&
dj->m_hasType == (DT_TOD|DT_RANGE_TOD))
ok = true;
if ( di->m_hasType == DT_DOW &&
dj->m_hasType == DT_TOD )
ok = true;
if ( dj->m_hasType == DT_DOW &&
di->m_hasType == (DT_TOD|DT_RANGE_TOD))
ok = true;
if ( dj->m_hasType == DT_DOW &&
di->m_hasType == DT_TOD )
ok = true;
// must be ok types
if ( ! ok ) continue;
*/
// otherwise, instantly link them
//a = b;
// no! because "closed" could separate them
// and we aren't allowed to link over that
// word!
//sameSentLink = true;
}
// assume not "ongoing"
//bool ongoing = false;
// see if they belong together. if this loop stops early then
// "a < b" and the compound is ended below.
for ( ; a < b ; a++ ) {
// breathe
QUICKPOLL(m_niceness);
// no breaking tags can be in sequence
if ( tids[a] ) {
// always ignore now!!
//continue;
// if already linked, skip
if ( sameSentLink ) continue;
// ok if not breaking tag
if ( ! isBreakingTagId(tids[a]) )
continue;
// . mcachicago.org uses tags
// . TODO: treat as breaking if rss
if ((tids[a]&BACKBITCOMP)==
TAG_XMLTAG )
continue;
// allow for br tags since they
// are often used as line breaks.
// wow, this causes major issues with
// like 15 urls when i uncomment it!
if ( tids[a] == TAG_BR && hadColon )
continue;
if ( ! ignoreBreakingTags )
break;
// without section ptrs we can not tell if
// the two dates are isolated sentences, so
// treat the breaking tag as breaking
if ( ! sp ) break;
// get sentence containing di
Section *n1 = sp[di->m_a];
Section *n2 = sp[dj->m_a];
n1 = n1->m_sentenceSection;
n2 = n2->m_sentenceSection;
// if either not in sentence, forget it
// Happens if in javascript i guess
// and we got no sections
if ( ! n1 ) break;
if ( ! n2 ) break;
// get sections
Section *s1 = sp[di->m_a];
Section *s2 = sp[dj->m_a];
// blow up: grow s1 until its parent is missing
// or contains s2, then grow s2 the same way
// against the grown s1
for ( ; s1 ; s1 = s1->m_parent ) {
if ( ! s1->m_parent ) break;
if (s1->m_parent->contains(s2))
break;
}
for ( ; s2 ; s2 = s2->m_parent ) {
if ( ! s2->m_parent ) break;
if (s2->m_parent->contains(s1))
break;
}
// if either contains additional
// sentences then i wouldn't connect
// them together
if ( s1->m_alnumPosA!=n1->m_alnumPosA )
break;
if ( s2->m_alnumPosA!=n2->m_alnumPosA )
break;
// allow for table cell tags since
// they often put store hours in
// a table with the dow in left column
// and the tod range in the right
// column. crap they also use
// tr and div... so let all breaking
// tags through now...
// unless either dj or di is
// in a sentence with other text!!!
// so get each sentence...
// scan to left of di
int32_t k = di->m_a - 1;
// backup over punct word
if ( k >= 0 && ! wids[k] && ! tids[k] )
k--;
// . is it a tag or word? if its a word
// assume we are part of a sentence
// and should not pair up with dj
// . k is -1 if di starts the document,
// in which case nothing precedes it and
// we must not read wids[-1]
if ( k >= 0 && wids[k] ) break;
// same for dj, but check word after it
k = dj->m_b;
// skip over punct word
if ( k < m_nw && ! wids[k] && !tids[k])
k++;
// . is it a tag or word? if its a word
// assume we are part of a sentence
// and should not pair up with di
// . k is m_nw if dj ends the document,
// in which case nothing follows it and
// we must not read wids[m_nw]
if ( k < m_nw && wids[k] ) break;
// otherwise, they are both their
// own sentence so let them bond.
// (an older check that followed here was
// originally intended to fix
// reverbnation.com's "28 Jan" split across
// tags, but we actually put that fix in the
// "01 nov" canonical date detection loop
// above, so it was removed. every path in
// this tag branch now ends in a break or a
// continue.)
continue;
}
// punct?
if ( ! wids[a] ) {
// if already linked, skip
if ( sameSentLink ) continue;
//char hadHyphen = 0;
char *p = wptrs[a];
char *pend = p + wlens[a];
// never set to true now that the end-of-sentence
// punct check below is commented out
bool stop = false;
for ( ; p < pend ; p++ ) {
if ( *p == ':' ) {
hadColon = true;
break;
}
//if ( *p == '-' ) {
// hadHyphen = 1;
//}
// could indicate end of
// sentence. stay within
// our sentence until after
// we know what the sentences
// are... in which case
// m_sections will be set
//if ( ! m_sections &&
// ( *p == '.' ||
// *p == '!' ||
// *p == '?' ) ) {
// stop = true;
// break;
//}
}
// stop now
if ( stop ) break;
// fix
//Friday, November 4, 2011 at 11:00 AM-
//Sunday, November 6, 2011 at 10:00 PM
// so we don't pair up
// 11:00AM with Sunday and mess up the
// range for floridaflowfest-ehometext.
// eventbrite.com/
// BUT this breaks"Tuesday thru Friday
// - 8:30am to 4:30pm" for zvents.com
// kimo theater office hrs.
//if ( hadHyphen && dowTodOnly )
// break;
// any tag following a colon means
// to break the sentence. so following
// that logic that's in Sections::
// addSentences() we should break
// too! we have stuff like
// on all saturdays from 10am-3pm:
// ... list of doms.
// (that check is disabled)
// try stopping on colons
// . no, makes villr.com and
// texasdrums.drums.org fail
// because the date near the colon
// ends up getting telescoped to
// by another date which is wrong...
//if ( hadColon ) break;
continue;
}
//if ( linkDatesInSameSentence ) continue;
// allow "at" like "Dec 11th at 8pm"
if ( wids[a] == h_at ) continue;
// allow "on" like "8pm on Dec 11th"
if ( wids[a] == h_on ) continue;
// May 1, 2009 from 5pm to 8pm
//if ( wids[a] == h_from ) continue;
// "friday the 27th of november"
if ( wids[a] == h_of ) continue;
if ( wids[a] == h_the ) continue;
// "Tuesday from 3:00pm until 7:30pm"
if ( wids[a] == h_from ) continue;
// "Tuesday evening at 7:30"
if ( wids[a] == h_evening ) continue;
if ( wids[a] == h_night ) continue;
if ( wids[a] == h_morning ) continue;
if ( wids[a] == h_afternoon ) continue;
if ( wids[a] == h_evenings ) continue;
if ( wids[a] == h_nights ) continue;
if ( wids[a] == h_mornings ) continue;
if ( wids[a] == h_afternoons ) continue;
// is ok now "Nov 4th "
if ( wids[a] == h_st ) continue;
if ( wids[a] == h_nd ) continue;
if ( wids[a] == h_rd ) continue;
if ( wids[a] == h_th ) continue;
// fix activedatax.com:
// "start date: 10/7/2011 start time: 6:00pm
// end date: 10/7/2011 end time: 11:59pm"
if ( wids[a] == h_start ) continue;
if ( wids[a] == h_end ) continue;
if ( wids[a] == h_starts ) continue;
if ( wids[a] == h_ends ) continue;
if ( wids[a] == h_time ) continue;
// even if in same sentence and should be
// linked, the word "closed" will break
// that up. this fixes
// "Tues evenings 5-8 closed weekends" which
// was not finding the word "closed" and not
// setting DF_CLOSE_DATE for "weekends"
if ( wids[a] == h_closed ) break;
if ( wids[a] == h_closes ) break;
if ( wids[a] == h_closure) break;
if ( wids[a] == h_except ) break;
// if already linked, skip
if ( sameSentLink ) continue;
//
// . i guess we allow any words for now!
// . no! let the telescoper and
// addRanges() and addLists() do this now
// . those algos could also check for ambiguity
// and not pair things up if there is some
//if ( wids[a] ) continue;
// . could be an open ended range
// . in that case, when done, we set the
// m_min* members to the spideredTime for all
// dates after word #a.
/*
if ( m_wids[a] == h_until ||
m_wids[a] == h_through ||
m_wids[a] == h_thru ) {
ongoing = true;
continue;
}
*/
if ( wids[a] ) break;
// any punct ok for now
continue;
}
// disrupted?
if ( a < b && j != i ) break;
// . if prev is a year and dj a daynum, then nuke it!
// that is way too fuzzy!
// . this fixes "2005 -22" where 2005 was part of a
// phone # and 22 was part of a street address for
// dexknows.com
if ( np == 1 && // ! DD &&
prev->m_type == DT_YEAR &&
dj->m_type == DT_DAYNUM )
break;
// likewise, the otherway is bad too!
if ( np == 1 && // ! DD &&
dj->m_type == DT_YEAR &&
prev->m_type == DT_DAYNUM )
break;
// add it
ptrs[np] = dj;
// save index
index[np] = j;
// advance
np++;
// flags or
got |= dj->m_hasType;
// full? ptrs[] and index[] only hold 100
if ( np >= 100 ) break;
/*
// use this now
if ( ! DD ) {
// make first date
DD = addDate(DT_COMPOUND,0,di->m_a,0,0);
// return false on error
if ( ! DD ) return false;
// add in date ptr #i as first ptr
DD->addPtr ( di , i , this );
}
// add in flag
//if ( ongoing ) DD->m_flags |= DF_ONGOING;
//if ( ongoing ) dj->m_flags |= DF_ONGOING;
// set new end point MDW LEFT OFF HERE
DD->m_b = dj->m_b;
// flags or
got |= dj->m_hasType;
// add it in
DD->addPtr ( dj , j , this );
// swap for next guy
di = dj;
// we only reserved mem for 100 Date::m_ptrs[]!
if ( DD->m_numPtrs >= 100 ) break;
*/
}
// need at least 2 to tango
if ( np < 2 ) continue;
// . dates that end in just a month are bad!
// . fix "6-8 may" for gmsstrings.com/default.aspx
// . "grades 6-8 may learn to play..."
// . do not add this compound date if it ends in a simple month
// . crap, then that breaks "Saturday before Second Sunday"
// compounding up with "of February"
if ( ptrs[np-2]->m_type == DT_RANGE_DAYNUM &&
ptrs[np-1]->m_type == DT_MONTH )
continue;
// init DD
Date *DD = addDate(DT_COMPOUND,0,di->m_a,0,0);
// return false on error
if ( ! DD ) return false;
// ok, now make the compound date from the list
for ( int32_t j = 0 ; j < np ; j++ ) {
// update
DD->addPtr ( ptrs[j] , index[j] , this );
// update m_b so the compound ends where its last ptr ends
DD->m_b = ptrs[j]->m_b;
}
}
return true;
}
// . sets Date::m_dateHash
// . if date represents the exact same times then should have same date hash
// . normalizes
// . i.e. "11/12/11 = Nov 12th 2011" or "11:00am = 11 in the morning"
void Dates::setDateHashes ( ) {
for ( int32_t i = 0 ; i < m_numDatePtrs ; i++ ) {
// breathe
QUICKPOLL ( m_niceness );
// int16_tcut
Date *di = m_datePtrs[i];
// skip if none
if ( ! di ) continue;
// do recursively
getDateHash ( di , di );
// . sanity check
// . happened for "6 5 5 6 6 5" string of daynums?
if ( di->m_dateHash64 == 0LL ) di->m_dateHash64 = 999;
}
}
// . compute the hash of "di" with getDateHash2(), store it in
//   di->m_dateHash64 and return it
// . "orig" is passed through to getDateHash2() unchanged
uint64_t Dates::getDateHash ( Date *di , Date *orig ) {
	const uint64_t computed = getDateHash2 ( di , orig );
	// remember it on the date itself
	di->m_dateHash64 = computed;
	return computed;
}
// . ultimately this is the same problem we have with normalizing the
//   english format of a date (printTextNorm())
// . returns the hash of "di". "orig" is the top-level date the recursion
//   started from, its m_hasType decides which components are ignored
// . ranges hash their two endpoints, compounds/lists/telescopes fold the
//   hashes of all their ptrs, atomic dates hash their type with their value
// . an ignored component hashes to 0
// . cores on a date type that is not handled below
uint64_t Dates::getDateHash2 ( Date *di , Date *orig ) {

	// a range is the hash of its two endpoints, in order
	if ( di->m_type & DT_RANGE_ANY ) {
		uint64_t first  = getDateHash ( di->m_ptrs[0] , orig );
		uint64_t second = getDateHash ( di->m_ptrs[1] , orig );
		return hash64h ( first , second );
	}

	// compounds, lists and telescopes fold in each of their ptrs
	if ( di->m_type & (DT_COMPOUND | DT_LIST_ANY | DT_TELESCOPE) ) {
		uint64_t folded = 0;
		for ( int32_t k = 0 ; k < di->m_numPtrs ; k++ ) {
			uint64_t sub = getDateHash ( di->m_ptrs[k] , orig );
			// . an xor here would make 1,2,1,2 ultimately be zero
			// . we do not skip repeats either, see "5.5" list of
			//   daynums for mytravelguide.com
			folded = hash32h ( sub , folded );
		}
		return folded;
	}

	// what the top-level date has decides what we can ignore
	datetype_t origHas = orig->m_hasType;

	// "Friday Evening 7pm" should ignore "evening"
	if ( di->m_type == DT_SUBDAY && (origHas & DT_TOD) )
		return 0;

	// . a dow is ignored if we have a daynum
	// . but not if we have a daynum range. fix for
	//   "July 20-21, 2011 [[]] Wednesday [[]] 8:00 a.m" for illinois.edu
	if ( di->m_type == DT_DOW &&
	     (origHas & DT_DAYNUM) &&
	     !(origHas & DT_RANGE_DAYNUM) )
		return 0;

	// a year is ignored inside a compound
	if ( di->m_type == DT_YEAR && (origHas & DT_COMPOUND) )
		return 0;

	// atomic date types from here on
	uint64_t kind = di->m_type;

	if ( kind == DT_MONTH || kind == DT_DAYNUM )
		return hash64h ( kind , di->m_num );

	if ( kind == DT_DOW ) {
		uint64_t dowHash = hash64h ( kind , (uint64_t)di->m_dowBits );
		// combine with "last" "3rd" etc. for "3rd thursday"
		suppflags_t ordinalMask =
			SF_FIRST  |
			SF_LAST   |
			SF_SECOND |
			SF_THIRD  |
			SF_FOURTH |
			SF_FIFTH;
		suppflags_t ordinal = di->m_suppFlags & ordinalMask;
		if ( ordinal )
			dowHash = hash64h ( (int64_t)ordinal , dowHash );
		return dowHash;
	}

	// . all of these hash their type with m_num
	// . TODO: fix m_num for DT_SUBDAY (afternoon, morning), DT_SUBWEEK
	//   (weekends) and DT_SUBMONTH (last day of the month)
	if ( kind == DT_TOD          ||
	     kind == DT_YEAR         ||
	     kind == DT_HOLIDAY      ||
	     kind == DT_TIMESTAMP    ||
	     kind == DT_SUBDAY       ||
	     kind == DT_SUBWEEK      ||
	     kind == DT_SUBMONTH     ||
	     kind == DT_EVERY_DAY    ||   // every day
	     kind == DT_SEASON       ||   // summer winter, etc.
	     kind == DT_ALL_HOLIDAYS )    // "holidays"
		return hash64h ( kind , di->m_num );

	// wtf? unknown date type, core
	char *xx=NULL;*xx=0;
	return 0;
}
// . log the alnum words within 10 words on either side of date "di",
//   putting a '*' where the date starts and where it ends
// . each run of tags and punctuation words is printed as a single space
// . does nothing if the date is not in the body (m_a < 0)
// . always returns 0
int32_t Dates::printDateNeighborhood ( Date *di , Words *w ) {
	int32_t   numWords = w->getNumWords();
	char    **wordPtrs = w->getWords   ();
	int32_t  *wordLens = w->getWordLens();
	nodeid_t *tagIds   = w->m_tagIds;
	int64_t  *wordIds  = w->getWordIds ();
	SafeBuf out;
	// not in the body?
	if ( di->m_a < 0 ) return 0;
	// the window is 10 words to either side, clipped to the document
	int32_t from = di->m_a - 10;
	int32_t to   = di->m_b + 10;
	if ( from < 0        ) from = 0;
	if ( to   > numWords ) to   = numWords;
	// so that we do not print two spaces in a row
	bool prevWasSpace = false;
	for ( int32_t pos = from ; pos < to ; pos++ ) {
		// mark both edges of the date
		if ( pos == di->m_a ) out.pushChar('*');
		if ( pos == di->m_b ) out.pushChar('*');
		// tags and punct words collapse into one space
		if ( tagIds[pos] || ! wordIds[pos] ) {
			if ( ! prevWasSpace ) out.pushChar(' ');
			prevWasSpace = true;
			continue;
		}
		// an alnum word, print it as is
		prevWasSpace = false;
		out.safeMemcpy ( wordPtrs[pos] , wordLens[pos] );
	}
	// null terminate and print out
	out.pushChar('\0');
	char *text = out.getBufStart();
	log("neigh: %s\n",text);
	return 0;
}
// . for gdb to call
// . calls printDates() with a NULL SafeBuf and ignores what it returns
// . always returns 1
int32_t Dates::printDates2 ( ) {
	SafeBuf *noBuf = NULL;
	printDates ( noBuf );
	return 1;
}
// . print the text of date "d" followed by a newline to stderr
// . the text is made by Date::printText() using m_words
// . always returns 1
int32_t Dates::print ( Date *d ) {
	SafeBuf text;
	d->printText ( &text , m_words , false );
	char *str = text.getBufStart();
	fprintf ( stderr , "%s\n" , str );
	return 1;
}
// . make an array of the Date ptrs that are in a date such that each ptr
//   does not consist of any other ptrs, but is atomic
// . the array is stored in m_cbuf and remembered on "di" through
//   m_flatPtrsBufOffset and m_numFlatPtrs, so a second call for the same
//   date just returns the array made by the first call
// . sets *ne to the number of ptrs in the array
// . the array is sorted by Date::m_a, ascending
// . returns NULL if m_cbuf.reserve() or addPtrToArray() fails
Date **Dates::getDateElements ( Date *di , int32_t *ne ) {

	// already did it?
	if ( di->m_numFlatPtrs > 0 ) {
		*ne = di->m_numFlatPtrs;
		return (Date **)(m_cbuf.getBufStart()+di->m_flatPtrsBufOffset);
	}

	// use cbuf for this, make room for 20 ptrs up front
	if ( ! m_cbuf.reserve ( 20*sizeof(Date *) ) ) return NULL;

	// our array starts at the current end of the buffer
	int32_t startOffset = m_cbuf.length();
	di->m_flatPtrsBufOffset = startOffset;

	// . store all the atomic ptrs into there
	// . it returns false on error
	if ( ! addPtrToArray ( di ) ) return NULL;

	// how many ptrs did that add?
	int32_t endOffset = m_cbuf.length();
	*ne = (endOffset - startOffset)/sizeof(Date *);
	di->m_numFlatPtrs = *ne;

	// must be > 0
	if ( *ne <= 0 ) { char *xx=NULL;*xx=0; }

	// . sort it by Date::m_a so Events::makeEventDisplay2() works right
	// . bubble sort, keep making passes until one swaps nothing
	Date **elems = (Date **)(m_cbuf.getBufStart() + startOffset);
	bool swapped;
	do {
		swapped = false;
		for ( int32_t k = 1 ; k < *ne ; k++ ) {
			// already in order?
			if ( elems[k]->m_a >= elems[k-1]->m_a ) continue;
			// swap the out-of-order neighbors
			Date *held = elems[k-1];
			elems[k-1] = elems[k];
			elems[k]   = held;
			swapped    = true;
		}
	} while ( swapped );

	// return ptr to array of ptrs
	return (Date **)(m_cbuf.getBufStart()+di->m_flatPtrsBufOffset);
}
// . append "dp" to m_cbuf if it has no ptrs of its own, otherwise recurse
//   into each of its ptrs in order
// . returns false as soon as a push onto m_cbuf fails, true otherwise
bool Dates::addPtrToArray ( Date *dp ) {
	// dates made of other dates are never added themselves,
	// only the dates they are made of
	if ( dp->m_numPtrs != 0 ) {
		int32_t k = 0;
		while ( k < dp->m_numPtrs ) {
			if ( ! addPtrToArray ( dp->m_ptrs[k] ) )
				return false;
			k++;
		}
		return true;
	}
	// a base type, add it
	if ( m_cbuf.pushPtr(dp) ) return true;
	return false;
}
// . print a header line, then one entry per date via Date::print(), first
//   for m_datePtrs[] and then for the m_totalPtrs[] entries that the first
//   pass did not flag with DF_NOTKILLED
// . sbArg non-NULL: the header with the full column list and two trailing
//   NOTE paragraphs are appended to *sbArg
// . sbArg NULL: the "|" separated header is built in a local buffer which
//   is written to stdout before returning
// . always returns true
bool Dates::printDates ( SafeBuf *sbArg ) {
SafeBuf *sb = sbArg;
SafeBuf tmp;
// no buffer supplied? then accumulate into our local one
if ( ! sbArg ) sb = &tmp;
// name of the date format, "unknown" unless it is one of the two below
char *format = "unknown";
if ( m_dateFormat == DF_AMERICAN ) format = "american";
if ( m_dateFormat == DF_EUROPEAN ) format = "european";
// shortcut
//Sections *ss = m_sections;
// appended to the header when m_badHtml is set
char *bh = "";
if ( m_badHtml ) bh = " (bad html )";
// header for a caller-supplied buffer
if ( sbArg )
sb->safePrintf(""
""
"Dates "
" (format=%s) (firstgood=%"INT32" lastgood=%"INT32")"
" 25hrRespider=%"INT32""
"(sitehash=0x%"XINT32")%s"
" \n"
"# "
"startWord "
"endWord "
"text "
"pub date score "
"timestamp "
"timezone "
"date content hash "
"turk tag hash "
//"sentId "
"flags "
"tagHash "
"occ# "
"clockHash "
"termid "
" \n",
format,m_firstGood,m_lastGood,
(int32_t)m_needQuickRespider,m_siteHash,bh);
// "|" separated header for the stdout case
else
sb->safePrintf(
"Publication Date Candidates "
" (format=%s) (firstgood=%"INT32" lastgood=%"INT32")"
" 25hrRespider=%"INT32""
"(sitehash=0x%"XINT32")%s"
"\n"
"# | "
"startWord | "
"endWord | "
"text | "
"pub date score | "
"timestamp | "
"timezone | "
"flags | "
"tagHash | "
"occ# | "
"clockHash | "
"termid "
"\n",
format,m_firstGood,m_lastGood,
(int32_t)m_needQuickRespider,m_siteHash,bh);
// first pass: the dates in m_datePtrs[]
for ( int32_t i = 0 ; i < m_numDatePtrs ; i++ ) {
// breathe
QUICKPOLL ( m_niceness );
// shortcut
Date *di = m_datePtrs[i];
// skip empty slots
if ( ! di ) continue;
// flag it so the second pass below does not print it again
di->m_flags |= DF_NOTKILLED;
// print it
// NOTE(review): this is handed sbArg, not sb, so with a NULL sbArg the
// entry does not go into the local buffer that is written to stdout
// below -- confirm that is intended
di->print( sbArg, m_sections,m_words,m_siteHash,i,m_best,this);
}
// second pass: the dates in m_totalPtrs[] that were not flagged above
for ( int32_t i = 0 ; i < m_numTotalPtrs ; i++ ) {
// breathe
QUICKPOLL ( m_niceness );
// shortcut
Date *di = m_totalPtrs[i];
// skip empty slots
if ( ! di ) continue;
// only if killed, i.e. not flagged by the first pass
if ( di->m_flags & DF_NOTKILLED ) continue;
// print it
di->print( sbArg, m_sections,m_words,m_siteHash,i,m_best,this);
}
// stdout mode: dump the local buffer and we are done
if ( ! sbArg ) {
fprintf(stdout,"%s",sb->getBufStart() );
return true;
}
// NOTE(review): the string literal below contains a raw line break as
// seen here; presumably markup was lost from it -- confirm against the
// repository copy
sb->safePrintf("
\n " );
sb->safePrintf("NOTE: The publication date will be in bold. "
"The publication date has the highest score out of "
"all the Candidates. If the winning candidate has the "
"\"needmoreinfo\" flag set then no publication date "
"will be used, and the page will be scheduled to be "
"respidered 25 hours from now."
" ");
sb->safePrintf("NOTE: The date range in blue is the min pub "
"date, as determined by when the link first appeared "
"on its parent page. we assume any dates are relative "
"to the years in that time range in blue."
" ");
return true;
}
// . just print the date itself, as it appears in the document
// . appends the words in [m_a,m_b) to *sb; a tag is never printed, it
//   becomes a single space, or a " • " separator if it is a breaking tag
//   other than an xml tag or a TD
// . for a DT_TELESCOPE date the text of the other date ptrs follows
// . a date flagged DF_ASSUMED_YEAR ends with its start focus range
// . inHtml only gates the extra safePrintf() calls around the text
void Date::printText ( SafeBuf *sb , Words *words , bool inHtml ) {
// per-word arrays, all indexed by word number
nodeid_t *tids = words->getTagIds();
char **wptrs = words->getWords();
int32_t *wlens = words->getWordLens ();
int64_t *wids = words->getWordIds();
// NOTE(review): the literals handed to safePrintf() in the inHtml
// branches of this function are empty or a lone space as seen here;
// presumably they carried html markup -- confirm against the repository
//if ( m_numPtrs == 0 && (m_flags & DF_CLOSE_DATE) )
if ( (m_flags & DF_CLOSE_DATE) && inHtml )
sb->safePrintf("");
if ( m_numPtrs > 0 && m_mostUniqueDatePtr == m_ptrs[0] && inHtml )
sb->safePrintf("");
// these two keep runs of tags from printing repeated separators
bool lastWasBullet = false;
bool lastWasSpace = false;
// print out each word
for ( int32_t j = m_a ; j < m_b ; j++ ) {
// skip if tag
if ( tids[j] ) {
// print a bullet for breaking tags separating list elms
if ( isBreakingTagId ( tids[j] ) &&
// fix for tag for mcachicago.com
(tids[j]&BACKBITCOMP) != TAG_XMLTAG &&
// really needs to be a new line tag
(tids[j]&BACKBITCOMP) != TAG_TD &&
! lastWasBullet ) {
sb->safePrintf(" • ");
lastWasBullet = true;
lastWasSpace = true;
}
// any other tag collapses into at most one space
if ( ! lastWasSpace ) {
sb->pushChar(' ');
lastWasSpace = true;
}
continue;
}
// print it otherwise
sb->safeMemcpy(wptrs[j],wlens[j]);
// a word without a word id does not reset the separator state
if ( ! wids[j] ) continue;
lastWasBullet = false;
lastWasSpace = false;
}
if ( m_numPtrs > 0 && m_mostUniqueDatePtr == m_ptrs[0] && inHtml )
sb->safePrintf(" ");
//if ( m_numPtrs == 0 && (m_flags & DF_CLOSE_DATE) )
if ( (m_flags & DF_CLOSE_DATE) && inHtml )
sb->safePrintf(" ");
// telescope ptrs
// NOTE(review): the next line is garbled as seen here: the loop bound,
// the increment, the opening brace and the declaration of "dp" used
// below all appear to be missing -- restore it from the repository copy
for ( int32_t i = 1 ; m_type==DT_TELESCOPE && isafePrintf(" [[]] ");
if ( (dp->m_flags & DF_CLOSE_DATE) && inHtml )
sb->safePrintf("");
if ( dp == m_mostUniqueDatePtr && inHtml)
sb->safePrintf("");
// print out the words of this ptr, never with html decoration
dp->printText ( sb , words, false );
//for ( int32_t j = dp->m_a ; j < dp->m_b ; j++ ) {
// // skip if tag
// if ( tids[j] ) continue;
// // print it otherwise
// sb->safeMemcpy(wptrs[j],wlens[j]);
//}
if ( dp == m_mostUniqueDatePtr && inHtml )
sb->safePrintf(" ");
if ( (dp->m_flags & DF_CLOSE_DATE) && inHtml )
sb->safePrintf(" ");
}
// end in assumed year
if ( m_flags & DF_ASSUMED_YEAR ) {
//int32_t t1 = m_minPubDate;
time_t t1 = m_minStartFocus;
time_t t2 = m_maxStartFocus;
if ( inHtml ) sb->safePrintf("");
// ctime() terminates its string with a newline, so "- " starts a new line
sb->safePrintf(" ** %s- ",ctime(&t1));
sb->safePrintf("%s",ctime(&t2));
if ( inHtml ) sb->safePrintf(" ");
}
}
/*
static void setGroupNumRecursive ( Date *dp ,
datetype_t *accMaskPtr ,
int32_t *groupNumPtr ) {
// if any of our siblings is a repeat type we have to inc group
if ( *accMaskPtr & dp->m_hasType ) {
*accMaskPtr = 0;
*groupNumPtr = *groupNumPtr + 1;
}
// set group #s on our brothers first before descending!!!
for ( int32_t i = 0 ; i < dp->m_numPtrs ; i++ ) {
// int16_tcut
Date *di = dp->m_ptrs[i];
// same?
if ( di->m_hasType & *accMaskPtr ) {
*accMaskPtr = 0;
*groupNumPtr = *groupNumPtr + 1;
}
// or them up
*accMaskPtr |= di->m_hasType;
// assign group #
di->m_groupNum = *groupNumPtr;
}
// reset this i guess, the whole point of doing brothers first...
*accMaskPtr = 0;
// then descend into each one
for ( int32_t i = 0 ; i < dp->m_numPtrs ; i++ ) {
// int16_tcut
Date *di = dp->m_ptrs[i];
// reset?
setGroupNumRecursive ( di, accMaskPtr, groupNumPtr );
}
}
*/
// . english month names, indexed by 0-based month
// . the 13th slot holds "January" again
static char *s_mnames[13] = {
	"January",
	"February",
	"March",
	"April",
	"May",
	"June",
	"July",
	"August",
	"September",
	"October",
	"November",
	"December",
	"January"
};

// . english day names, indexed by 0-based day of week starting at Sunday
// . declared with 8 slots but only 7 names, so the last slot is NULL
char *s_dnames[8] = {
	"Sunday",
	"Monday",
	"Tuesday",
	"Wednesday",
	"Thursday",
	"Friday",
	"Saturday"
};

// . name of the day of week, where 0 is Sunday and 6 is Saturday
// . cores on purpose if "dow" is outside of [0,6]
char *getDOWName ( int32_t dow ) {
	if ( dow >= 0 && dow <= 6 ) return s_dnames[dow];
	// insane index
	{ char *xx=NULL;*xx=0; }
	return s_dnames[dow];
}

// . name of the month, where 0 is January and 11 is December
// . cores on purpose if "month" is outside of [0,11]
char *getMonthName ( int32_t month ) {
	if ( month >= 0 && month <= 11 ) return s_mnames[month];
	// insane index
	{ char *xx=NULL;*xx=0; }
	return s_mnames[month];
}
// . append the normalized form of day-of-week date "dp" to *sb, like
//   "every 1st and 3rd Thursday"
// . "every " is printed for a recurring dow, unless the date or one of its
//   ancestors is a closed date, in which case the name gets a plural "s"
//   instead ("Mondays")
// . the SF_FIRST ... SF_LAST bits of dp->m_suppFlags become "1st", "2nd",
//   ... "last", joined with ", " and " and "
// . dp->m_dow is 1-based here (1 is Sunday) and we core if it is not in
//   [1,7]
// . returns false if a print into *sb fails
bool printDOW ( SafeBuf *sb , Date *dp ) {
	int32_t mi = dp->m_dow;
	if ( mi <= 0 || mi >= 8 ) { char *xx=NULL;*xx=0; }
	// shortcut
	Date *parent = dp->m_dateParent;
	// get the full date, the root of our chain of parents
	Date *full = dp;
	while ( full->m_dateParent ) full = full->m_dateParent;
	// should "every " go in front of the day name?
	bool every = false;
	// plural?
	if ( dp->m_suppFlags & SF_RECURRING_DOW )
		every = true;
	// . fix for "10pm Monday NIGHTS" for zipscene. only the
	//   NIGHTS had this set
	// . this also covers "9:15am to 10:15am every Monday and Wednesday"
	//   where we are the first of a recurring dow list
	if ( parent && (parent->m_suppFlags & SF_RECURRING_DOW) )
		every = true;
	// sometimes we have "Monday and Wednesday ... Nights"
	// so our parent is a dow list and HD_NIGHT is brother of that parent
	// like for nickhotel.com
	if ( full->m_suppFlags & SF_RECURRING_DOW )
		every = true;
	// if we are recurring dow in a list, do not repeat
	// fix "EVERY saturday and EVERY sunday"
	if ( parent &&
	     parent->m_type == DT_LIST_DOW &&
	     parent->m_ptrs[0] != dp )
		every = false;
	// but not if we are in a range like "Thursday through Saturday"
	// because "every Thursday through every Saturday" is wrong.
	if ( parent && parent->m_type == DT_RANGE_DOW )
		every = false;
	// are we a closed date? or is any parent of ours?
	bool closed = false;
	for ( Date *pp = dp ; pp ; pp = pp->m_dateParent )
		if ( pp->m_flags & DF_CLOSE_DATE ) closed = true;
	// print it
	if ( every && ! closed && ! sb->safePrintf("every ") ) return false;
	// every 1st & 2nd thursday
	suppflags_t sfmask = 0;
	sfmask |= SF_FIRST;
	sfmask |= SF_SECOND;
	sfmask |= SF_THIRD;
	sfmask |= SF_FOURTH;
	sfmask |= SF_FIFTH;
	sfmask |= SF_LAST;
	// how many do we have?
	suppflags_t mybits = (uint32_t)(sfmask&dp->m_suppFlags);
	int32_t nsf = getNumBitsOn32 ( mybits );
	// count each bit that we have in the loop too
	int32_t nc = 0;
	// . loop over the bits
	// . test the masked bits, the same ones nsf was counted on, so that a
	//   flag bit between SF_FIRST and SF_LAST which is not one of the six
	//   can never get in here without a string to print
	for ( suppflags_t k = SF_FIRST ; k <= SF_LAST ; k <<= 1 ) {
		if ( ! ( mybits & k ) ) continue;
		// nothing before the first, " and " before the last
		const char *sep = "";
		if ( nc > 0 ) sep = ( nc + 1 < nsf ) ? ", " : " and ";
		const char *str = NULL;
		if      ( k == SF_FIRST  ) str = "1st";
		else if ( k == SF_SECOND ) str = "2nd";
		else if ( k == SF_THIRD  ) str = "3rd";
		else if ( k == SF_FOURTH ) str = "4th";
		else if ( k == SF_FIFTH  ) str = "5th";
		else if ( k == SF_LAST   ) str = "last";
		// never hand an unset ptr to safePrintf
		if ( ! str ) continue;
		if ( ! sb->safePrintf("%s%s",sep,str)) return false;
		nc++;
	}
	// print space after them, if any
	if ( nc ) {
		if ( ! sb->safePrintf(" ")) return false;
	}
	// print dow name properly then
	if ( ! sb->safePrintf("%s",s_dnames[mi-1]) )
		return false;
	// if closed and recurring print 's' instead of every
	if ( every && closed && ! sb->safePrintf("s") ) return false;
	//if ( nc && ! sb->safePrintf(" of the month") )
	//	return false;
	return true;
}
// . append the time of day of "ttt" to *sb on a 12-hour clock, like
//   "7 pm" or "9:15 am"
// . only ttt modulo one day matters, so this takes both a plain count of
//   seconds into the day and a full timestamp
// . 00:00 prints as "midnight" and 12:00 as "noon" because "12 am" and
//   "12 pm" are ambiguous
// . returns what safePrintf() returns
static bool printTOD ( SafeBuf *sb , time_t ttt ) {
	// seconds since start of day
	int32_t relativeTod = ttt % 86400;
	// the result of % takes the sign of ttt, so wrap a negative time
	// back into the day rather than print negative hours and minutes
	if ( relativeTod < 0 ) relativeTod += 86400;
	// convert to minutes since start of day
	int32_t totalMins = relativeTod / 60;
	// hour is in [0,23] from here on
	int32_t hour = totalMins / 60;
	int32_t min  = totalMins % 60;
	if ( hour ==  0 && min == 0 )
		return sb->safePrintf("midnight");
	if ( hour == 12 && min == 0 )
		return sb->safePrintf("noon");
	const char *ap = " am";
	if ( hour >= 12 ) {
		ap = " pm";
		hour -= 12;
	}
	// the hour after midnight or after noon reads as 12, not 0
	if ( hour == 0 ) hour = 12;
	if ( min != 0 )
		return sb->safePrintf("%"INT32":%02"INT32"%s",hour,min,ap);
	return sb->safePrintf("%"INT32"%s",hour,ap) ;
}
// . append " <Month> <day><suffix> " to *sb, like " February 12th "
// . "month" indexes s_mnames[] directly, so it is 0-based
// . returns false if the print fails
static bool printMonthDay ( SafeBuf *sb , int32_t month , int32_t dayNum ) {
	// pick the ordinal suffix for the day number
	const char *suffix;
	switch ( dayNum ) {
	case 1: case 21: case 31:
		suffix = "st";
		break;
	case 2: case 22:
		suffix = "nd";
		break;
	case 3: case 23:
		suffix = "rd";
		break;
	default:
		suffix = "th";
		break;
	}
	// February 12th, etc.
	if ( sb->safePrintf(" %s %"INT32"%s ",
			    s_mnames[month],
			    dayNum,
			    suffix) )
		return true;
	return false;
}
// . for "Feb. 3 - March 31 [[]] Thursdays, Feb. 3 - Mar. 31 from 6:30 - 9:30 pm"
//   for denver.org, print " February 3rd  to  March 31st "
// . "all" holds the numAll atomic dates of the date being printed
// . first try to print symbolically: every DT_DAYNUM in "all" that opens a
//   DT_RANGE_MONTHDAY contributes that range, and the ranges are
//   intersected
// . if there is no such range, print the days of the start of the interval
//   int3 points to and of the end of the event's last interval, both
//   shifted into the timezone of the event's address
// . returns false if a print into *sb fails
static bool printMonthDayRange ( SafeBuf *sb ,
				 Event *ev ,
				 Interval *int3 ,
				 Date **all ,
				 int32_t numAll ) {
	// the intersection so far, only valid once rangeSet is true
	bool rangeSet = false;
	int32_t bestMonth1 = 0;
	int32_t bestMonth2 = 0;
	int32_t bestDay1   = 0;
	int32_t bestDay2   = 0;
	int32_t bestYear1  = 0;
	int32_t bestYear2  = 0;
	// identify all monthday ranges in the date
	for ( int32_t i = 0 ; i < numAll ; i++ ) {
		// shortcut
		Date *di = all[i];
		// need daynum
		if ( di->m_type != DT_DAYNUM ) continue;
		// must be part of monthdaynum range
		Date *p1 = di->m_dateParent;
		if ( ! p1 ) continue;
		Date *p2 = p1->m_dateParent;
		if ( ! p2 ) continue;
		if ( p2->m_type != DT_RANGE_MONTHDAY ) continue;
		//
		// TODO: if it is a list of monthday ranges, forget it!
		//
		// and we must be first
		if ( p2->m_ptrs[0] != p1 ) continue;
		// start point
		int32_t month1 = p1->m_month;
		int32_t day1   = di->m_num;
		int32_t year1  = p1->m_year;
		// get end point
		int32_t month2 = p2->m_ptrs[1]->m_month;
		int32_t day2   = p2->m_ptrs[1]->m_dayNum;
		int32_t year2  = p2->m_ptrs[1]->m_year;
		// the first range we see is the intersection so far
		if ( ! rangeSet ) {
			bestMonth1 = month1;
			bestDay1   = day1;
			bestYear1  = year1;
			bestMonth2 = month2;
			bestDay2   = day2;
			bestYear2  = year2;
			rangeSet   = true;
			continue;
		}
		// . intersect the ranges symbolically
		// . a later start wins. compare the day too, otherwise a range
		//   that starts later in the same month is ignored
		if ( year1 == bestYear1 &&
		     ( month1 > bestMonth1 ||
		       ( month1 == bestMonth1 && day1 > bestDay1 ) ) ) {
			bestMonth1 = month1;
			bestDay1   = day1;
		}
		// . an earlier end wins, same deal with the day
		if ( year2 == bestYear2 &&
		     ( month2 < bestMonth2 ||
		       ( month2 == bestMonth2 && day2 < bestDay2 ) ) ) {
			bestMonth2 = month2;
			bestDay2   = day2;
		}
	}
	// . print it out symbolically?
	// . Date::m_month is 1-based but printMonthDay() wants 0-based
	if ( rangeSet ) {
		if ( ! printMonthDay (sb,bestMonth1-1,bestDay1))return false;
		if ( ! sb->safePrintf(" to ") ) return false;
		if ( ! printMonthDay (sb,bestMonth2-1,bestDay2))return false;
		return true;
	}
	// gmtime assumes the time_t we give it is in utc time
	// which i guess it is...
	time_t ttt1 = int3->m_a;
	// get timezone info
	char useDST;
	char tz = ev->m_address->getTimeZone(&useDST);
	// sanity
	if ( tz >= 25 ) { char *xx=NULL;*xx=0; }
	// apply that to the time
	ttt1 += 3600 * tz;
	// now we also deal with DST too!
	int32_t bonus = 0;
	if ( useDST && getIsDST(ttt1,tz) ) bonus = 3600;
	ttt1 += bonus;
	// make into time
	struct tm *ts = gmtime ( &ttt1 );
	if ( ! printMonthDay ( sb, ts->tm_mon, ts->tm_mday ) ) return false;
	if ( ! sb->safePrintf ( " to " ) ) return false;
	// find last interval
	int3 += ev->m_ni - 1;
	// endpoint, using the same shift as the start
	time_t ttt2 = int3->m_b + 3600 * tz + bonus;
	// make into time
	ts = gmtime ( &ttt2 );
	if ( ! printMonthDay ( sb, ts->tm_mon, ts->tm_mday ) ) return false;
	return true;
}
// miss: Every 1st & 2nd Thursday's [[]] before 9PM [[]] 7:30PM - 11:30PM
// from rateclubs.com... need to intersect ongoing range with other...
// really we don't deal well with multiple ranges on recurring dates...
//
// . print this date normalized, then " closed " and the normalized form of
//   each of the event's close dates, if it has any
// . the close dates are printed without html and without an interval buf
// . returns false if any of the prints fails
bool Date::printTextNorm ( SafeBuf *sb , Words *words , bool inHtml ,
			   Event *ev , SafeBuf *intBuf ) {
	// the date itself comes first
	bool ok = printTextNorm2 ( sb , words , inHtml , ev , intBuf );
	if ( ! ok ) return false;
	// that is all unless the event has close dates
	if ( ! ev->m_numCloseDates ) return true;
	if ( ! sb->safePrintf(" closed ") ) return false;
	// then each closed date in turn
	int32_t k = 0;
	while ( k < ev->m_numCloseDates ) {
		// breathe
		//QUICKPOLL(m_niceness);
		Date *closedDate = ev->m_closeDates[k];
		k++;
		if ( closedDate->printTextNorm2 (sb,words,false,ev,NULL) )
			continue;
		return false;
	}
	return true;
}
// . append the normalized, human readable form of this date to *sb
// . does nothing and returns true if the event has no address, since the
//   address is where the timezone comes from
// . an event with exactly one interval prints as the fully resolved
//   "<Day> <Month> <n>th <year> from <tod> to <tod>"
// . a recurring dow combined with a tod range prints via printDOW(),
//   optionally printMonthDayRange(), and the tods of the first interval
// . anything else prints its atomic date elements one at a time via
//   printDateElement()
// . intBuf holds the intervals, the event's start at ev->m_intervalsOff.
//   if it is NULL only the element by element form is possible
// . side effects: reorders the array handed back by getDateElements() in
//   place and sets DF_REDUNDANT on elements it decides not to print
// . returns false if a print or getDateElements() fails
bool Date::printTextNorm2 ( SafeBuf *sb , Words *words , bool inHtml ,
			    Event *ev , SafeBuf *intBuf ) {
	// we need this to get the timezone
	if ( ! ev->m_address ) return true;
	// point to interval buffer for all dates
	char     *bufStart = NULL;
	Interval *int3     = NULL;
	if ( intBuf ) {
		bufStart = intBuf->getBufStart();
		// shortcut to the list of intervals this event has
		int3 = (Interval *)(ev->m_intervalsOff + bufStart);
	}

	// if its just one time interval print it out all pretty!
	if ( ev->m_ni == 1 && int3 ) {
		// gmtime assumes the time_t we give it is in utc time
		// which i guess it is...
		time_t ttt1 = int3->m_a;
		// get timezone info
		char useDST;
		char tz = ev->m_address->getTimeZone(&useDST);
		// sanity
		if ( tz >= 25 ) {
			log("date: got bad timezone of %"INT32". resetting to -6.",
			    (int32_t)tz);
			useDST = true;
			tz = -6;
		}
		// apply that to the time
		ttt1 += 3600 * tz;
		// now we also deal with DST too!
		int32_t bonus = 0;
		if ( useDST && getIsDST(ttt1,tz) ) bonus = 3600;
		ttt1 += bonus;
		// make into time
		struct tm *ts = gmtime ( &ttt1 );
		char *suffix = "th";
		int32_t md = ts->tm_mday;
		if ( md == 1 || md == 21 || md == 31 ) suffix = "st";
		if ( md == 2 || md == 22 ) suffix = "nd";
		if ( md == 3 || md == 23 ) suffix = "rd";
		// Saturday February 12th 2011
		if ( ! sb->safePrintf("%s %s %"INT32"%s %"INT32" ",
				      s_dnames[ts->tm_wday],
				      s_mnames[ts->tm_mon],
				      (int32_t)ts->tm_mday,
				      suffix,
				      (int32_t)ts->tm_year+1900) )
			return false;
		// endpoint
		time_t ttt2 = int3->m_b + 3600 * tz + bonus;
		// else just know the start time
		if ( ttt2 == ttt1 ) {
			if ( ! sb->safePrintf("at ") ) return false;
			return printTOD ( sb , ttt1 );
		}
		// print each tod
		if ( ! sb->safePrintf("from ") ) return false;
		if ( ! printTOD ( sb , ttt1 ) ) return false;
		if ( ! sb->safePrintf(" to ") ) return false;
		return printTOD ( sb , ttt2 );
	}

	// fix for "26 [[]] April 2011 [[]] ..."
	// put ALL ptrs into here
	// http://www.pridesource.com/calendar_item.html?item=9045
	// breaches the 256 limit. it has a long list of compound dates
	int32_t numAll = 0;
	Date **all = m_dates->getDateElements ( this , &numAll );
	// error?
	if ( ! all ) return false;

	Date *dowDate = NULL;
	bool hasTODRange = false;
	// . check for special format
	// . every 1st Wednesday, Tuesday through Sunday 9 a.m. to 5 p.m
	// . first Friday at 1:30pm [[]] Tues-Fri, 8-5
	// . from collectorsguide.com
	// . so if we got a compound that has a dowrange and a todrange, and
	//   the compounds brother is a recurring dow, format it nice.
	if ( m_numPtrs == 2 &&
	     ( m_ptrs[0]->m_hasType == DT_DOW ||
	       m_ptrs[0]->m_hasType == (DT_DOW|DT_TOD|DT_COMPOUND)) &&
	     // i don't care about DT_LIST_OTHER
	     (m_ptrs[1]->m_hasType | DT_LIST_OTHER)
	     == (DT_RANGE_DOW|
		 DT_DOW|
		 DT_COMPOUND|
		 DT_LIST_OTHER| // multiple dow store hrs
		 DT_RANGE_TOD|
		 DT_TOD) ) {
		dowDate = m_ptrs[0];
		hasTODRange = true;
	}
	// Every Tuesday [[]] before 9PM [[]] 7:30PM - 11:30PM
	// rateclubs.com
	if ( m_numPtrs == 3 &&
	     m_ptrs[0]->m_hasType == DT_DOW &&
	     m_ptrs[1]->m_hasType == DT_TOD &&
	     m_ptrs[2]->m_hasType == (DT_TOD|DT_RANGE_TOD) ) {
		dowDate = m_ptrs[0];
		hasTODRange = true;
	}
	// reset
	bool hasMonthDayRange = false;
	// Feb. 3-March 31 [[]] Thursdays, Feb. 3 - Mar. 31 from 6:30 - 9:30pm
	// denver.org
	if ( m_numPtrs == 2 &&
	     m_ptrs[0]->m_hasType == (DT_MONTH|
				      DT_DAYNUM|
				      DT_COMPOUND|
				      DT_RANGE_MONTHDAY) &&
	     m_ptrs[1]->m_numPtrs == 3 &&
	     m_ptrs[1]->m_ptrs[0]->m_hasType == DT_DOW &&
	     m_ptrs[1]->m_ptrs[1]->m_hasType == (DT_MONTH|
						 DT_DAYNUM|
						 DT_COMPOUND|
						 DT_RANGE_MONTHDAY) &&
	     m_ptrs[1]->m_ptrs[2]->m_hasType == (DT_TOD |DT_RANGE_TOD) ) {
		dowDate = m_ptrs[1]->m_ptrs[0];
		hasTODRange = true;
		hasMonthDayRange = true;
	}

	// i've seen these first two things be true, but int3 be false
	// dunno why...
	if ( dowDate && hasTODRange && int3 ) {
		// get the recurring bits in case it is not "every"
		if ( ! printDOW ( sb , dowDate ) ) return false;
		// print like Feb 1 - Jan 28
		if ( hasMonthDayRange &&
		     ! printMonthDayRange (sb, ev, int3,all,numAll ) )
			return false;
		// gmtime assumes the time_t we give it is in utc time
		// which i guess it is...
		time_t ttt1 = int3->m_a;
		// get timezone info
		char useDST;
		char tz = ev->m_address->getTimeZone(&useDST);
		// sanity
		if ( tz >= 25 ) { char *xx=NULL;*xx=0; }
		// apply that to the time
		ttt1 += 3600 * tz;
		// now we also deal with DST too!
		int32_t bonus = 0;
		if ( useDST && getIsDST(ttt1,tz) ) bonus = 3600;
		ttt1 += bonus;
		// endpoint
		time_t ttt2 = int3->m_b + 3600 * tz + bonus;
		// else just know the start time
		if ( ttt2 == ttt1 ) {
			if ( ! sb->safePrintf(" at ") ) return false;
			return printTOD ( sb , ttt1 );
		}
		// print each tod
		if ( ! sb->safePrintf(" from ") ) return false;
		if ( ! printTOD ( sb , ttt1 ) ) return false;
		if ( ! sb->safePrintf(" to ") ) return false;
		return printTOD ( sb , ttt2 );
	}

	// . now move a single daynum telescope to after the month
	// . this shuffles the array getDateElements() gave us in place
	for ( int32_t i = 0 ; i < numAll ; i++ ) {
		if ( all[i]->m_type != DT_DAYNUM ) continue;
		// . a date that is its own root has no parent, so it can not
		//   be in a telescope
		// . the redundancy loop below guards the parent ptr as well
		Date *dayParent = all[i]->m_dateParent;
		if ( ! dayParent ) continue;
		if ( dayParent->m_type != DT_TELESCOPE ) continue;
		// fix "7:30pm [[]] 2 [[]] SAT [[]] April 2011"
		// find next month on right
		int32_t k;
		for ( k = i+1 ; k < numAll ; k++ )
			if ( all[k]->m_type == DT_MONTH ) break;
		// skip if none
		if ( k >= numAll ) continue;
		// save it
		Date *tmp = all[i];
		// otherwise, put ourselves after it! shift down
		for ( int32_t j = i ; j < k ; j++ )
			all[j] = all[j+1];
		// go after it
		all[k] = tmp;
	}

	//
	// mark some dates as redundant for printing purposes
	//
	// . fix "after 12a.m [[]] Thursday Weekly" from printing out
	//   "after midnight every Thursday daily"
	bool recurringDow = false;
	for ( int32_t i = 0 ; i < numAll ; i++ ) {
		// get next ptr
		Date *di = all[i];
		// get parent
		Date *dp = di->m_dateParent;
		// is it a recurring dow?
		if ( di->m_type == DT_DOW &&
		     dp &&
		     (dp->m_suppFlags & SF_RECURRING_DOW) ) {
			recurringDow = true;
			continue;
		}
		// if we are "daily" or "weekly" after a recurring dow,
		// then do not print that!
		if ( di->m_type == DT_EVERY_DAY && recurringDow )
			di->m_flags |= DF_REDUNDANT;
		// turn it off now
		recurringDow = false;
	}

	// print each atomic element
	for ( int32_t i = 0 ; i < numAll ; i++ ) {
		// get next ptr
		Date *dp = all[i];
		// skip if not core
		if ( dp->m_numPtrs ) continue;
		// skip printing if unnecessary
		if ( dp->m_flags & DF_REDUNDANT )
			continue;
		// NOTE(review): the two literals wrapped around a closed date
		// are empty or a lone space as seen here; presumably they
		// carried html markup -- confirm against the repository
		if ( (dp->m_flags & DF_CLOSE_DATE) && inHtml )
			if ( ! sb->safePrintf("") )
				return false;
		// print each element
		if ( ! ::printDateElement ( dp , sb , words , this ) )
			return false;
		if ( (dp->m_flags & DF_CLOSE_DATE) && inHtml )
			if ( ! sb->safePrintf(" ") ) return false;
	}

	// NOTE(review): 1024 was the size of the fixed array that used to
	// hold the elements; getDateElements() as seen here has no such cap,
	// so this fires only when a date has exactly 1024 elements
	if ( numAll == 1024 ) {
		if ( ! sb->safePrintf("... (truncated!) " ) )
			return false;
	}

	return true;
}
/*
bool Date::addDoNotPrintDates ( HashTableX *dnp ) {
datetype_t mask = 0;
mask |= DT_YEAR;
mask |= DT_MONTH;
mask |= DT_LIST_MONTH;
mask |= DT_RANGE_MONTH;
mask |= DT_RANGE_MONTHDAY;
mask |= DT_DAYNUM;
mask |= DT_RANGE_DAYNUM;
mask |= DT_LIST_DAYNUM;
mask |= DT_TOD;
mask |= DT_RANGE_TOD;
mask |= DT_LIST_TOD;
mask |= DT_DOW;
mask |= DT_RANGE_DOW;
mask |= DT_LIST_DOW;
// fix "Night every Tuesday evening from 6:00pm to 9pm" for
// nonamejustfriends.com
//if ( m_type != DT_TELESCOPE ) return true;
// if we have a daynum anywhere! do not print and dow info
datetype_t badTypes = 0;
// . if we have a single day like "Dec 5" (not ranges, then set this)
// . not part of a list or range
// . juvejazzgestival has:
// Fri and Sat 10 a.m. to 5 p.m. February 4th , 5th and 6th 2011
if ( m_flags & DF_HAS_ISOLATED_DAYNUM ) {
//badTypes |= DT_DOW;
badTypes |= DT_RANGE_DOW;
badTypes |= DT_LIST_DOW;
}
// fixes 9PM [[]] Sat, Apr 16 [[]] Sat, Apr 16 2011 - 9pm-3am
// so it does not print the first 9PM
if ( m_hasType & DT_RANGE_TOD )
badTypes |= DT_TOD;
// fix "Night every Tuesday evening from 6:00pm to 9pm" for
// nonamejustfriends.com
if ( m_hasType & DT_TOD )
badTypes |= DT_SUBDAY;
if ( (m_hasType & DT_DAYNUM) &&
!(m_hasType & DT_RANGE_DAYNUM) &&
!(m_hasType & DT_RANGE_MONTHDAY) ) {
badTypes |= DT_SUBDAY;
badTypes |= DT_SUBWEEK;
badTypes |= DT_SUBMONTH;
badTypes |= DT_EVERY_DAY;
badTypes |= DT_ALL_HOLIDAYS;
}
datetype_t accTypes = 0;
// scan date ptrs of di a
for ( int32_t j = 0 ; j < m_numPtrs ; j++ ) {
// breathe
//QUICKPOLL ( m_niceness );
// int16_tcut
Date *dj = m_ptrs[j];
// dedup
datetype_t rmTypes = accTypes ;
// mask out DT_COMPOUND, etc.
rmTypes &= mask;
// don't mask these, they are pre-masked
rmTypes |= badTypes;
// do not print those
if ( ! dj->addDoNotPrintRecursive ( rmTypes , dnp ) )
return false;
// do not add if was not a telescope!
if ( m_type != DT_TELESCOPE ) continue;
// dedup these types
accTypes |= dj->m_hasType;
// expand types
if (accTypes & DT_DAYNUM) accTypes|=DT_RANGE_DAYNUM;
if (accTypes & DT_DAYNUM) accTypes|=DT_LIST_DAYNUM;
if (accTypes & DT_DAYNUM) accTypes|=DT_RANGE_MONTHDAY;
if (accTypes & DT_MONTH ) accTypes|=DT_RANGE_MONTH;
if (accTypes & DT_MONTH ) accTypes|=DT_LIST_MONTH;
}
return true;
}
// returns false and sets g_errno on error
bool Date::addDoNotPrintRecursive (datetype_t dt, HashTableX *dnp) {
// if nothing, skip
if ( dt == 0 ) return true;
// skip for now
//return true;
// int16_tcut
int32_t key = (int32_t)this;
// ranges actually have ptrs, so check for them up top...
if ( m_type & dt ) return dnp->addKey ( &key );
// . stop on tod ranges...
// . fixes 9PM [[]] Sat, Apr 16 [[]] Sat, Apr 16 2011 - 9pm-3am
// so it doesn't nuke its tod range!
if ( m_type == DT_RANGE_TOD ) return true;
// fix
// On the first Tues of each month, the talks are given at the
// Jonson Gallery. Tues-Fri 9am-4pm, Sun 1-4pm
// normalizes to "every 1st Tuesday 9 a.m. to 4 p.m. 1 p.m. to 4 p.m."
// because it nuked our dows, so fix that!
if ( m_type == DT_RANGE_DOW ) return true;
// just to be safe, skip any range now
if ( m_type & DT_RANGE_ANY ) return true;
if ( m_type & DT_COMPOUND ) return true;
for ( int32_t j = 0 ; j < m_numPtrs ; j++ ) {
// breathe
//QUICKPOLL ( m_niceness );
// int16_tcut
Date *dj = m_ptrs[j];
// test
if ( ! dj->addDoNotPrintRecursive ( dt , dnp ) ) return false;
}
if ( m_numPtrs > 0 ) return true;
// we are it otherwise
if ( m_type & dt ) return dnp->addKey ( &key );
return true;
}
*/
// . append one atomic date element "dp" to *sb in normalized form,
//   preceded by the word that joins it to what was printed before it:
//   " through ", " to ", " at ", " until ", " after ", " and ", ", ",
//   " in ", or just a space
// . "fullDate" is the date being printed as a whole; only its m_hasType is
//   consulted, to see if it has a daynum
// . months, dows, tods, daynums, years and a few others are normalized;
//   anything else is copied from the document text
// . a date that has sub-ptrs is not atomic and prints nothing
// . returns false if a print into *sb fails
bool printDateElement ( Date *dp , SafeBuf *sb , Words *words ,
			Date *fullDate ) {
	// only atomic dates are printed here
	if ( dp->m_numPtrs ) return true;
	char   **wptrs = words->getWords();
	int32_t *wlens = words->getWordLens ();
	Date *parent = dp->m_dateParent;
	const char *prefix = NULL;

	// . sometimes we got a compound range, so check parent of parent
	// . climb to the closest range that contains us. lastdp2 ends up NULL
	//   if on some level of the climb we were not the first component
	Date *parent2 = parent;
	Date *lastdp2 = dp;
	Date *dp2     = dp;
	for ( ; parent2 ; parent2 = parent2->m_dateParent ) {
		if ( parent2->m_type & DT_RANGE_ANY ) break;
		// if ever not the first of its parent, do not print " thru "
		// . we need the first element of the 2nd in the range
		if ( parent2->m_ptrs[0] != dp2 ) lastdp2 = NULL;
		// up this too
		dp2 = dp2->m_dateParent;
	}
	// same for lists!!
	Date *parent3 = parent;
	Date *lastdp3 = dp;
	Date *dp3     = dp;
	for ( ; parent3 ; parent3 = parent3->m_dateParent ) {
		if ( parent3->m_type & DT_LIST_ANY ) break;
		if ( parent3->m_ptrs[0] != dp3 ) lastdp3 = NULL;
		// up this too
		dp3 = dp3->m_dateParent;
	}

	//
	// pick the prefix. the order matters, later checks override
	//

	// range?
	if ( parent2 &&
	     // any kind of range will do
	     (parent2->m_type & DT_RANGE_ANY) &&
	     // make sure there
	     lastdp2 &&
	     // . before the 2nd element
	     // . use m_a because ptrs will NOT match
	     lastdp2->m_a == parent2->m_ptrs[1]->m_a )
		// print through operator
		prefix = " through ";
	// tod range?
	if ( parent &&
	     (parent->m_type & DT_RANGE_TOD) &&
	     // before the 2nd element
	     dp == parent->m_ptrs[1] )
		prefix = " to ";
	// " at 7pm"
	if ( parent &&
	     // not the first!
	     parent->m_ptrs[0] != dp &&
	     // and not in range
	     !(parent->m_type & DT_RANGE_ANY) &&
	     // tods only for this
	     dp->m_type == DT_TOD )
		prefix = " at ";
	// " at 7pm"
	else if ( parent &&
		  // are the first!
		  parent->m_ptrs[0] == dp &&
		  // and only! (must be telescope?)
		  parent->m_numPtrs == 1 &&
		  parent->m_dateParent &&
		  parent->m_dateParent->m_ptrs[0] != dp &&
		  // tods only for this
		  dp->m_type == DT_TOD )
		prefix = " at ";
	// open-ended ongoing ranges
	if ( dp->m_flags & DF_ONGOING ) {
		if ( dp->m_flags & DF_BEFORE_TOD )
			prefix = " until ";
		else
			prefix = " after ";
	}
	// could be a compound that is then part of a list!
	// if your parent is a list print a ", " or " and "
	// before printing the date element
	if ( parent3 &&
	     // we are in a list
	     ( parent3->m_type & DT_LIST_ANY ) &&
	     // make sure there
	     lastdp3 &&
	     // . if we are last in list...
	     // . use m_a because ptrs will NOT match
	     lastdp3->m_a == parent3->m_ptrs[parent3->m_numPtrs-1]->m_a ) {
		prefix = " and ";
	}
	else if ( parent3 &&
		  ( parent3->m_type & DT_LIST_ANY ) &&
		  lastdp3 &&
		  // . if we are not first in list...
		  // . use m_a because ptrs will NOT match
		  lastdp3->m_a != parent3->m_ptrs[0]->m_a ) {
		prefix = ", ";
	}
	// are we first month in a list of months?
	if ( parent &&
	     parent->m_type == DT_LIST_MONTH &&
	     dp->m_type == DT_MONTH &&
	     parent->m_ptrs[0] == dp )
		prefix = " in ";
	// "every Friday 12:05pm to 1pm in January"
	// if printing a month like January and a day is not on left or
	// right of us...
	if ( dp->m_type == DT_MONTH &&
	     // not first
	     parent &&
	     parent->m_ptrs[0] != dp &&
	     // and first brother not a month
	     parent->m_ptrs[0]->m_type != DT_MONTH &&
	     // and no daynum present
	     !(fullDate->m_hasType & DT_DAYNUM) )
		prefix = " in ";
	// default
	if ( ! prefix ) prefix = " ";
	if ( ! sb->safePrintf("%s", prefix) ) return false;

	//
	// now the element itself
	//

	// a month? make it full name
	if ( dp->m_type == DT_MONTH ) {
		int32_t mi = dp->m_month;
		// . m_month is 1-based and s_mnames[mi-1] is read below
		// . so 0 is just as insane as 13, it would read s_mnames[-1]
		if ( mi <= 0 || mi >= 13 ) { char *xx=NULL;*xx=0; }
		if ( dp->m_suppFlags & SF_MID )
			if ( ! sb->safePrintf("mid-") )
				return false;
		if ( ! sb->safePrintf("%s",s_mnames[mi-1]) )
			return false;
	}
	// normalize day names
	else if ( dp->m_type == DT_DOW ) {
		if ( ! printDOW ( sb , dp ) ) return false;
	}
	// a tod?
	else if ( dp->m_type == DT_TOD ) {
		// print it out
		return printTOD ( sb , dp->m_tod );
	}
	// a daynum?
	else if ( dp->m_type == DT_DAYNUM ) {
		int32_t dn = dp->m_dayNum;
		const char *suffix = "th";
		if ( dn == 1 || dn == 21 || dn == 31 )
			suffix = "st";
		if ( dn == 2 || dn == 22 )
			suffix = "nd";
		if ( dn == 3 || dn == 23 )
			suffix = "rd";
		if ( ! sb->safePrintf("%"INT32"%s",dn,suffix) ) return false;
	}
	// year
	else if ( dp->m_type == DT_YEAR ) {
		if ( ! sb->safePrintf("%"INT32"",dp->m_year ) ) return false;
	}
	else if ( dp->m_type == DT_EVERY_DAY ) {
		if ( ! sb->safePrintf("daily" ) ) return false;
	}
	// nights mornings evening afternoon... print nothing for these
	else if ( dp->m_type == DT_SUBDAY ) {
		return true;
	}
	else if ( dp->m_type == DT_SUBWEEK ) {
		if ( dp->m_num == HD_TTH ) {
			if ( ! sb->safePrintf("Tuesday & Thursday" ) )
				return false;
		}
		else if ( dp->m_num == HD_MW ) {
			if ( ! sb->safePrintf("Monday & Wednesday" ) )
				return false;
		}
		else if ( dp->m_num == HD_MWF ) {
			if ( !sb->safePrintf("Monday, Wednesday, Friday"))
				return false;
		}
		else if ( dp->m_num == HD_WEEKENDS ) {
			if ( ! sb->safePrintf("Weekends"))
				return false;
		}
		else if ( dp->m_num == HD_WEEKDAYS ) {
			if ( !sb->safePrintf("Weekdays"))
				return false;
		}
		else { char *xx=NULL;*xx=0; }
	}
	// . what is this???? summers, weekends...
	// . copy the document text from the start of word m_a to the end
	//   of word m_b-1
	else {
		char *s = wptrs[dp->m_a];
		int32_t len = wptrs[dp->m_b-1] - wptrs[dp->m_a] +wlens[dp->m_b-1];
		if ( ! sb->safeMemcpy( s , len ) ) return false;
	}
	return true;
}
// . print one row of debug output describing this date
// . if "sbArg" is non-NULL the row is appended to it. otherwise the row is
//   built in a temporary buffer using " | " as the column separator and is
//   written to stdout before returning.
// . "ss" is no longer read by this function (see the note near the
//   "frombody" flags below)
// . "words" is handed to printText() to render the text of the date
// . "siteHash" is hashed with m_clockHash to make the termid column
// . "num" is the number printed after the '#'
// . "best" is compared against "this" to select the b1/b2 wrapper strings
// . "dates" is used to turn m_dupOf/m_subdateOf into date numbers, so it must
//   be non-NULL when DF_DUP or DF_SUB_DATE is set
// . the return values of safePrintf() are not checked here
// . NOTE(review): several string literals below are empty or a lone space.
//   they look like they once held markup that is missing from this copy of
//   the file -- confirm against version control. they are reproduced as-is.
void Date::print ( SafeBuf *sbArg ,
		   Sections *ss ,
		   Words *words ,
		   int32_t siteHash ,
		   int32_t num ,
		   Date *best ,
		   Dates *dates ) {
	// "ss" used to be consulted for section votes. keep the parameter so
	// callers do not change, but do not touch it.
	(void)ss;
	// use this if the caller gave us no buffer
	SafeBuf tmp;
	SafeBuf *sb = sbArg;
	if ( ! sbArg ) sb = &tmp;
	// wrapper strings put around the number and score of the "best" date
	char *b1 = "";
	char *b2 = "";
	if ( this == best ) { b1 = ""; b2 = " "; }
	// wrapper strings put around dates that lack DF_NOTKILLED
	char *f1 = "";
	char *f2 = "";
	// were we part of a compound date or whatever?
	if ( ! ( m_flags & DF_NOTKILLED ) ) {
		f1 = ""; f2 = " "; }
	// show the date number and its word range [m_a,m_b)
	if ( sbArg )
		sb->safePrintf("\n"
			       // tell diff to ignore
			       "%s%s#%"INT32"%s%s \n"
			       "%"INT32" "
			       "%"INT32" " ,
			       f1,b1,num,b2,f2,
			       m_a,m_b );
	else
		sb->safePrintf("%s#%"INT32"%s | "
			       "%"INT32" | "
			       "%"INT32" | " ,
			       b1,num,b2,
			       m_a,m_b );
	// show the text of the date
	if ( sbArg ) sb->safePrintf("");
	if ( m_flags & DF_INVALID ) sb->safePrintf("");
	sb->safePrintf("%s",f1);
	// only dates with a valid word range have text we can print
	if ( m_a >= 0 && m_b >= 0 ) {
		printText ( sb , words );
	}
	else
		sb->safePrintf("???");
	sb->safePrintf("%s",f2);
	if ( m_flags & DF_INVALID ) sb->safePrintf(" ");
	// end in assumed year
	//if ( m_flags & DF_ASSUMED_YEAR )
	//	sb->safePrintf(" ** %"INT32"",m_year);
	if ( sbArg ) sb->safePrintf(" ");
	else         sb->safePrintf(" | ");
	// . timestamp column
	// . numeric timestamps (DT_TIMESTAMP) are broken down with gmtime(),
	//   everything else with localtime()
	// . NOTE(review): the format string labels both as "UTC" even though
	//   the localtime() result is in the local zone -- confirm intent
	struct tm *timeStruct = NULL;
	if ( m_type == DT_TIMESTAMP ) timeStruct = gmtime    ( &m_timestamp );
	else                          timeStruct = localtime ( &m_timestamp );
	// do not call this "time", that shadows the libc function
	char timeBuf[256];
	// . a zero timestamp means "none" and prints as "---"
	// . localtime()/gmtime() return NULL if the value can not be
	//   represented, and strftime() must not be handed a NULL tm
	if ( m_timestamp == 0 || ! timeStruct )
		strcpy ( timeBuf , "---" );
	else
		strftime ( timeBuf , sizeof(timeBuf) ,
			   "%b %e %T %Y UTC", timeStruct );
	//TimeZone *tzPtr = m_pubDateTimeZones[i];
	//char *tzStr = " ";
	//if ( tzPtr ) tzStr = tzPtr->m_name;
	// score (negated penalty), timestamp and a timezone placeholder
	if ( sbArg ) {
		sb->safePrintf("%s%"INT32"%s " // score
			       "%s " // timetamp
			       "%s ",
			       b1,-1 * m_penalty,b2,
			       timeBuf,
			       "---");//tzStr);
	}
	else
		sb->safePrintf("%s%"INT32"%s | " // score
			       "%s | " // timetamp
			       // timezone
			       "%s"
			       ,
			       b1,-1 * m_penalty,b2,
			       timeBuf,
			       "---");//tzStr);
	// sentence id
	//sb->safePrintf("%"INT32" ",m_sentenceId);
	// datehash64
	sb->safePrintf("%"UINT64" ",m_dateHash64);
	// . tag hash
	// . turkTagHash is for date elements really
	sb->safePrintf("%"UINT32" ",m_dateTypeAndTagHash32);//m_turkTagHash);
	// flag row
	sb->safePrintf("");
	// print each flag, one word per set flag. the order below is the
	// order of the output, so do not rearrange it.
	if ( m_headerCount )
		sb->safePrintf("(hdrcnt=%"INT32") ",m_headerCount);
	if ( m_tmph )
		sb->safePrintf("(deduphash=0x%"XINT32") ",m_tmph);
	if ( m_maxYearGuess )
		sb->safePrintf("(maxyearguess=%"INT32") ",m_maxYearGuess);
	if ( m_dowBasedYear )
		sb->safePrintf("(dowbasedyear=%"INT32") ",m_dowBasedYear);
	if ( m_flags & DF_DUP ) {
		int32_t dupNum = dates->getDateNum(m_dupOf);
		sb->safePrintf("dupof%"INT32" ",dupNum);
	}
	if ( m_flags & DF_SUB_DATE ) {
		int32_t dnum = dates->getDateNum(m_subdateOf);
		sb->safePrintf("subdateof%"INT32" ",dnum);
	}
	if ( m_flags & DF_EVENT_CANDIDATE )
		sb->safePrintf("eventcandidate ");
	if ( m_flags & DF_ASSUMED_YEAR )
		sb->safePrintf("assumedyear ");
	// (the misspelling is part of the emitted output, leave it)
	if ( m_flags & DF_YEAR_UNKNOWN )
		sb->safePrintf("yearunkown ");
	if ( m_flags & DF_STORE_HOURS )
		sb->safePrintf("storehours ");
	if ( m_flags & DF_SUBSTORE_HOURS )
		sb->safePrintf("substorehours ");
	if ( m_flags & DF_WEEKLY_SCHEDULE )
		sb->safePrintf("weeklysched ");
	if ( m_flags & DF_KITCHEN_HOURS )
		sb->safePrintf("kitchenhours ");
	if ( m_flags & DF_SCHEDULECAND )
		sb->safePrintf("schedulecand ");
	if ( m_flags & DF_TIGHT )
		sb->safePrintf("tight ");
	if ( m_flags & DF_INCRAZYTABLE )
		sb->safePrintf("incrazytable ");
	if ( m_flags & DF_TABLEDATEHEADERROW )
		sb->safePrintf("tabledateheaderrow ");
	if ( m_flags & DF_TABLEDATEHEADERCOL )
		sb->safePrintf("tabledateheadercol ");
	if ( m_flags & DF_IN_LIST )
		sb->safePrintf("indatelist ");
	if ( m_flags & DF_FIRST_IN_LIST )
		sb->safePrintf("firstindatelist ");
	if ( m_flags & DF_REDUNDANT )
		sb->safePrintf("redundant ");
	if ( m_flags & DF_HAS_ISOLATED_DAYNUM )
		sb->safePrintf("hasisolateddaynum ");
	if ( m_flags & DF_IN_CALENDAR )
		sb->safePrintf("incalendar ");
	if ( m_calendarSection )
		sb->safePrintf("incalendarsection ");
	if ( m_flags & DF_REGISTRATION )
		sb->safePrintf("registration ");
	if ( m_flags & DF_NONEVENT_DATE )
		sb->safePrintf("noneventdate ");
	if ( m_flags5 & DF5_IGNORE )
		sb->safePrintf("ignore ");
	if ( m_flags & DF_ONOTHERPAGE )
		sb->safePrintf("onotherpage ");
	//if ( m_flags & DF_UPSIDEDOWN )
	//	sb->safePrintf("upsidedown ");
	//if ( m_flags & DF_GOOD_EVENT_DATE )
	//	sb->safePrintf("goodeventdate ");
	if ( m_flags & DF_HAS_WEAK_DOW )
		sb->safePrintf("hasweakdow ");
	if ( m_flags & DF_HAS_STRONG_DOW )
		sb->safePrintf("hasstrongdow ");
	//if ( m_flags & DF_IN_VERTICAL_LIST )
	//	sb->safePrintf("inverticallist ");
	if ( m_flags & DF_INVALID )
		sb->safePrintf("invalid ");
	//if ( m_flags & DF_BAD_ENDING )
	//	sb->safePrintf("badending ");
	//if ( m_flags & DF_IN_SAME_SENTENCE )
	//	sb->safePrintf("insamesentence ");
	if ( m_flags & DF_CLOSE_DATE )
		sb->safePrintf("closed ");
	//if ( m_flags & DF_BAD_RECURRING_DOW )
	//	sb->safePrintf("badrecurringdow ");
	if ( m_flags & DF_PUB_DATE )
		sb->safePrintf("pubdate ");
	//if ( m_flags & DF_COMMENT_DATE )
	//	sb->safePrintf("commentdate ");
	// not allowed to be recurring
	//if ( m_flags & DF_FUNERAL_DATE )
	//	sb->safePrintf("funeraldate ");
	if ( m_suppFlags & SF_RECURRING_DOW )
		sb->safePrintf("recurringdow ");
	//if ( m_suppFlags & SF_DOW_IN_TITLE )
	//	sb->safePrintf("dowintitle ");
	if ( m_suppFlags & SF_PLURAL )
		sb->safePrintf("plural ");
	if ( m_suppFlags & SF_NON )
		sb->safePrintf("non ");
	if ( m_suppFlags & SF_MID )
		sb->safePrintf("mid ");
	if ( m_suppFlags & SF_EVERY )
		sb->safePrintf("every ");
	if ( m_suppFlags & SF_FIRST )
		sb->safePrintf("first ");
	if ( m_suppFlags & SF_LAST )
		sb->safePrintf("last ");
	if ( m_suppFlags & SF_SECOND )
		sb->safePrintf("second ");
	if ( m_suppFlags & SF_THIRD )
		sb->safePrintf("third ");
	if ( m_suppFlags & SF_FOURTH )
		sb->safePrintf("fourth ");
	if ( m_suppFlags & SF_FIFTH )
		sb->safePrintf("fifth ");
	if ( m_suppFlags & SF_HAD_AMPM )
		sb->safePrintf("hadampm ");
	if ( m_suppFlags & SF_PM_BY_LIST )
		sb->safePrintf("pmbylist ");
	if ( m_suppFlags & SF_MILITARY_TIME )
		sb->safePrintf("militarytime ");
	if ( m_suppFlags & SF_IMPLIED_AMPM )
		sb->safePrintf("impliedampm ");
	if ( m_suppFlags & SF_NIGHT )
		sb->safePrintf("night ");
	if ( m_suppFlags & SF_AFTERNOON )
		sb->safePrintf("afternoon ");
	if ( m_suppFlags & SF_MORNING )
		sb->safePrintf("morning ");
	if ( m_suppFlags & SF_ON_PRECEEDS )
		sb->safePrintf("onpreceeds ");
	if ( m_suppFlags & SF_SPECIAL_TOD )
		sb->safePrintf("specialtod ");
	//if ( m_suppFlags & SF_NON_FUZZY )
	//	sb->safePrintf(" ");
	if ( m_flags & DF_COPYRIGHT )
		sb->safePrintf("copyright ");
	if ( m_flags & DF_IN_HYPERLINK )
		sb->safePrintf("inhyperlink ");
	if ( m_flags & DF_ONGOING )
		sb->safePrintf("ongoing ");
	if ( m_flags & DF_AFTER_TOD )
		sb->safePrintf("aftertod ");
	if ( m_flags & DF_BEFORE_TOD )
		sb->safePrintf("beforetod ");
	if ( m_flags & DF_EXACT_TOD )
		sb->safePrintf("exacttod ");
	// table position, if the date has a table cell
	if ( m_tableCell ) {
		sb->safePrintf("tablesec=0x%"PTRFMT" ",
			       (PTRTYPE)m_tableCell->m_tableSec);
		sb->safePrintf("row=%"INT32" ",m_tableCell->m_rowNum);
		sb->safePrintf("col=%"INT32" ",m_tableCell->m_colNum);
	}
	if ( m_flags & DF_LEFT_BOOKEND )
		sb->safePrintf("leftbookend ");
	if ( m_flags & DF_RIGHT_BOOKEND )
		sb->safePrintf("rightbookend ");
	if ( m_flags & DF_HARD_LEFT )
		sb->safePrintf("hardleft ");
	if ( m_flags & DF_HARD_RIGHT )
		sb->safePrintf("hardright ");
	if ( m_flags & DF_FUZZY )
		sb->safePrintf("fuzzy ");
	if ( m_flags & DF_USEDASHEADER )
		sb->safePrintf("usedasheader ");
	//if ( m_flags & DF_GOOD_EVENT_DATE )
	//	sb->safePrintf("goodeventdate ");
	//if ( m_flags & DF_NEEDMOREINFO )
	//	sb->safePrintf("needmoreinfo"
	//		       " ");
	if ( m_flags & DF_OFFICIAL )
		sb->safePrintf("officialtime ");
	if ( m_flags & DF_CLOCK )
		sb->safePrintf("clock ");
	if ( m_flags & DF_NOTCLOCK )
		sb->safePrintf("notclock ");
	// . this used to look up ss->m_sectionPtrs[m_a] for DF_FROM_BODY
	//   dates in order to print the SV_CLOCK and SV_EURDATEFMT section
	//   votes, but that printing was already commented out
	// . the lookup was dropped because its result was never used and it
	//   dereferenced "ss" for nothing
	// the component types this date is made of
	if ( m_hasType & DT_TOD )
		sb->safePrintf("TOD ");
	if ( m_hasType & DT_DAYNUM )
		sb->safePrintf("DAYNUM ");
	if ( m_hasType & DT_MONTH )
		sb->safePrintf("MONTH ");
	if ( m_hasType & DT_YEAR )
		sb->safePrintf("YEAR ");
	if ( m_hasType & DT_DOW ) // day of week
		sb->safePrintf("DOW ");
	//if ( m_hasType & DT_MOD ) // "first" "last" "second"
	//	sb->safePrintf("MOD ");
	if ( m_hasType & DT_HOLIDAY )
		sb->safePrintf("HOLIDAY ");
	if ( m_hasType & DT_SUBDAY )
		sb->safePrintf("SUBDAY ");
	if ( m_hasType & DT_SUBWEEK )
		sb->safePrintf("SUBWEEK ");
	if ( m_hasType & DT_SUBMONTH )
		sb->safePrintf("SUBMONTH ");
	if ( m_hasType & DT_EVERY_DAY )
		sb->safePrintf("EVERYDAY ");
	if ( m_hasType & DT_SEASON )
		sb->safePrintf("SEASON ");
	if ( m_hasType & DT_ALL_HOLIDAYS )
		sb->safePrintf("ALLHOLIDAYS ");
	if ( m_hasType & DT_TIMESTAMP )
		sb->safePrintf("TIMESTAMP ");
	if ( m_hasType & DT_RANGE )
		sb->safePrintf("RANGE ");
	if ( m_hasType & DT_RANGE_YEAR )
		sb->safePrintf("RANGEYEAR ");
	if ( m_hasType & DT_RANGE_TOD )
		sb->safePrintf("RANGETOD ");
	if ( m_hasType & DT_RANGE_DOW )
		sb->safePrintf("RANGEDOW ");
	if ( m_hasType & DT_RANGE_TIMEPOINT )
		sb->safePrintf("RANGETIMEPOINT ");
	if ( m_hasType & DT_RANGE_DAYNUM )
		sb->safePrintf("RANGEDAYNUM ");
	if ( m_hasType & DT_RANGE_MONTHDAY )
		sb->safePrintf("RANGEMONTHDAY ");
	if ( m_hasType & DT_LIST_DAYNUM )
		sb->safePrintf("LISTDAYNUM ");
	if ( m_hasType & DT_LIST_TOD )
		sb->safePrintf("LISTTOD ");
	if ( m_hasType & DT_LIST_DOW )
		sb->safePrintf("LISTDOW ");
	if ( m_hasType & DT_LIST_MONTH )
		sb->safePrintf("LISTMONTH ");
	if ( m_hasType & DT_LIST_MONTHDAY )
		sb->safePrintf("LISTMONTHDAY ");
	if ( m_hasType & DT_LIST_OTHER )
		sb->safePrintf("LISTOTHER ");
	if ( m_hasType & DT_COMPOUND )
		sb->safePrintf("COMPOUND ");
	if ( m_hasType & DT_TELESCOPE )
		sb->safePrintf("TELESCOPE ");
	if ( m_flags & DF_FUTURE )
		sb->safePrintf("infuture ");
	if ( m_flags & DF_ESTIMATED )
		sb->safePrintf("estimated ");
	//if ( m_flags & DF_INHYPERLINK )
	//	sb->safePrintf("inhyperlink ");
	// where the date came from
	if ( m_flags & DF_FROM_BODY )
		sb->safePrintf("frombody ");
	if ( m_flags & DF_FROM_URL )
		sb->safePrintf("fromurl ");
	//if ( m_flags & DF_FROM_RSS )
	//	sb->safePrintf("fromrss ");
	if ( m_flags & DF_FROM_RSSINLINK )
		sb->safePrintf("rssinlink ");
	if ( m_flags & DF_FROM_RSSINLINKLOCAL )
		sb->safePrintf("rssinlinklocal ");
	if ( m_flags & DF_FROM_META )
		sb->safePrintf("frommeta ");
	// exactly one of these two is always printed
	if ( m_flags & DF_UNIQUETAGHASH )
		sb->safePrintf("uniquetaghash ");
	else
		sb->safePrintf("repeatedtaghash ");
	if ( m_flags & DF_AMBIGUOUS )
		sb->safePrintf("ambiguous ");
	if ( m_flags & DF_AMERICAN )
		sb->safePrintf("american ");
	if ( m_flags & DF_EUROPEAN )
		sb->safePrintf("european ");
	if ( m_flags & DF_MONTH_NUMERIC )
		sb->safePrintf("monthnumeric ");
	// this means we did not find something like "1:33 pm"
	// near the date!
	if ( m_flags & DF_NOTIMEOFDAY )
		sb->safePrintf("notimeofday ");
	if ( m_flags & DF_MATCHESURLDAY )
		sb->safePrintf("matchesurlday ");
	if ( m_flags & DF_MATCHESURLMONTH )
		sb->safePrintf("matchesurlmonth ");
	if ( m_flags & DF_MATCHESURLYEAR )
		sb->safePrintf("matchesurlyear ");
	if ( m_flags & DF_INBADTAG )
		sb->safePrintf("inbadtag ");
	if ( m_flags & DF_BEFORE1970 )
		sb->safePrintf("before1970 ");
	//if ( m_flags & DF_HAS_YEAR )
	//	sb->safePrintf("hasyear ");
	if ( m_flags & DF_CANONICAL )
		sb->safePrintf("canonical ");
	// . numeric fields. each is only printed when it differs from the
	//   value it is compared against here
	// . NOTE(review): those values (32, 30*3600, 2050, 1900, ...) are
	//   presumably the "unset" defaults assigned elsewhere -- confirm
	if ( m_dayNum >= 0 )
		sb->safePrintf("daynum=%"INT32" ",(int32_t)m_dayNum);
	if ( m_minDayNum < 32 )
		sb->safePrintf("mindaynum=%"INT32" ",(int32_t)m_minDayNum);
	if ( m_maxDayNum > 0 )
		sb->safePrintf("maxdaynum=%"INT32" ",(int32_t)m_maxDayNum);
	if ( m_month >= 0 )
		sb->safePrintf("month=%"INT32" ",(int32_t)m_month);
	if ( m_tod >= 0 )
		sb->safePrintf("tod=%"INT32" ",(int32_t)m_tod);
	if ( m_minTod < 30*3600 )
		sb->safePrintf("mintod=%"INT32" ",(int32_t)m_minTod);
	if ( m_maxTod > 0 )
		sb->safePrintf("maxtod=%"INT32" ",(int32_t)m_maxTod);
	// day-of-week bitmask in hex, then how many bits are on
	if ( m_dowBits )
		sb->safePrintf("dowbits=0x%"XINT32"[%"INT32"] ",
			       (uint32_t)((unsigned char)m_dowBits),
			       getNumBitsOn8(m_dowBits));
	if ( m_minYear != 2050 )
		sb->safePrintf("minyear=%"INT32" ",(int32_t)m_minYear);
	if ( m_maxYear != 1900 )
		sb->safePrintf("maxyear=%"INT32" ",(int32_t)m_maxYear);
	sb->safePrintf("datehash=0x%"XINT64" ",m_dateHash64);
	// make the termid from the clock hash and the site hash
	int64_t termId = hash64 ( m_clockHash , siteHash );
	// mask it
	termId &= TERMID_MASK;
	// trailing columns
	if ( sbArg )
		sb->safePrintf(" "
			       //"%"INT32" " // wordNum
			       "0x%08"XINT32" " // tagHash
			       "%"UINT32" " // occNum
			       "0x%08"XINT32" " // clockhash
			       "%"UINT64" " // termid
			       " \n" ,
			       m_tagHash ,
			       m_occNum ,
			       m_clockHash ,
			       termId );
	else
		sb->safePrintf("0x%08"XINT32" | " // tagHash
			       "%"UINT32" | " // occNum
			       "0x%08"XINT32" | " // clockhash
			       "%"UINT64"" // termid
			       "\n" ,
			       m_tagHash ,
			       m_occNum ,
			       m_clockHash ,
			       termId );
	// no caller buffer? then dump our temporary one to stdout
	if ( ! sbArg )
		fprintf(stdout,"%s",sb->getBufStart() );
}
// . get the byte offsets, relative to the start of the first word of the
//   document, of the text spanned by a date
// . if "num" is 0 (or negative) the offsets are those of "date" itself
// . if "num" is positive then "date" must be a DT_TELESCOPE date and the
//   offsets are those of its component date->m_ptrs[num]
// . on success sets *dateStartOff to the offset of the first byte of word
//   #m_a and *dateEndOff to the offset just past the last byte of word
//   #(m_b-1), and returns true
// . returns false with both offsets set to -1 if there is no such date, or if
//   the date has no word range in the document body
// . "dateSentStartOff" and "dateSentEndOff" are currently NOT written. the
//   code that filled them with the offsets of the enclosing SEC_SENTENCE
//   section was disabled.
bool Dates::getDateOffsets ( Date *date ,
			     int32_t num ,
			     int32_t *dateStartOff ,
			     int32_t *dateEndOff ,
			     int32_t *dateSentStartOff ,
			     int32_t *dateSentEndOff ) {
	char   **wptrs = m_words->getWords();
	int32_t *wlens = m_words->getWordLens ();
	// assume none
	*dateStartOff = -1;
	*dateEndOff   = -1;
	// offsets are relative to this
	char *docStart = m_words->m_words[0];
	// assume this date
	Date *dp = date;
	// a positive num selects a component of a telescoped date
	if ( num > 0 ) {
		if ( date->m_type != DT_TELESCOPE ) return false;
		if ( num >= date->m_numPtrs ) return false;
		// get that component
		dp = date->m_ptrs[num];
	}
	// . dates that are not in the body have a negative m_a
	// . do not index the word arrays with that, just say we have none
	if ( dp->m_a < 0 || dp->m_b < 1 ) return false;
	// first byte of the first word, and one past the end of the last
	char *p    = wptrs[dp->m_a];
	char *pend = wptrs[dp->m_b-1] + wlens[dp->m_b-1];
	*dateStartOff = p    - docStart;
	*dateEndOff   = pend - docStart;
	return true;
}
// . returns start time if we had a legit one
// . returns -1 and sets g_errno on error
// . returns -2 if no time found
// . "Shows at 4, 6 and 8pm on Friday and 1pm on Saturday with an "
// "introduction by Traga Rinpoche before the 6pm Friday show."
// . - http://www.dailylobo.com/index.php/calendar/event/9m6q6esn96icvmme44iu3h79cc?time=1252706400
// . also for movie times like " 11:45am, 2:20, 4:55, 7:30, 7:55, 10:05, 10:30"
// assume that all times with a colon are pm unless otherwise stated.
int32_t Dates::parseTimeOfDay3 ( Words *w ,
int32_t i ,
int32_t niceness ,
int32_t *endWordNum ,
TimeZone **tzPtr ,
bool monthPreceeds ,
// do we KNOW if it was am or pm?
bool *hadAMPM ,
bool *hadMinute ,
bool *isMilitary ) {
int32_t nw = w->getNumWords();
char **wptrs = w->getWords ();
int32_t *wlens = w->getWordLens();
nodeid_t *tids = w->m_tagIds;
int64_t *wids = w->getWordIds();
// save it
int32_t savei = i;
// must start with a number
//if ( ! is_digit(wptrs[i][0]) ) {
// if ( m_wids[i] != h_noon &&
// m_wids[i] != h_midnight ) { char *xx=NULL;*xx=0; }
//}
// if length is two both must be digit ("9p"?)
if ( wlens[i]>=2 &&
! is_digit(wptrs[i][1]) &&
to_lower_a(wptrs[i][1]) != 'p' &&
to_lower_a(wptrs[i][1]) != 'a' )
return -2;
// 5+ is bad ("12pm" is like the biggest?)
if ( wlens[i] >= 5 ) return -2;
// get as number
int32_t hour = w->getAsLong(i);
// . must be valid hour
// . allow for 00:29:00 GMT like for trumba.com!
if ( hour < 0 || hour > 24 ) return -2;
// are we military time?
if ( hour > 12 ) *isMilitary = true;
else *isMilitary = false;
// starting with 0 is military. oh-six-hundred.
if ( wptrs[i][0] == '0' ) *isMilitary = true;
// this will need to be true
bool gotIt = false;
// assume no minute follows
bool hadMin = false;
bool hadPeriod = false;
// a period after the number?
int32_t numDigits = 0;
if ( numDigits == 0 && is_digit(wptrs[i][0]) ) numDigits++;
if ( numDigits == 1 && is_digit(wptrs[i][1]) ) numDigits++;
if ( numDigits == 2 && is_digit(wptrs[i][2]) ) numDigits++;
if ( numDigits >= 3 ) return -2;
// if a minute follows, must be like 04:32
int32_t minute = 0;
// support ceder.net's "6:30 - 8.00PM" !!!! allow periods
if ( i+2getAsLong(i+2);
if ( minute < 0 || minute > 59 ) return -2;
// must be at least two chars
if ( wlens[i+2] < 2 ) return -2;
// flag this
if ( wptrs[i][numDigits]=='.' )
hadPeriod = true;
// point to the minute
i += 2;
// flag it
hadMin = true;
// this too
*hadMinute = true;
}
// does a second follow? "19:35:12 GMT"
int32_t sec = 0;
if ( i+2getAsLong(i+2);
if ( sec < 0 || sec > 59 ) return -2;
// point to the second
i += 2;
}
// timezone unknown at this point
if ( tzPtr ) *tzPtr = NULL;
// . assume end of it. point to word after that last number
// . we kinda have to stop here because our quest for finding an "am"
// or "pm" often takes us over another time, because we are part of
// a time range like "9 - 11am".
*endWordNum = i + 1;
// is it pm? am?
bool isPM = false;
bool isAM = false;
bool isMil = false;
// int16_tcuts
//int64_t h_and = hash64b("and");
//int64_t h_to = hash64b("to");
//int64_t h_noon = hash64b("noon");
//int64_t h_midnight = hash64b("midnight");
// does "noon" follow the number, which must be 12
if ( i + 2 < nw && hour == 12 && minute==0 && wids[i+2]==h_noon ) {
// it is legit
gotIt = true;
// assume am
//isAM = true;
isPM = true;
// update the end of it
*endWordNum = i + 3;
}
if ( i + 2 < nw && hour == 12 && minute==0 && wids[i+2]==h_midnight){
// it is legit
gotIt = true;
// assume pm
isAM = false;
// update the end of it
*endWordNum = i + 3;
// make hour 24
hour = 24;
}
// limit am/pm scan to 10 words
int32_t kmax = i + 10;
if ( kmax > nw ) kmax = nw;
char *s;
// falg
bool hadCrap = false;
bool hadRangeIndicator = false;
bool hadTODAfter = false;
bool lastPunctWordHadJunk = false;
bool hadTag = false;
// count words after the numeric time stuff. looking for am or a. m.
// etc. kinda cruft
//int32_t additional = 0;
//int32_t hadPunct = 0;
int32_t scannedHours = 0;
int32_t followingNum = -1;
// flag init
//int32_t end = -1;
// start with current word, it might have "pm" in it as substr
for ( int32_t k = savei ; k < kmax ; k++ ) {
// breathe
QUICKPOLL(niceness);
// stop if a tag. no some crappy pages have tags between
// the "1" and the "pm" !! maybe a tag??? could be.
if ( tids[k] ) {
// treat a tag as a whitespace for our purposes
lastPunctWordHadJunk = false;
hadTag = true;
continue;//break;
}
// skip if not alnum word
if ( ! wids[k] ) {
// mark if we had a tod after
if ( wptrs[k][0]=='-' &&
followingNum == -1 )
hadRangeIndicator = true;
// if we had no colon (minute) after us but the
// following hour does, then reject us as a tod.
// fixes "Route 8 9:00 pm - 1:00 am" for
// http://www.guysndollsllc.com/page5/page4/page4.html
if ( wptrs[k][0]==':' &&
followingNum >= 1 &&
! hadMin &&
! hadRangeIndicator &&
// we must be preceeded by a space then another
// word... i.e. we are in a closer grouping to
// this other word than we are the following
// time of day...
i-2 >= 0 &&
w->isSpaces(i-1) &&
wids[i-2] )
return -2;
if ( wptrs[k][0]==':' &&
followingNum >= 0 &&
hadRangeIndicator )
hadTODAfter = true;
// . 1) a beautiful day
// . set lastPunctWordHadJunk for use below
char *p = wptrs[k];
char *pend = p + wlens[k];
lastPunctWordHadJunk = false;
for ( ; p < pend ; p++ ) {
QUICKPOLL(niceness);
if ( is_wspace_a(*p) ) continue;
lastPunctWordHadJunk = true;
break;
}
continue;
}
// skip if "and" (example: "1, 4 and 6pm")
if ( wids[k] == h_and ) {
//wids[k] == h_to ||
//wids[k] == h_through ) {
// set this to indicate what we print out. i.e.
// what string the time itself is
hadCrap = true;
continue;
}
if ( wids[k] == h_to ||
wids[k] == h_through ||
wids[k] == h_though || // misspelling
wids[k] == h_until ||
wids[k] == h_til ||
wids[k] == h_till ||
wids[k] == h_thru ) {
hadCrap = true;
hadRangeIndicator = true;
continue;
}
// did we have midnight?
// if we had something like 8:30 to midnight, assume we
// are pm then!
if ( wids[k] == h_midnight && ! isAM )
isPM = true;
if ( wids[k] == h_midday && ! isAM )
isPM = true;
if ( wids[k] == h_dusk && ! isAM )
isPM = true;
if ( wids[k] == h_sunset && ! isAM )
isPM = true;
if ( wids[k] == h_sundown && ! isAM )
isPM = true;
if ( wids[k] == h_dawn && ! isPM )
isAM = true;
if ( wids[k] == h_sunrise && ! isPM )
isAM = true;
// point to it
s = wptrs[k];
// and the end of it
char *send = wptrs[k] + wlens[k];
// if this is a number, count it
if ( is_digit(*s) && k >= *endWordNum ) {
// set this
hadCrap = true;
// get the number
int32_t num = w->getAsLong(k);
// stop if a year or something
if ( num < 0 || num >= 60 ) break;
// count it
scannedHours++;
// record it if first one
if ( followingNum == -1 ) followingNum = num;
}
// skip over all digits
for ( ; s < send && is_digit(*s) ; s++ ) ;
// if they were all digits, try try next word.
if ( s >= send ) continue;
// get first alpha char as lower case
char c = to_lower_a(*s);
// if first letter is not a or p, forget it (h is military hrs)
if ( c != 'a' && c != 'p' && c != 'h' ) break;
// . is the 'a' or 'p' part of the hour/minute (in same word?)
// . "1am" or "2p" or "3:30p" etc.
bool AMPMConnected = false;
if ( is_digit(wptrs[k][0]) ) AMPMConnected = true;
// . watch out for sentences starting with "A"
// . but do allow "1A "???
if ( *s == 'A' && is_wspace_utf8 (s+1) ) break;
// fix "33:30pm" for terrence wilson because it
// was thinking the "3" which was the monthday (April 3rd)
// was a tod! so don't cross tags AND another number when
// looking for the am pm
if ( followingNum >= 0 && hadTag )
break;
// skip the 'a' or 'p'
s++;
// use "t" to search for a following 'm'
char *t = s;
// skip period, if any
if ( *t == '.' ) t++;
// skip a space if any
if ( *t == ' ' ) t++;
// and another even
if ( *t == ' ' ) t++;
// check for the 'm' after the 'a' or 'p'
if ( to_lower_a(*t) != 'm' ) {
// . might have been "7:30p".
// . forget it if we had "7px" or something
if ( is_alnum_utf8(s) ) break;
// if a punct was between the tod and the 'a' or 'p'
// then do not allow it through! "1) a beautiful"
// for newyork.sa-people.com
if ( ! AMPMConnected && lastPunctWordHadJunk ) break;
// ok we got "1) a beautiful" or whatever..
// maybe require 't' to be non alpha unless
// its a month or something
//if ( ! is_alpha_utf8(t) ) goto skip;
// is it a month name?
//if ( k+2= 0 ) goto skip;
// all shucks, ignore the "a"
//break;
goto skip;
}
// skip the m
t++;
// must not be another alnum after that
if ( is_alnum_utf8 (t) ) break;
// update s to t to include the "m"
s = t;
// ok, we got it
skip:
gotIt = true;
// set the flag
if ( c == 'p' ) isPM = true;
else if ( c == 'a' ) isAM = true;
else if ( c == 'h' ) isMil = true; // military time. 7:00 h
// do not update endWordNum if we had alnum words that
// were not "am" or "pm" per se
if ( hadCrap ) break;
// reset this
int32_t qq;
// now identify the last word in our time because
// we must identify a range of words in the time.
for ( qq = *endWordNum ; qq < nw ; qq++ )
// if this word is passed our last char, stop
if ( wptrs[qq] >= s ) break;
// ok, we got it!
*endWordNum = qq;
// all done
break;
}
// . fix "11-1pm" for panjea.org url
// . and fix "11:30-1:30pm" ?
// . watch out for "12:30-2pm" though!
if ( scannedHours >= 1 && followingNum < hour && isPM && hour < 12 ) {
// swap these
isAM = true;
isPM = false;
}
// fix for "11:00-12:00 PM"
if ( scannedHours >= 1 && followingNum == 12 && isPM && hour < 12 ) {
// swap these
isAM = true;
isPM = false;
}
// a hack fix for "Daily 9-5:30"
if ( hadTODAfter ) {
hadMin = true;
*hadMinute = true;
}
// if had to skip a couple of words looking for am/pm and a month
// preceeded and no ":"... but if we had an am/pm then let it through!
// otherwise "April 10 11-1pm" fails to recognize "11" as a time of
// day since it has monthPreceeds set to true..
if ( monthPreceeds && ! hadMin && ! isAM && ! isPM )
return -2;
// or if we had to scan more than 1 additional hour to find the
// am/pm we are probably a day of the month. the count "scannedHours"
// includes ourselves
// . mdw: i made it scannedHours>=1 from scannedHours>1 to fix a
// trumba.com rss page date of "Friday, December 4, 1pm"
if ( monthPreceeds && ! hadMin && scannedHours >= 1 &&
// fix August 27, 5-8 p.m.
! hadRangeIndicator )
return -2;
// fix "9/20 3:00p" for www.when.com/albuquerque-nm/venues
if ( scannedHours >= 3 && ! hadMin )
return -2;
// update. but do not include any other numbers in our string!
// we need to allow the logic above to add in DT_RANGE_TOD date types
// and not do it here
//if ( gotIt && ampmFollows && scannedHours == 0 ) {
// // sanity check
// if ( additional < 0 ) { char *xx=NULL;*xx=0; }
// *endWordNum += additional + hadPunct - 1;
// // MDW hack fix for "4A"
// if ( *endWordNum == i ) *endWordNum = i+1;
//}
// we REQUIRE an am or pm, OR a MINUTE
if ( ! gotIt && ! hadMin ) return -2;
// . add in 12 hours if we are pm
// . crap, for "12 midnight" hour was already set to 24 above
if ( isPM && hour != 12 && hour != 24 ) hour += 12;
// . if we had "8:30" but no am or pm, assume "pm"
// . do not do this if in military time though! (hour>=12)
// . no, this messes up "Daily 9-5:30"
//if ( ! isAM && ! isPM && hadMin && hour < 12 ) hour += 12;
// . 12 am (midnight) is an exception
// . CAUTION: this exceeds the 24 hr clock!
if ( isAM && hour == 12 ) hour += 12;
// away with crap like "15 pm" (which gets made to hour 27)
if ( hour >= 25 ) return -2;
// 24:10 is not a valid time
//if ( hour == 24 && minute != 0 ) return -2;
// . if we had something, advance "i" over the "am" or "pm"
// . update i to point to where "s" left off
while ( gotIt && i+1 < nw && wptrs[i] < s ) i++;
// otherwise, just skip the hour/min/sec that we were ref'ing
if ( ! gotIt ) i++;
// . point to possible timezone
// . if we are on a punctuation word, skip that
if ( ! wids[i] && ! tids[i] ) i++;
// for ceder.net's "6:30 - 8.00PM" fix!
if ( hadPeriod && ! isAM && ! isPM && ! isMil ) return -2;
// now we should be pointing to the timezone
*tzPtr = NULL;
// skip if word is "at", not a good timezone!
//int64_t h_at = hash64("at",2);
// tzptr will be set to NULL if not recognized as a timezone
int32_t tznw = 0;
if ( wids[i] ) {
tznw = getTimeZoneWord ( i , wids, nw,tzPtr , m_niceness );
// return -1 with g_errno set on error
if ( tznw < 0 ) return -1;
// sanity
if ( tznw >= 25 ) { char *xx=NULL;*xx=0; }
}
// advance i over timezone, if we had one
if ( *tzPtr ) {
// sanity check
if ( tznw <= 0 ) { char *xx=NULL;*xx=0; }
// . update this too now!
// . tznw should be like 1 or 3, etc.
// . watch out for 9-5 EST, do not update end of "9" to EST
if ( followingNum < 0 ) *endWordNum = i + tznw;
// skip over it for next time
i++;
}
// make it
int32_t seconds = hour * 3600 + minute * 60;
// sanity check
if ( seconds < 0 ) { char *xx=NULL;*xx=0; }
// sanity check
if ( seconds > 25*3600 ) { char *xx=NULL;*xx=0; }
if ( isAM || isPM || isMil ) *hadAMPM = true;
else *hadAMPM = false;
if ( *isMilitary ) return seconds;
if ( *hadAMPM ) return seconds;
if ( m_contentType != CT_XML ) return seconds;
///////////
//
// BEGIN XML MILITARY TAG TIME CHECK
//
///////////
int32_t kmin = i - 20;
if ( kmin < 0 ) kmin = 0;
int32_t k = i - 1;
bool hitLeftTag = false;
for ( ; k >= kmin ; k-- ) {
// stop on tag word
if ( tids[k] ) { hitLeftTag = true; break; }
// skip punct words
if ( ! wids[k] ) continue;
// stop on alnum
if ( ! is_digit(wptrs[k][0]) ) break;
// make sure all are digits
if ( ! m_words->isNum ( k ) ) break;
// ok, it was just pure numbers, keep going
}
// if we hit a tag on the left, check our right
k = i + 1;
kmax = i + 20;
bool hitRightTag = false;
if ( ! hitLeftTag ) k = kmax;
for ( ; k < kmax ; k-- ) {
// stop on tag word
if ( tids[k] ) { hitRightTag = true; break; }
// skip punct words
if ( ! wids[k] ) continue;
// stop on alnum
if ( ! is_digit(wptrs[k][0]) ) break;
// make sure all are digits
if ( ! m_words->isNum ( k ) ) break;
// ok, it was just pure numbers, keep going
}
// if we are an isolated pure number in an xml tag, assume military
// if we did not have an am/pm
if ( ! hitLeftTag ) return seconds;
if ( ! hitRightTag ) return seconds;
*isMilitary = true;
///////////
//
// END XML MILITARY TAG TIME CHECK
//
///////////
// return it
return seconds;
}
TimeZone tzs[] = {
{ "acdt" , 10, 30, 1 }, // ACDT, +10:30
{ "acst" , 9, 30, 1 }, // ACST, +9:30
{ "adt" , -3, 0, 1 }, // ADT, -3:00
{ "aedt" , 11, 0, 1 }, // AEDT, +11:00
{ "aest" , 10, 0, 1 }, // AEST, +10:00
{ "aft" , 4, 30, 1 }, // AFT, +4:30
{ "ahdt" , -9, 0, 1 }, // AHDT, -9:00 - historical?
{ "ahst" , -10, 0, 1 }, // AHST, -10:00 - historical?
{ "akdt" , -8, 0, 1 }, // AKDT, -8:00
{ "akst" , -9, 0, 1 }, // AKST, -9:00
{ "amst" , 4, 0, 1 }, // AMST, +4:00
{ "amt" , 4, 0, 1 }, // AMT, +4:00
{ "anast" , 13, 0, 1 }, // ANAST, +13:00
{ "anat" , 12, 0, 1 }, // ANAT, +12:00
{ "art" , -3, 0, 1 }, // ART, -3:00
{ "ast" , -4, 0, 1 }, // AST, -4:00
{ "at" , -1, 0, 1 }, // AT, -1:00
{ "awst" , 8, 0, 1 }, // AWST, +8:00
{ "azost" , 0, 0, 1 }, // AZOST, 0:00
{ "azot" , -1, 0, 1 }, // AZOT, -1:00
{ "azst" , 5, 0, 1 }, // AZST, +5:00
{ "azt" , 4, 0, 1 }, // AZT, +4:00
{ "badt" , 4, 0, 1 }, // BADT, +4:00
{ "bat" , 6, 0, 1 }, // BAT, +6:00
{ "bdst" , 2, 0, 1 }, // BDST, +2:00
{ "bdt" , 6, 0, 1 }, // BDT, +6:00
{ "bet" , -11, 0, 1 }, // BET, -11:00
{ "bnt" , 8, 0, 1 }, // BNT, +8:00
{ "bort" , 8, 0, 1 }, // BORT, +8:00
{ "bot" , -4, 0, 1 }, // BOT, -4:00
{ "bra" , -3, 0, 1 }, // BRA, -3:00
{ "bst" , 1, 0, 1 }, // BST, +1:00
{ "bt" , 6, 0, 1 }, // BT, +6:00
{ "btt" , 6, 0, 1 }, // BTT, +6:00
{ "cat" , 2, 0, 1 }, // CAT, +2:00
{ "cct" , 8, 0, 1 }, // CCT, +8:00
{ "cdt" , -5, 0, 1 }, // CDT, -5:00
{ "cest" , 2, 0, 1 }, // CEST, +2:00
{ "cet" , 1, 0, 1 }, // CET, +1:00
{ "chadt" , 13, 45, 1 }, // CHADT, +13:45
{ "chast" , 12, 45, 1 }, // CHAST, +12:45
{ "chst" , 10, 0, 1 }, // CHST, +10:00
{ "ckt" , -10, 0, 1 }, // CKT, -10:00
{ "clst" , -3, 0, 1 }, // CLST, -3:00
{ "clt" , -4, 0, 1 }, // CLT, -4:00
{ "cot" , -5, 0, 1 }, // COT, -5:00
{ "cst" , -6, 0, 1 }, // CST, -6:00
{ "ct" , -6, 0, 1 }, // CT, -6:00
{ "cut" , 0, 0, 2 }, // CUT, 0:00
{ "cxt" , 7, 0, 1 }, // CXT, +7:00
{ "davt" , 7, 0, 1 }, // DAVT, +7:00
{ "ddut" , 10, 0, 1 }, // DDUT, +10:00
{ "dnt" , 1, 0, 1 }, // DNT, +1:00
{ "dst" , 2, 0, 1 }, // DST, +2:00
{ "easst" , -5, 0, 1 }, // EASST -5:00
{ "east" , -6, 0, 1 }, // EAST, -6:00
{ "eat" , 3, 0, 1 }, // EAT, +3:00
{ "ect" , -5, 0, 1 }, // ECT, -5:00
{ "edt" , -4, 0, 1 }, // EDT, -4:00
{ "eest" , 3, 0, 1 }, // EEST, +3:00
{ "eet" , 2, 0, 1 }, // EET, +2:00
{ "egst" , 0, 0, 1 }, // EGST, 0:00
{ "egt" , -1, 0, 1 }, // EGT, -1:00
{ "emt" , 1, 0, 1 }, // EMT, +1:00
{ "est" , -5, 0, 1 }, // EST, -5:00
{ "et" , -5, 0, 1 }, // ET, -5:00
{ "fdt" , -1, 0, 1 }, // FDT, -1:00
{ "fjst" , 13, 0, 1 }, // FJST, +13:00
{ "fjt" , 12, 0, 1 }, // FJT, +12:00
{ "fkst" , -3, 0, 1 }, // FKST, -3:00
{ "fkt" , -4, 0, 1 }, // FKT, -4:00
{ "fst" , 2, 0, 1 }, // FST, +2:00
{ "fwt" , 1, 0, 1 }, // FWT, +1:00
{ "galt" , -6, 0, 1 }, // GALT, -6:00
{ "gamt" , -9, 0, 1 }, // GAMT, -9:00
{ "gest" , 5, 0, 1 }, // GEST, +5:00
{ "get" , 4, 0, 1 }, // GET, +4:00
{ "gft" , -3, 0, 1 }, // GFT, -3:00
{ "gilt" , 12, 0, 1 }, // GILT, +12:00
{ "gmt" , 0, 0, 2 }, // GMT, 0:00
{ "gst" , 10, 0, 1 }, // GST, +10:00
{ "gt" , 0, 0, 2 }, // GT, 0:00
{ "gyt" , -4, 0, 1 }, // GYT, -4:00
{ "gz" , 0, 0, 2 }, // GZ, 0:00
{ "haa" , -3, 0, 1 }, // HAA, -3:00
{ "hac" , -5, 0, 1 }, // HAC, -5:00
{ "hae" , -4, 0, 1 }, // HAE, -4:00
{ "hap" , -7, 0, 1 }, // HAP, -7:00
{ "har" , -6, 0, 1 }, // HAR, -6:00
{ "hat" , -2, -30, 1 }, // HAT, -2:30
{ "hay" , -8, 0, 1 }, // HAY, -8:00
{ "hdt" , -9, -30, 1 }, // HDT, -9:30
{ "hfe" , 2, 0, 1 }, // HFE, +2:00
{ "hfh" , 1, 0, 1 }, // HFH, +1:00
{ "hg" , 0, 0, 2 }, // HG, 0:00
{ "hkt" , 8, 0, 1 }, // HKT, +8:00
{ "hna" , -4, 0, 1 }, // HNA, -4:00
{ "hnc" , -6, 0, 1 }, // HNC, -6:00
{ "hne" , -5, 0, 1 }, // HNE, -5:00
{ "hnp" , -8, 0, 1 }, // HNP, -8:00
{ "hnr" , -7, 0, 1 }, // HNR, -7:00
{ "hnt" , -3, -30, 1 }, // HNT, -3:30
{ "hny" , -9, 0, 1 }, // HNY, -9:00
{ "hoe" , 1, 0, 1 }, // HOE, +1:00
{ "hours" , 0, 0, 2 }, // HOURS, no change, but indicates time
{ "hrs" , 0, 0, 2 }, // HRS, no change, but indicates time
{ "hst" , -10, 0, 1 }, // HST, -10:00
{ "ict" , 7, 0, 1 }, // ICT, +7:00
{ "idle" , 12, 0, 1 }, // IDLE, +12:00
{ "idlw" , -12, 0, 1 }, // IDLW, -12:00
{ "idt" , 3, 0, 1 }, // IDT, +3:00
{ "iot" , 5, 0, 1 }, // IOT, +5:00
{ "irdt" , 4, 30, 1 }, // IRDT, +4:30
{ "irkst" , 9, 0, 1 }, // IRKST, +9:00
{ "irkt" , 8, 0, 1 }, // IRKT, +8:00
{ "irst" , 4, 30, 1 }, // IRST, +4:30
{ "irt" , 3, 30, 1 }, // IRT, +3:30
{ "ist" , 1, 0, 1 }, // IST, +1:00
{ "it" , 3, 30, 1 }, // IT, +3:30
{ "ita" , 1, 0, 1 }, // ITA, +1:00
{ "javt" , 7, 0, 1 }, // JAVT, +7:00
{ "jayt" , 9, 0, 1 }, // JAYT, +9:00
{ "jst" , 9, 0, 1 }, // JST, +9:00
{ "jt" , 7, 0, 1 }, // JT, +7:00
{ "kdt" , 10, 0, 1 }, // KDT, +10:00
{ "kgst" , 6, 0, 1 }, // KGST, +6:00
{ "kgt" , 5, 0, 1 }, // KGT, +5:00
{ "kost" , 12, 0, 1 }, // KOST, +12:00
{ "krast" , 8, 0, 1 }, // KRAST, +8:00
{ "krat" , 7, 0, 1 }, // KRAT, +7:00
{ "kst" , 9, 0, 1 }, // KST, +9:00
{ "lhdt" , 11, 0, 1 }, // LHDT, +11:00
{ "lhst" , 10, 30, 1 }, // LHST, +10:30
{ "ligt" , 10, 0, 1 }, // LIGT, +10:00
{ "lint" , 14, 0, 1 }, // LINT, +14:00
{ "lkt" , 6, 0, 1 }, // LKT, +6:00
{ "magst" , 12, 0, 1 }, // MAGST, +12:00
{ "magt" , 11, 0, 1 }, // MAGT, +11:00
{ "mal" , 8, 0, 1 }, // MAL, +8:00
{ "mart" , -9, -30, 1 }, // MART, -9:30
{ "mat" , 3, 0, 1 }, // MAT, +3:00
{ "mawt" , 6, 0, 1 }, // MAWT, +6:00
{ "mdt" , -6, 0, 1 }, // MDT, -6:00
{ "med" , 2, 0, 1 }, // MED, +2:00
{ "medst" , 2, 0, 1 }, // MEDST, +2:00
{ "mest" , 2, 0, 1 }, // MEST, +2:00
{ "mesz" , 2, 0, 1 }, // MESZ, +2:00
{ "met" , 1, 0, 1 }, // MEZ, +1:00
{ "mewt" , 1, 0, 1 }, // MEWT, +1:00
{ "mex" , -6, 0, 1 }, // MEX, -6:00
{ "mht" , 12, 0, 1 }, // MHT, +12
{ "mmt" , 6, 30, 1 }, // MMT, +6:30
{ "mpt" , 10, 0, 1 }, // MPT, +10:00
{ "msd" , 4, 0, 1 }, // MSD, +4:00
{ "msk" , 3, 0, 1 }, // MSK, +3:00
{ "msks" , 4, 0, 1 }, // MSKS, +4:00
{ "mst" , -7, 0, 1 }, // MST, -7:00
//{ "mt" , 8, 30, 1 }, // MT, +8:30
{ "mt" , -7, 0, 1 }, // MORE LIKELY MOUNTAIN TIME, -7:00
{ "mut" , 4, 0, 1 }, // MUT, +4:00
{ "mvt" , 5, 0, 1 }, // MVT, +5:00
{ "myt" , 8, 0, 1 }, // MYT, +8:00
{ "nct" , 11, 0, 1 }, // NCT, +11:00
{ "ndt" , 2, 30, 1 }, // NDT, +2:30
{ "nft" , 11, 30, 1 }, // NFT, +11:30
{ "nor" , 1, 0, 1 }, // NOR, +1:00
{ "novst" , 7, 0, 1 }, // NOVST, +7:00
{ "novt" , 6, 0, 1 }, // NOVT, +6:00
{ "npt" , 5, 45, 1 }, // NPT, +5:45
{ "nrt" , 12, 0, 1 }, // NRT, +12:00
{ "nst" , -3, -30, 1 }, // NST, -3:30
{ "nsut" , 6, 30, 1 }, // NSUT, +6:30
{ "nt" , -11, 0, 1 }, // NT, -11:00
{ "nut" , -11, 0, 1 }, // NUT, -11:00
{ "nzdt" , 13, 0, 1 }, // NZDT, +13:00
{ "nzst" , 12, 0, 1 }, // NZST, +12:00
{ "nzt" , 12, 0, 1 }, // NZT, +12:00
{ "oesz" , 3, 0, 1 }, // OESZ, +3:00
{ "oez" , 2, 0, 1 }, // OEZ, +2:00
{ "omsst" , 7, 0, 1 }, // OMSST, +7:00
{ "omst" , 6, 0, 1 }, // OMST, +6:00
{ "pdt" , -7, 0, 1 }, // PDT, -7:00
{ "pet" , -5, 0, 1 }, // PET, -5:00
{ "petst" , 13, 0, 1 }, // PETST, +13:00
{ "pett" , 12, 0, 1 }, // PETT, +12:00
{ "pgt" , 10, 0, 1 }, // PGT, +10:00
{ "phot" , 13, 0, 1 }, // PHOT, +13:00
{ "pht" , 8, 0, 1 }, // PHT, +8:00
{ "pkt" , 5, 0, 1 }, // PKT, +5:00
{ "pmdt" , -2, 0, 1 }, // PMDT, -2:00
{ "pmt" , -3, 0, 1 }, // PMT, -3:00
{ "pnt" , -8, -30, 1 }, // PNT, -8:30
{ "pont" , 11, 0, 1 }, // PONT, +11:00
{ "pst" , -8, 0, 1 }, // PST, -8:00
{ "pt" , -8, 0, 1 }, // PT, -8:00
{ "pwt" , 9, 0, 1 }, // PWT, +9:00
{ "pyst" , -3, 0, 1 }, // PYST, -3:00
{ "pyt" , -4, 0, 1 }, // PYT, -4:00
{ "r1t" , 2, 0, 1 }, // R1T, +2:00
{ "r2t" , 3, 0, 1 }, // R2T, +3:00
{ "ret" , 4, 0, 1 }, // RET, +4:00
{ "rok" , 9, 0, 1 }, // ROK, +9:00
{ "sadt" , 10, 30, 1 }, // SADT, +10:30
{ "sast" , 2, 0, 1 }, // SAST, +2:00
{ "sbt" , 11, 0, 1 }, // SBT, +11:00
{ "sct" , 4, 0, 1 }, // SCT, +4:00
{ "set" , 1, 0, 1 }, // SET, +1:00
{ "sgt" , 8, 0, 1 }, // SGT, +8:00
{ "srt" , -3, 0, 1 }, // SRT, -3:00
{ "sst" , 2, 0, 1 }, // SST, +2:00
{ "swt" , 1, 0, 1 }, // SWT, +1:00
{ "tft" , 5, 0, 1 }, // TFT, +5:00
{ "tha" , 7, 0, 1 }, // THA, +7:00
{ "that" , -10, 0, 1 }, // THAT, -10:00
{ "tjt" , 5, 0, 1 }, // TJT, +5:00
{ "tkt" , -10, 0, 1 }, // TKT, -10:00
{ "tmt" , 5, 0, 1 }, // TMT, +5:00
{ "tot" , 13, 0, 1 }, // TOT, +13:00
{ "truk" , 10, 0, 1 }, // TRUK, +10:00
{ "tst" , 3, 0, 1 }, // TST, +3:00
{ "tuc" , 0, 0, 1 }, // TUC, 0:00
{ "tvt" , 12, 0, 1 }, // TVT, 12:00
{ "ulast" , 9, 0, 1 }, // ULAST, +9:00
{ "ulat" , 8, 0, 1 }, // ULAT, +8:00
{ "usz1" , 2, 0, 1 }, // USZ1, +2:00
{ "usz1s" , 3, 0, 1 }, // USZ1S, +3:00
{ "usz2" , 3, 0, 1 }, // USZ2, +3:00
{ "usz2s" , 4, 0, 1 }, // USZ2S, +4:00
{ "usz3" , 4, 0, 1 }, // USZ3, +4:00
{ "usz3s" , 5, 0, 1 }, // USZ3S, +5:00
{ "usz4" , 5, 0, 1 }, // USZ4, +5:00
{ "usz4s" , 6, 0, 1 }, // USZ4S, +6:00
{ "usz5" , 6, 0, 1 }, // USZ5, +6:00
{ "usz5s" , 7, 0, 1 }, // USZ5S, +7:00
{ "usz6" , 7, 0, 1 }, // USZ6, +7:00
{ "usz6s" , 8, 0, 1 }, // USZ6S, +8:00
{ "usz7" , 8, 0, 1 }, // USZ7, +8:00
{ "usz7s" , 9, 0, 1 }, // USZ7S, +9:00
{ "usz8" , 9, 0, 1 }, // USZ8, +9:00
{ "usz8s" , 10, 0, 1 }, // USZ8S, +10:00
{ "usz9" , 10, 0, 1 }, // USZ9, +10:00
{ "usz9s" , 11, 0, 1 }, // USZ9S, +11:00
{ "utc" , 0, 0, 2 }, // UTC, 0:00
{ "utz" , -3, 0, 1 }, // UTZ, -3:00
{ "uyt" , -3, 0, 1 }, // UYT, -3:00
{ "uz10" , 11, 0, 1 }, // UZ10, +11:00
{ "uz10s" , 12, 0, 1 }, // UZ10S, +12:00
{ "uz11" , 12, 0, 1 }, // UZ11, +12:00
{ "uz11s" , 13, 0, 1 }, // UZ11S, +13:00
{ "uz12" , 13, 0, 1 }, // UZ12, +13:00
{ "uz12s" , 14, 0, 1 }, // UZ12S, +14:00
{ "uzt" , 5, 0, 1 }, // UZT, +5:00
{ "vet" , -4, 0, 1 }, // VET, -4:00
{ "vlast" , 11, 0, 1 }, // VLAST, +11:00
{ "vlat" , 10, 0, 1 }, // VLAT, +10:00
{ "vtz" , -2, 0, 1 }, // VTZ, -2:00
{ "vut" , 11, 0, 1 }, // VUT, +11:00
{ "wakt" , 12, 0, 1 }, // WAKT, +12:00
{ "wast" , 2, 0, 1 }, // WAST, +2:00
{ "wat" , 1, 0, 1 }, // WAT, +1:00
{ "west" , 1, 0, 1 }, // WEST, +1:00
{ "wesz" , 1, 0, 1 }, // WESZ, +1:00
{ "wet" , 0, 0, 1 }, // WET, 0:00
{ "wez" , 0, 0, 1 }, // WEZ, 0:00
{ "wft" , 12, 0, 1 }, // WFT, +12:00
{ "wgst" , -2, 0, 1 }, // WGST, -2:00
{ "wgt" , -3, 0, 1 }, // WGT, -3:00
{ "wib" , 7, 0, 1 }, // WIB, +7:00
{ "wit" , 9, 0, 1 }, // WIT, +9:00
{ "wita" , 8, 0, 1 }, // WITA, +8:00
{ "wst" , 8, 0, 1 }, // WST, +8:00
{ "wtz" , -1, 0, 1 }, // WTZ, -1:00
{ "wut" , 1, 0, 1 }, // WUT, 1:00
{ "yakst" , 10, 0, 1 }, // YAKST, +10:00
{ "yakt" , 9, 0, 1 }, // YAKT, +9:00
{ "yapt" , 10, 0, 1 }, // YAPT, +10:00
{ "ydt" , -8, 0, 1 }, // YDT, -8:00
{ "yekst" , 6, 0, 1 }, // YEKST, +6:00
{ "yst" , -9, 0, 1 }, // YST, -9:00
{ "\0" , 0, 0, 0 } };
// hash table of timezone information. populated by initTimeZoneTable()
// with one entry per row of tzs[], keyed by the hash64Lower_utf8() hash
// of the row's m_name
static HashTableX s_tzt;
// word hashes of a few literal words, computed with hash64n() in
// initTimeZoneTable() so they can be compared against word ids:
// hash of "mountain"
static int64_t h_mountain;
// hash of "eastern"
static int64_t h_eastern;
// hash of "central"
static int64_t h_central;
// hash of "pacific"
static int64_t h_pacific;
// hash of "time"
static int64_t h_time2;
// hash of "mdt"
static int64_t h_mdt;
// hash of "at". getTimeZoneWord() rejects this word as too common
static int64_t h_at2;
// . build the s_tzt hash table from the tzs[] array and compute the h_*
//   word hashes
// . safe to call repeatedly: once s_tzt holds entries it just returns true
// . returns false if the table could not be set up or a key failed to add
bool initTimeZoneTable ( ) {
	// a populated table means an earlier call already did the work
	if ( s_tzt.m_numSlotsUsed ) return true;
	// hashes of the words we compare word ids against
	h_mountain = hash64n("mountain");
	h_eastern  = hash64n("eastern");
	h_central  = hash64n("central");
	h_pacific  = hash64n("pacific");
	h_time2    = hash64n("time");
	h_mdt      = hash64n("mdt");
	h_at2      = hash64n("at");
	// NOTE(review): the leading 8,4 look like key/value byte sizes, yet
	// getTimeZone() reads a whole TimeZone through the slot value
	// pointer -- confirm against HashTableX that 4 is what is wanted
	if ( ! s_tzt.set( 8,4, 300,NULL,0,false,0,"tzts"))
		return false;
	// walk tzs[] until the terminating entry whose name is empty
	int32_t n = 0;
	while ( *tzs[n].m_name ) {
		char *name = tzs[n].m_name;
		int32_t nameLen = gbstrlen(name);
		// hash like Words.cpp computeWordIds
		uint64_t key = hash64Lower_utf8( name , nameLen );
		// the value is taken from the address of the tzs[] entry
		if ( ! s_tzt.addKey ( &key, &tzs[n] ) )
			return false;
		n++;
	}
	return true;
}
// . return what we have to add to UTC, in seconds, to get time in the
//   locale specified by "s" where "s" is like "PDT" "MST" "EST" etc.
// . only the leading run of alphanumeric chars in "s" is looked up
// . returns BADTIMEZONE if "s" is NULL or the timezone table could not be
//   initialized, and 999999 if the name is not in the table
int32_t getTimeZone ( char *s ) {
	if ( ! s ) return BADTIMEZONE;
	// point to end of the potential timezone. the cast is required
	// because handing isalnum() a negative char (any byte >= 0x80 where
	// char is signed) is undefined behavior
	char *send = s;
	while ( *send && isalnum ( (unsigned char)*send ) ) send++;
	// hash it
	uint64_t h = hash64Lower_utf8( s , send -s );
	// make sure table is ready. do not probe a table we failed to build
	if ( ! initTimeZoneTable() ) return BADTIMEZONE;
	// look it up
	int32_t slot = s_tzt.getSlot( &h );
	// not a timezone we know about
	// NOTE(review): the NULL case above returns BADTIMEZONE while this
	// returns the literal -- confirm they are the same value
	if ( slot < 0 ) return 999999;
	// NOTE(review): initTimeZoneTable() appears to configure s_tzt with
	// a 4-byte value; confirm this pointer really addresses a TimeZone
	TimeZone *tzptr = (TimeZone *)s_tzt.getValueFromSlot ( slot );
	// convert the hour and minute offsets into seconds. the minute
	// offset carries the same sign as the hour offset in tzs[]
	int32_t secs = tzptr->m_hourMod * 3600;
	secs += tzptr->m_minMod * 60;
	return secs;
}
// . returns how many words starting at i are in the time zone
// . 0 means not a timezone
int32_t getTimeZoneWord ( int32_t i ,
int64_t *wids,
int32_t nw ,
TimeZone **tzptr ,
int32_t niceness ) {
// no ptr
*tzptr = NULL;
// only init table once
bool s_init16 = false;
// init the hash table of month names
if ( ! s_init16 ) {
// on error we return -1 from here
if ( ! initTimeZoneTable() ) return -1;
s_init16 = true;
}
// this is too common of a word!
if ( wids[i] == h_at2 ) return 0;
int32_t slot = s_tzt.getSlot( &wids[i] );
// return this, assume just one word
int32_t tznw = 1;
// . "mountain time"
// . this removes the event title "M-F 8:30 AM-5:30 PM Mountain Time"
// from the event (horus) on http://www.sfreporter.com/contact_us/
if ( slot<0 && i+2m_str,localBuf,localBufSize,0)){
char *xx=NULL;*xx=0;}
// get wids
int64_t *kwids = tmp.m_wordIds;
// first word hash is the key
int64_t h = kwids[0];
// must be valid
if ( ! h ) { char *xx=NULL;*xx=0; }
// reset wid count
dv->m_numWids = 0;
// loop over words
for ( int32_t k = 0 ; k < tmp.m_numWords ;k++ ) {
// skip if not word alnum
if ( ! kwids[k] )
continue;
// sanity
if ( dv->m_numWids >= MAX_WIDS ) {
char *xx=NULL;*xx=0; }
// set initial hash
dv->m_wids[(int32_t)dv->m_numWids] = kwids[k];
// inc it
dv->m_numWids++;
}
// get it
//DateVal *dv = &dvs[j];
//int32_t len = gbstrlen(dv->m_str);
//uint64_t h = hash64Lower_utf8(dv->m_str,len);
// sanity check
if ( dv->m_val == 0 ) { char *xx=NULL;*xx=0; }
if ( dv->m_val < -30 ) { char *xx=NULL;*xx=0; }
// add should always be success since we are pre-alloc
if ( ! s_dvt.addKey(&h,&dv) ){
char*xx=NULL;*xx=0;}
}
return true;
}
// . returns true if word id "wid" has an entry in the s_dvt table whose
//   type is exactly DT_MONTH
// . only the first slot matching "wid" is examined
// . the date type table must already be initialized (s_init98 set),
//   otherwise this deliberately segfaults
bool isMonth ( int64_t wid ) {
	// the table must have been built before we are called
	if ( ! s_init98 ) { char *xx=NULL;*xx=0; }
	// no entry for this word id means it cannot be a month
	int32_t slot = s_dvt.getSlot64 ( &wid );
	if ( slot < 0 ) return false;
	// each slot value holds a DateVal pointer
	DateVal *dv = *(DateVal **)s_dvt.getValueFromSlot ( slot );
	return ( dv->m_type == DT_MONTH );
}
// used by Sections::addSentences()
bool isDateType ( int64_t *pwid ) {
// sanity check
if ( ! s_init98 ) { initDateTypes(); } // char *xx=NULL;*xx=0; }
// get slot
int32_t slot = s_dvt.getSlot64 ( pwid );
// none? no date type then
if ( slot < 0 ) return false;
// see if it is a match
DateVal **dvp = (DateVal **)s_dvt.getValueFromSlot ( slot );
// get it
DateVal *dv = *dvp;
// get it
static datetype_t dd =
DT_HOLIDAY | // thanksgiving
DT_SUBDAY | // mornings
DT_SUBWEEK | // weekends
DT_SUBMONTH | // last day of month
DT_EVERY_DAY | // 7 days a week
DT_SEASON | // summers
DT_ALL_HOLIDAYS | // "holidays"
DT_MONTH |
DT_DAYNUM |
DT_DOW;
// is it a month?
return ( dv->m_type & dd);
}
// . get the DT_* type of date this is
// . SUPPORT: "13th day of","12th month",... "second week"(is a range)
// . looks up wids[i] in the s_dvt table (built by initDateTypes() on first
//   use) and returns the m_type of the entry that matched, or 0 if none did
// . "val", if not NULL, receives the m_val of the matched entry
// . "endWord", if not NULL, is set so that [i,*endWord) covers the words
//   matched. it is set to i+1 as soon as wids[i] is found in the table, so
//   it may have been written even when 0 is returned
// . "nw" bounds the multi-word scan (presumably the number of entries in
//   "wids"); the scan also never reaches word #i+10
// . entries whose type is not one of the "special" types below are
//   returned as soon as they are seen. among special-type entries the one
//   made of the most words wins
// . "onPreceeds" is currently unused: the only code that referenced it is
//   commented out below
datetype_t Dates::getDateType ( int32_t i , int32_t *val , int32_t *endWord ,
int64_t *wids , int32_t nw ,
// does the word "on" precede word #i?
bool onPreceeds ) {
// only init the table once
//static bool s_init = false;
// build the date type hashtable if nobody has done so yet
if ( ! s_init98 ) initDateTypes();
// sum for compounds like twenty-first or nineteen hundred and nine
//int32_t sum = 0;
// breathe
QUICKPOLL ( m_niceness );
// get slot
int32_t slot = s_dvt.getSlot64 ( &wids[i] );
// none? no date type then
if ( slot < 0 ) return 0;
// assume ending word is i+1 (i.e. [i,i+1) )
if ( endWord ) *endWord = i+1;
// assume no "best match"
DateVal *best = NULL;
// types that can span several words and so need best-match handling
datetype_t specialTypes =
DT_HOLIDAY | // thanksgiving
DT_SUBDAY | // mornings
DT_SUBWEEK | // weekends
DT_SUBMONTH | // last day of month
DT_EVERY_DAY | // 7 days a week
DT_SEASON | // summers
DT_ALL_HOLIDAYS ; // "holidays"
// loop over all table entries keyed by this word id
for ( ; slot >= 0 ; slot = s_dvt.getNextSlot ( slot, &wids[i] ) ) {
// breathe
QUICKPOLL(m_niceness);
// see if it is a match
DateVal **dvp = (DateVal **)s_dvt.getValueFromSlot ( slot );
// get it
DateVal *dv = *dvp;
// return right away if not one of the special types
if ( ! ( dv->m_type & specialTypes ) ) { //!= DT_HOLIDAY ) {
// save it
if ( val ) *val = dv->m_val;
// and return the type
return dv->m_type;
}
// ignore type holiday, DT_HOLIDAY, for now since it is
// so ambiguous. people say "Halloween party" or
// "x-mas party" but it is never on halloween or christmas.
// but this loses "closed thanksgivings and christmas"
/*
if ( dv->m_type == DT_HOLIDAY &&
// news years eve and day are exceptions to this
// since timing is so important for them
dv->m_val != HD_NEW_YEARS_EVE &&
dv->m_val != HD_NEW_YEARS_DAY &&
// if the word "on" is before the holiday then assume
// it is on that holiday
! onPreceeds )
// otherwise, do not consider the holiday name itself
// as to when the event is actually occuring
continue;
*/
// if already had a best we need to beat it in # words matched
if ( best && dv->m_numWids <= best->m_numWids ) continue;
// . TTh, MW and MWF should be capitalized!!
// . these are schedule abbreviations used in college
// . a lower case first char rejects the word outright, even if an
//   earlier entry had already been picked as best
if ( dv->m_val == HD_TTH ||
dv->m_val == HD_MW ||
dv->m_val == HD_MWF ) {
if ( is_lower_a(m_wptrs[i][0]) )
return 0;
}
// if holiday is one word, that is a match
if ( dv->m_numWids == 1 ) {
// save it as the best match, but could be overridden
// like how Christmas Eve would override Christmas
best = dv;
// try next matching holiday, if any
continue;
}
// limit scan below
int32_t max = i + 10;
// limit the limit
if ( max > nw ) max = nw;
// index into dv->m_wids[] of the next word we need to match.
// word 0 already matched since it is the hash table key
int32_t next = 1;
// start right after i
int32_t j = i + 1;
// . if holiday has multiple words, we gotta scan!
// . i.e. "New Year's Eve"
for ( ; j < max ; j++ ) {
// all done, all matched
if ( next >= dv->m_numWids ) break;
// skip entries with a zero word id
if ( ! wids[j] ) continue;
// if no match, try next slot
if ( dv->m_wids[next] != wids[j] ) break;
// match next word now
next++;
}
// if did not match all wids in dv, try next slot
if ( next < dv->m_numWids ) continue;
// a new best match
best = dv;
// j is now one past the last word we matched
if ( endWord ) *endWord = j;
}
// 0 if no match... maybe just matched first word of a multiword
// holiday like "New Year's Eve" we matched "New"
if ( ! best ) return 0;
// store it
if ( val ) *val = best->m_val;
return best->m_type;
}
// month names in various languages. only the numeric spellings are
// compiled in at the moment.
struct Months {
	// the month token as a NUL-terminated string
	char month[32];
	// the month number it stands for, 1 through 12
	char value;
};

Months months[] = {
	// support numbers: plain "1" through "12"
	{ "1" , 1 } , { "2" , 2 } , { "3" , 3 } , { "4" , 4 } ,
	{ "5" , 5 } , { "6" , 6 } , { "7" , 7 } , { "8" , 8 } ,
	{ "9" , 9 } , { "10" ,10 } , { "11" ,11 } , { "12" ,12 } ,
	// zero-padded "01" through "09"
	{ "01" , 1 } , { "02" , 2 } , { "03" , 3 } ,
	{ "04" , 4 } , { "05" , 5 } , { "06" , 6 } ,
	{ "07" , 7 } , { "08" , 8 } , { "09" , 9 }
};
/*
// FIRST BY MONTH IN SUPPORTED LANGUAGES
// JANUARY
{ "jan" , 1 },
{ "janv" , 1 },
{ "janvier" , 1 },
{ "januari" , 1 },
{ "januar" , 1 },
{ "enero" , 1 },
// FEBRUARY
{ "feb" , 2 },
{ "febr" , 2 },
{ "februari" , 2 },
{ "februar" , 2 },
{ "febrero" , 2 },
{ "fevr" , 2 },
{ "fevrier" , 2 },
// MARCH
{ "mar" , 3 },
{ "mars" , 3 },
{ "marzo" , 3 },
{ "marec" , 3 },
{ "marz" , 3 },
{ "maart" , 3 },
{ "abr" , 3 },
{ "abril" , 3 },
// APRIL
{ "apr" , 4 },
{ "avril" , 4 },
// MAY
{ "may" , 5 },
{ "mayo" , 5 },
{ "mai" , 5 },
{ "mei" , 5 },
{ "maj" , 5 },
// JUNE
{ "jun" , 6 },
{ "june" , 6 },
{ "juni" , 6 },
{ "junio" , 6 },
{ "junij" , 6 },
{ "juin" , 6 },
// JULY
{ "juil" , 7 },
{ "juillet" , 7 },
{ "jul" , 7 },
{ "july" , 7 },
{ "juli" , 7 },
{ "julio" , 7 },
{ "julij" , 7 },
// AUGUST
{ "aug" , 8 },
{ "august" , 8 },
{ "augustus" , 8 },
{ "augusti" , 8 },
{ "aout" , 8 },
{ "agosto" , 8 },
{ "avg" , 8 },
{ "avgust" , 8 },
// SEPTEMBER
{ "sep" , 9 },
{ "sept" , 9 },
{ "september" , 9 },
{ "septembre" , 9 },
{ "septiembre" , 9 },
{ "set" , 9 },
// OCTOBER
{ "oct" , 10 },
{ "october" , 10 },
{ "octobre" , 10 },
{ "octubre" , 10 },
{ "okt" , 10 },
{ "oktober" , 10 },
// NOVEMBER
{ "nov" , 11 },
{ "november" , 11 },
{ "novembre" , 11 },
{ "noviembre" , 11 },
// DECEMBER
{ "dec" , 12 },
{ "december" , 12 },
{ "decembre" , 12 },
{ "dez" , 12 },
{ "dezember" , 12 },
{ "dic" , 12 },
{ "deciembre" , 12 },
{ "des" , 12 },
{ "desember" , 12 },
// THEN BY LANGUAGE (note: dups are ok)
// Abaza | абаза (abaza)
{ "гъынчыльа", 1 },
{ "январь" , 1 },
{ "мазахӀван", 2 },
{ "февраль" , 2 },
{ "гӀапынхъамыз", 3 },
{ "мартӀ" , 3 },
{ "март" , 3 },
{ "апрель" , 4 },
{ "май" , 5 },
{ "пхынхъа" , 6 },
{ "июнь" , 6 },
{ "пхынчыльа", 7 },
{ "июль" , 7 },
{ "август" , 8 },
{ "сентябрь" , 9 },
{ "октябрь" , 10 },
{ "ноябрь" , 11 },
{ "декабрь" , 12 },
{ "ġənćəla" , 1 },
{ "janvar'" , 1 },
{ "mazaḥʷan" , 2 },
{ "fevral" , 2 },
{ "ʿapənqaməz" , 3 },
{ "marṭ" , 3 },
{ "mart" , 3 },
{ "aprel" , 4 },
{ "maj" , 5 },
{ "pĥənqa" , 6 },
{ "ijun'" , 6 },
{ "pĥənćəla" , 7 },
{ "ijul" , 7 },
{ "avgust" , 8 },
{ "sentjabr'" , 9 },
{ "oktjabr'" , 10 },
{ "nojabr'" , 11 },
{ "dekabr'" , 12 },
// Abkhaz 1 | аҧсшәа (aṗsšʷa)
{ "ианвар" , 1 },
{ "февраль" , 2 },
{ "март" , 3 },
{ "апрель" , 4 },
{ "маи" , 5 },
{ "ииун" , 6 },
{ "ииуль" , 7 },
{ "август" , 8 },
{ "сентиабр" , 9 },
{ "октиабр" , 10 },
{ "ноиабр" , 11 },
{ "декабр" , 12 },
{ "ianvar" , 1 },
{ "fevral'" , 2 },
{ "mart" , 3 },
{ "aprel'" , 4 },
{ "mai" , 5 },
{ "iiun" , 6 },
{ "iiul'" , 7 },
{ "avgust" , 8 },
{ "sentiabr" , 9 },
{ "oktiabr" , 10 },
{ "noiabr" , 11 },
{ "dekabr" , 12 },
// Adyghe 1 | адыгэбзэ (adəgăbză) see rus
{ "мэзае" , 1 },
{ "щӀышылэ" , 2 },
{ "гъатхэпэ" , 3 },
{ "мэлыжьыхь", 4 },
{ "нэкъыгъэ" , 5 },
{ "мэкъуауэгъуэ", 6 },
{ "бадзэуэгъуэ", 7 },
{ "шыщхьэӀу" , 8 },
{ "фокӀадэ" , 9 },
{ "жэпуэгъуэ", 10 },
{ "щэкӀуэгъуэ", 11 },
{ "дыгъэгъазэ", 12 },
{ "măzaje" , 1 },
{ "ṣ̌ʿəšəlă" , 2 },
{ "ġatĥălă" , 3 },
{ "măləẓ̌əḥ" , 4 },
{ "năqəġă" , 5 },
{ "măqʷauăġʷă" , 6 },
{ "badzăuăġʷă" , 7 },
{ "šəṣ̌ḥălu" , 8 },
{ "foč̤adă" , 9 },
{ "žăpuăġʷă" , 10 },
{ "ṣ̌ăḳʷăġʷă", 11 },
{ "dəġăġază" , 12 },
// Afrikaans
{ "januarie" , 1 },
{ "februarie" , 2 },
{ "maart" , 3 },
{ "april" , 4 },
{ "mei" , 5 },
{ "junie" , 6 },
{ "julie" , 7 },
{ "augustus" , 8 },
{ "september" , 9 },
{ "oktober" , 10 },
{ "november" , 11 },
{ "desember" , 12 },
// Alabama | Albaamo innaaɬiilka
{ "hasi cháffàaka" , 1 },
{ "hasi hachàalímmòona hasiholtina aɬɬámmòona", 1 },
{ "hasiholtina hachàalímmòona", 1 },
{ "febwiiri" , 2 },
{ "hasiholtina istatókla", 2 },
{ "màchka" , 3 },
{ "hasiholtina istatótchìina", 3 },
{ "eyprilka" , 4 },
{ "hasiholtina istonóostàaka", 4 },
{ "meyka" , 5 },
{ "hasiholtina istatáɬɬàapi", 5 },
{ "hasiholtina istahánnàali", 6 },
{ "hasiholtina istontóklo", 7 },
{ "awkoska" , 8 },
{ "hasiholtina istontótchìina", 8 },
{ "hasiholtina istachákkàali", 9 },
{ "hasiholtina ispókkòoli", 10 },
{ "hasiholtina istapókkòolawah cháffàaka", 11 },
{ "hasiholtina istanóoka", 12 },
{ "hasiholtina istapókkòolawah tóklo", 12 },
// Albanian | shqip
{ "janar" , 1 },
{ "shkurt" , 2 },
{ "mars" , 3 },
{ "prill" , 4 },
{ "maj" , 5 },
{ "qershor" , 6 },
{ "korrik" , 7 },
{ "gusht" , 8 },
{ "shtator" , 9 },
{ "tetor" , 10 },
{ "nëntor" , 11 },
{ "dhjetor" , 12 },
// Amharic | አማርኛ (ămarəña) & Tigrinya | ትግርኛ (təgrəña)
{ "ጃንዩወሪ" , 1 },
{ "ፌብሩወሪ" , 2 },
{ "ማርች" , 3 },
{ "ኤፕረል" , 4 },
{ "ሜይ" , 5 },
{ "ጁን" , 6 },
{ "ጁላይ" , 7 },
{ "ኦገስት" , 8 },
{ "ሴፕቴምበር", 9 },
{ "ኦክተውበር", 10 },
{ "ኖቬምበር" , 11 },
{ "ዲሴምበር" , 12 },
{ "jañuwări" , 1 },
{ "februwări" , 2 },
{ "marč" , 3 },
{ "epräl" , 4 },
{ "mey" , 5 },
{ "jun" , 6 },
{ "julay" , 7 },
{ "ogäst" , 8 },
{ "septembär" , 9 },
{ "oktäwbär" , 10 },
{ "novembär" , 11 },
{ "disembär" , 12 },
// Arabic 1 | العربية (al-ʿarabīyâ)
{ "يناير" , 1 },
{ "فبراير" , 2 },
{ "مارس" , 3 },
{ "أبريل" , 4 },
{ "مايو" , 5 },
{ "يونيو" , 6 },
{ "يوليو" , 7 },
{ "أغسطس" , 8 },
{ "سبتمبر" , 9 },
{ "أكتوبر" , 10 },
{ "نوفمبر" , 11 },
{ "ديسمبر" , 12 },
{ "yanāyir" , 1 },
{ "fibrāyir" , 2 },
{ "māris" , 3 },
{ "abrīl" , 4 },
{ "māyū" , 5 },
{ "yūniyū" , 6 },
{ "yūliyū" , 7 },
{ "aġusṭus" , 8 },
{ "sibtambar" , 9 },
{ "uktūbar" , 10 },
{ "nūfambar" , 11 },
{ "dīsambar" , 12 },
// Aragonese | aragonés
{ "chinero" , 1 },
{ "frebero" , 2 },
{ "marzo" , 3 },
{ "abril" , 4 },
{ "mayo" , 5 },
{ "chunio" , 6 },
{ "chulio" , 7 },
{ "agosto" , 8 },
{ "setiembre" , 9 },
{ "otubre" , 10 },
{ "nobiembre" , 11 },
{ "abiento" , 12 },
// Armenian | Հայերեն (hayeren)
{ "հունվար" , 1 },
{ "փետրվար" , 2 },
{ "մարտ" , 3 },
{ "ապրիլ" , 4 },
{ "մայիս" , 5 },
{ "հունիս" , 6 },
{ "հուլիս" , 7 },
{ "օգոստոս" , 8 },
{ "սեպտեմբեր", 9 },
{ "հոկտեմբեր", 10 },
{ "նոյեմբեր" , 11 },
{ "դեկտեմբեր", 12 },
{ "hounvar" , 1 },
{ "ṗetrvar" , 2 },
{ "mart" , 3 },
{ "april" , 4 },
{ "mayis" , 5 },
{ "hounis" , 6 },
{ "houlis" , 7 },
{ "ōgostos" , 8 },
{ "september" , 9 },
{ "hoktember" , 10 },
{ "noyember" , 11 },
{ "dektember" , 12 },
// Aromanian | armãneascã
{ "yinar" , 1 },
{ "shcurtu" , 2 },
{ "martsu" , 3 },
{ "apriir" , 4 },
{ "mailu" , 5 },
{ "cirisharlu" , 6 },
{ "alunarlu" , 7 },
{ "avgustu" , 8 },
{ "yizmaciunjle" , 9 },
{ "xumedru" , 10 },
{ "brumarlu" , 11 },
{ "andreulu" , 12 },
// Assamese | অসমীয়া (ôĥômīyā)
{ "জানুৱাৰী", 1 },
{ "ফেব্ৰুৱাৰী", 2 },
{ "মাৰ্চ" , 3 },
{ "এপ্ৰিল", 4 },
{ "মে" , 5 },
{ "জুন" , 6 },
{ "জুলাই" , 7 },
{ "আগচ্ট" , 8 },
{ "চেপ্টেম্বৰ", 9 },
{ "অক্টোবৰ", 10 },
{ "নৱেম্বৰ", 11 },
{ "ডিচেম্বৰ", 12 },
{ "jānuwārī" , 1 },
{ "pʰebruwārī" , 2 },
{ "mārč" , 3 },
{ "epril" , 4 },
{ "me" , 5 },
{ "jun" , 6 },
{ "julāi" , 7 },
{ "āgôčṭ" , 8 },
{ "čepṭembôr" , 9 },
{ "ôkṭobôr" , 10 },
{ "nôwembôr" , 11 },
{ "ḍičembôr" , 12 },
// Asturian | asturianu
{ "xineru" , 1 },
{ "febreru" , 2 },
{ "marzu" , 3 },
{ "abril" , 4 },
{ "mayu" , 5 },
{ "xunu" , 6 },
{ "xunetu" , 7 },
{ "agostu" , 8 },
{ "setiembre" , 9 },
{ "ochobre" , 10 },
{ "payares" , 11 },
{ "avientu" , 12 },
// Aymara | aymar
{ "chichu" , 1 },
{ "anata" , 2 },
{ "chuqa" , 3 },
{ "llamayu" , 4 },
{ "qasïwi" , 5 },
{ "mara t’aqa" , 6 },
{ "huillka kuti" , 7 },
{ "llumpaqa" , 8 },
{ "sata" , 9 },
{ "chika sata" , 10 },
{ "lapaka" , 11 },
{ "jallu qallta" , 12 },
// Azerbaijani | azərbaycanca / азәрбајҹанҹа
{ "yanvar" , 1 },
{ "fevral" , 2 },
{ "mart" , 3 },
{ "aprel" , 4 },
{ "may" , 5 },
{ "iyun" , 6 },
{ "iyul" , 7 },
{ "avqust" , 8 },
{ "sentyabr" , 9 },
{ "oktyabr" , 10 },
{ "noyabr" , 11 },
{ "dekabr" , 12 },
{ "јанвар" , 1 },
{ "феврал" , 2 },
{ "март" , 3 },
{ "апрел" , 4 },
{ "май" , 5 },
{ "ијун" , 6 },
{ "ијул" , 7 },
{ "август" , 8 },
{ "сентјабр" , 9 },
{ "октјабр" , 10 },
{ "нојабр" , 11 },
{ "декабр" , 12 },
// Bambara | Bamana
{ "zanwuye" , 1 },
{ "zanwiye" , 1 },
{ "feburuye" , 2 },
{ "marisi" , 3 },
{ "awirili" , 4 },
{ "mɛ" , 5 },
{ "zuɛn" , 6 },
{ "zuluye" , 7 },
{ "uti" , 8 },
{ "sɛtanburu" , 9 },
{ "ɔkutɔburu" , 10 },
{ "nowanburu" , 11 },
{ "desanburu" , 12 },
// Bashkir | башҡорт (bašqort)
{ "ғинуар" , 1 },
{ "февраль" , 2 },
{ "март" , 3 },
{ "апрель" , 4 },
{ "май" , 5 },
{ "июнь" , 6 },
{ "июль" , 7 },
{ "август" , 8 },
{ "сентябрь" , 9 },
{ "октябрь" , 10 },
{ "ноябрь" , 11 },
{ "декабрь" , 12 },
{ "ġinuar" , 1 },
{ "fevral'" , 2 },
{ "mart" , 3 },
{ "aprel'" , 4 },
{ "maj" , 5 },
{ "ijun'" , 6 },
{ "ijul'" , 7 },
{ "avgust" , 8 },
{ "sentjabr'" , 9 },
{ "oktjabr'" , 10 },
{ "nojabr'" , 11 },
{ "dekabr'" , 12 },
// Basque | euskara
{ "urtarrila" , 1 },
{ "otsaila" , 2 },
{ "martxoa" , 3 },
{ "apirila" , 4 },
{ "maiatza" , 5 },
{ "ekaina" , 6 },
{ "uztaila" , 7 },
{ "abuztua" , 8 },
{ "iraila" , 9 },
{ "urria" , 10 },
{ "azaroa" , 11 },
{ "abendua" , 12 },
// Belarusian | беларуская / biełaruskaja
{ "студзень" , 1 },
{ "люты" , 2 },
{ "сакавік" , 3 },
{ "красавік" , 4 },
{ "май" , 5 },
{ "чэрвень" , 6 },
{ "ліпень" , 7 },
{ "жнівень" , 8 },
{ "верасень" , 9 },
{ "кастрычнік", 10 },
{ "лістапад" , 11 },
{ "снежань" , 12 },
{ "studzień" , 1 },
{ "luty" , 2 },
{ "sakavik" , 3 },
{ "krasavik" , 4 },
{ "maj" , 5 },
{ "červień" , 6 },
{ "lipień" , 7 },
{ "žnivień" , 8 },
{ "vierasień" , 9 },
{ "kastryčnik" , 10 },
{ "listapad" , 11 },
{ "sniežań" , 12 },
// Bengali | বাংলা (bāṁlā)
{ "জানুয়ারী", 1 },
{ "ফেব্রুয়ারী", 2 },
{ "মার্চ" , 3 },
{ "এপ্রিল", 4 },
{ "মে" , 5 },
{ "জুন" , 6 },
{ "জুলাই" , 7 },
{ "আগস্ট" , 8 },
{ "সেপ্টেম্বর", 9 },
{ "অক্টোবর", 10 },
{ "নভেম্বর", 11 },
{ "ডিসেম্বর", 12 },
{ "jānuyārī" , 1 },
{ "pʰebruyārī" , 2 },
{ "mārč" , 3 },
{ "epril" , 4 },
{ "me" , 5 },
{ "jun" , 6 },
{ "julāi" , 7 },
{ "āgôsṭ" , 8 },
{ "sepṭembôr" , 9 },
{ "ôkṭobôr" , 10 },
{ "nôbʰembôr" , 11 },
{ "ḍisembôr" , 12 },
// Bislama
{ "januware" , 1 },
{ "februari" , 2 },
{ "maj" , 3 },
{ "epril" , 4 },
{ "mei" , 5 },
{ "jun" , 6 },
{ "julae" , 7 },
{ "ogis" , 8 },
{ "septemba" , 9 },
{ "oktoba" , 10 },
{ "novemba" , 11 },
{ "desemba" , 12 },
// Blackfoot | siksiká
{ "áísstoyiimsstaa" , 1 },
{ "isspssáísskitsimao’p", 1 },
{ "ómahksíki’somm", 1 },
{ "píítaiáí" , 2 },
{ "saómmitsiki’somm", 2 },
{ "sa’aiki’somm" , 3 },
{ "matsiyíkkapisaii’somm", 4 },
{ "aapistsísskitsaato’s", 5 },
{ "ito’tsisamssootaa", 6 },
{ "niipiaato’s" , 6 },
{ "otsítsipottaatpi pi’kssiiksi", 6 },
{ "pi’kssííksi otsitaowayiihpiaawa", 6 },
{ "niipóómahkátoyiiksistsikaa to’s", 7 },
{ "iitáyiitsimaahkao’p", 8 },
{ "pákkii’pistsi otsíai’tssp", 8 },
{ "áwákaasiiki’somm", 9 },
{ "iitáípa’ksiksini’kayi pa’kki’pistsi", 9 },
{ "iitáómatapapittssko", 9 },
{ "mo’kááto’s" , 10 },
{ "sa’áiksi itáómatooyi", 10 },
{ "iitáóhkohtao’p", 11 },
{ "iitáó’tsstoyi" , 11 },
{ "iitáóhkanaikokotoyi niítahtaistsi", 11 },
{ "isstááato’s" , 12 },
{ "misámiko’komiaato’s", 12 },
{ "omahkátoyiiki’sommiatto’s", 12 },
{ "omahkátoyiiksistsiko", 12 },
// Bosnian | bosanski / босански
{ "januar" , 1 },
{ "februar" , 2 },
{ "mart" , 3 },
{ "april" , 4 },
{ "maj" , 5 },
{ "juni" , 6 },
{ "juli" , 7 },
{ "august" , 8 },
{ "septembar" , 9 },
{ "oktobar" , 10 },
{ "novembar" , 11 },
{ "decembar" , 12 },
{ "јануар" , 1 },
{ "фебруар" , 2 },
{ "март" , 3 },
{ "април" , 4 },
{ "мај" , 5 },
{ "јуни" , 6 },
{ "јули" , 7 },
{ "аугуст" , 8 },
{ "септембар", 9 },
{ "октобар" , 10 },
{ "новембар" , 11 },
{ "децембар" , 12 },
// Breton | brezhoneg
{ "miz genver" , 1 },
{ "miz c’hwevrer" , 2 },
{ "miz meurzh" , 3 },
{ "miz ebrel" , 4 },
{ "miz mae" , 5 },
{ "miz mezheven" , 6 },
{ "miz gouere" , 7 },
{ "miz eost" , 8 },
{ "miz gwengolo" , 9 },
{ "miz here" , 10 },
{ "miz du" , 11 },
{ "miz kerzu" , 12 },
// Brithenig
{ "ianeir" , 1 },
{ "marth" , 3 },
{ "ebril" , 4 },
{ "mai" , 5 },
{ "methef" , 6 },
{ "ffinystiw" , 7 },
{ "awst" , 8 },
{ "ystreblanc" , 9 },
{ "sedref" , 10 },
{ "muisnir" , 11 },
{ "arbennir" , 12 },
// Bulgarian | български (bǎlgarski)
{ "януари" , 1 },
{ "февруари" , 2 },
{ "март" , 3 },
{ "април" , 4 },
{ "май" , 5 },
{ "юни" , 6 },
{ "юли" , 7 },
{ "август" , 8 },
{ "септември", 9 },
{ "октомври" , 10 },
{ "ноември" , 11 },
{ "декември" , 12 },
{ "januari" , 1 },
{ "fevruari" , 2 },
{ "mart" , 3 },
{ "april" , 4 },
{ "maj" , 5 },
{ "juni" , 6 },
{ "juli" , 7 },
{ "avgust" , 8 },
{ "septemvri" , 9 },
{ "oktomvri" , 10 },
{ "noemvri" , 11 },
{ "dekemvri" , 12 },
// Burmese | မ္ရန္မာစာ (mẏãmasa)
{ "ဇန္နဝာရီ", 1 },
{ "ဖေဖော္ဝာရီ", 2 },
{ "မတ္" , 3 },
{ "ဧပ္ရီ" , 4 },
{ "မေ" , 5 },
{ "ဇ္ဝန္" , 6 },
{ "ဇူလုိင္", 7 },
{ "ဩဂုတ္" , 8 },
{ "စက္တင္ဘာ", 9 },
{ "အောက္တုိဘာ", 10 },
{ "နုိဝင္ဘာ", 11 },
{ "ဒီဇင္ဘာ", 12 },
{ "zãnáwaẏi" , 1 },
{ "pʰepʰɔwaẏi" , 2 },
{ "maʿ" , 3 },
{ "epẏi" , 4 },
{ "me" , 5 },
{ "zũ" , 6 },
{ "zulaĩ" , 7 },
{ "ɔ̀gouʿ" , 8 },
{ "seʿtĩbʰa" , 9 },
{ "auʿtobʰa" , 10 },
{ "nowĩbʰa" , 11 },
{ "dizĩbʰa" , 12 },
// Catalan | català
{ "gener" , 1 },
{ "febrer" , 2 },
{ "març" , 3 },
{ "abril" , 4 },
{ "maig" , 5 },
{ "juny" , 6 },
{ "juliol" , 7 },
{ "agost" , 8 },
{ "setembre" , 9 },
{ "octubre" , 10 },
{ "novembre" , 11 },
{ "desembre" , 12 },
// Chamorro | Chamoru
{ "ineru" , 1 },
{ "fibreru" , 2 },
{ "måtso" , 3 },
{ "abrit" , 4 },
{ "måyu" , 5 },
{ "huño" , 6 },
{ "hulio" , 7 },
{ "agosto" , 8 },
{ "septembre" , 9 },
{ "oktubri" , 10 },
{ "nubembre" , 11 },
{ "disembre" , 12 },
// Cherokee | ᏣᎳᎩ / tsalagi
{ "ᏚᏃᎸᏔᏂ" , 1 },
{ "ᎧᎦᎵ" , 2 },
{ "ᎠᏄᏱ" , 3 },
{ "ᎧᏩᏂ" , 4 },
{ "ᎠᎾᎠᎬᏘ" , 5 },
{ "ᏕᎭᎷᏱ" , 6 },
{ "ᎫᏰᏉᏂ" , 7 },
{ "ᎦᎶᏂᎢ" , 8 },
{ "ᏚᎵᎢᏍᏗ" , 9 },
{ "ᏚᏂᏅᏗ" , 10 },
{ "ᏄᏓᏕᏆ" , 11 },
{ "ᎥᏍᎩᎦ" , 12 },
{ "dunolvtani" , 1 },
{ "kagali" , 2 },
{ "anuyi" , 3 },
{ "kawani" , 4 },
{ "anaagvti" , 5 },
{ "dehaluyi" , 6 },
{ "guyequoni" , 7 },
{ "galonii" , 8 },
{ "duliisdi" , 9 },
{ "duninvdi" , 10 },
{ "nudadequa" , 11 },
{ "vsgiga" , 12 },
// Cheyenne | Tsétsêhéstaestse
{ "hohtseeše’he" , 1 },
{ "ma’xêhohtseeše’he", 2 },
{ "ponoma’a’êhaseneeše’he", 3 },
{ "vehpotseeše’he" , 4 },
{ "matse’omeeše’he", 5 },
{ "enano’eeše’he", 6 },
{ "meaneeše’he" , 7 },
{ "oeneneeše’he" , 8 },
{ "tonoeveeše’he" , 9 },
{ "se’enehe" , 10 },
{ "he’koneneeše’he", 11 },
{ "ma’xêhe’koneneeše’he", 12 },
// Chinese | æ¼¢èª/æ±è¯ (hànyǔ), ᪘ئ (zhōngwén)
{ "ä¸æ" , 1 },
{ "äºæ" , 2 },
{ "ä¸æ" , 3 },
{ "åæ" , 4 },
{ "äºæ" , 5 },
{ "å
æ" , 6 },
{ "ä¸æ" , 7 },
{ "å
«æ" , 8 },
{ "ä¹æ" , 9 },
{ "åæ" , 10 },
{ "åä¸æ" , 11 },
{ "åäºæ" , 12 },
{ "yīyuè" , 1 },
{ "èryuè" , 2 },
{ "sānyuè" , 3 },
{ "sìyuè" , 4 },
{ "wǔyuè" , 5 },
{ "liùyuè" , 6 },
{ "qīyuè" , 7 },
{ "bāyuè" , 8 },
{ "jiǔyuè" , 9 },
{ "shíyuè" , 10 },
{ "shíyīyuè" , 11 },
{ "shí'èryuè" , 12 },
// Chuvash 1 | чӑваш (čăvaš)
{ "кӑрлач" , 1 },
{ "нарӑс" , 2 },
{ "кӗҫӗн кӑрлач", 2 },
{ "пуш" , 3 },
{ "ака" , 4 },
{ "ҫу" , 5 },
{ "ҫӗртме" , 6 },
{ "утӑ" , 7 },
{ "ҫурла" , 8 },
{ "авӑн" , 9 },
{ "юпа" , 10 },
{ "чӳк" , 11 },
{ "раштав" , 12 },
{ "kărlač" , 1 },
{ "narăs" , 2 },
{ "kĕśĕn kărlač" , 2 },
{ "puš" , 3 },
{ "aka" , 4 },
{ "śu" , 5 },
{ "śĕrtme" , 6 },
{ "ută" , 7 },
{ "śurla" , 8 },
{ "avăn" , 9 },
{ "jupa" , 10 },
{ "čük" , 11 },
{ "raštav" , 12 },
// Classical Mongolian | ᠮᠣᠩᠭᠣᠯ (moŋġol)
{ "ᠨᠢᠭᠡᠳᠦᠭᠡᠷ ᠰᠠᠷᠠ", 1 },
{ "ᠬᠣᠶᠠᠷᠳᠤᠭᠠᠷ ᠰᠠᠷᠠ", 2 },
{ "ᠭᠤᠷᠪᠠᠳᠤᠭᠠᠷ ᠰᠠᠷᠠ", 3 },
{ "ᠳᠥᠷᠪᠡᠳᠦᠭᠡᠷ ᠰᠠᠷᠠ", 4 },
{ "ᠲᠠᠪᠤᠳᠤᠭᠠᠷ ᠰᠠᠷᠠ", 5 },
{ "ᠵᠢᠷᠭᠤᠳᠤᠭᠠᠷ ᠰᠠᠷᠠ", 6 },
{ "ᠳᠣᠯᠤᠳᠤᠭᠠᠷ ᠰᠠᠷᠠ", 7 },
{ "ᠨᠠᠢᠮᠠᠳᠤᠭᠠᠷ ᠰᠠᠷᠠ᠂ ᠨᠠᠶᠢᠮᠠᠳᠤᠭᠠᠷ ᠰᠠᠷᠠ", 8 },
{ "ᠶᠢᠰᠦᠳᠦᠭᠡᠷ ᠰᠠᠷᠠ", 9 },
{ "ᠠᠷᠪᠠᠳᠤᠭᠠᠷ ᠰᠠᠷᠠ", 10 },
{ "ᠠᠷᠪᠠᠨ ᠨᠢᠭᠡᠳᠦᠭᠡᠷ ᠰᠠᠷᠠ", 11 },
{ "ᠠᠷᠪᠠᠨ ᠬᠣᠶᠠᠷᠳᠤᠭᠠᠷ ᠰᠠᠷᠠ", 12 },
{ "nigedüɣer sar-a" , 1 },
{ "qoyarduɣar sar-a" , 2 },
{ "gurbaduɣar sar-a" , 3 },
{ "dörbedüɣer sar-a", 4 },
{ "tabuduɣar sar-a" , 5 },
{ "jirguduɣar sar-a" , 6 },
{ "doluduɣar sar-a" , 7 },
{ "naimaduɣar sar-a" , 8 },
{ "nayimaduɣar sar-a", 8 },
{ "yisüdüɣer sar-a", 9 },
{ "arbaduɣar sar-a" , 10 },
{ "arban nigedüɣer sar-a", 11 },
{ "arban qoyarduɣar sar-a", 12 },
// Coptic (Bohairic) | ⲙⲉⲧⲛ̄ⲣⲉⲙⲛ̄ⲭⲏⲙⲓ (metənremənkhīmi)
{ "Ⲓⲁⲛⲟⲩⲁ̄ⲣⲓⲟⲥ", 1 },
{ "Ⲫⲉⲃⲣⲟⲩⲁ̄ⲣⲓⲟⲥ", 2 },
{ "Ⲙⲁⲣⲧⲓⲟⲥ", 3 },
{ "Ⲁⲡⲣⲓⲗⲓⲟⲥ", 4 },
{ "Ⲙⲁⲓⲟⲥ" , 5 },
{ "Ⲓⲟⲩⲛⲓⲟⲥ", 6 },
{ "Ⲓⲟⲩⲗⲓⲟⲥ", 7 },
{ "Ⲁⲩⲅⲟⲩⲥⲧⲟⲥ", 8 },
{ "Ⲥⲉⲡⲧⲉⲙⲃⲣⲓⲟⲥ", 9 },
{ "Ⲟⲕⲧⲱⲃⲣⲓⲟⲥ", 10 },
{ "Ⲛⲟⲉ̄ⲙⲃⲣⲓⲟⲥ", 11 },
{ "Ⲇⲉⲕⲉⲙⲃⲣⲓⲟⲥ", 12 },
{ "ianouàrios" , 1 },
{ "Ḟevrouàrios" , 2 },
{ "martios" , 3 },
{ "aprilios" , 4 },
{ "maios" , 5 },
{ "iounios" , 6 },
{ "ioulios" , 7 },
{ "augoustos" , 8 },
{ "septemvrios" , 9 },
{ "oktōvrios" , 10 },
{ "noèmvrios" , 11 },
{ "dekemvrios" , 12 },
// Cornish | Kernewek
{ "mys genver" , 1 },
{ "mys whevrer" , 2 },
{ "mys merth" , 3 },
{ "mys ebrel" , 4 },
{ "mys me" , 5 },
{ "mys metheven" , 6 },
{ "mys gortheren" , 7 },
{ "mys est" , 8 },
{ "mys gwyngala" , 9 },
{ "mys hedra" , 10 },
{ "mys du" , 11 },
{ "mys kevardhu" , 12 },
// Corsican | corsu
{ "ghjennaghju" , 1 },
{ "ferraghju" , 2 },
{ "marzu" , 3 },
{ "aprile" , 4 },
{ "maghju" , 5 },
{ "ghjugnu" , 6 },
{ "lugliu" , 7 },
{ "aostu" , 8 },
{ "sittembre" , 9 },
{ "uttobre" , 10 },
{ "nuvembre" , 11 },
{ "dicembre" , 12 },
// Crimean Tatar | qırımtatar / къырымтатар
{ "ocaq" , 1 },
{ "şubat" , 2 },
{ "mart" , 3 },
{ "nisan" , 4 },
{ "mayıs" , 5 },
{ "haziran" , 6 },
{ "temmuz" , 7 },
{ "ağustos" , 8 },
{ "eylül" , 9 },
{ "ekim" , 10 },
{ "qasım" , 11 },
{ "aralıq" , 12 },
{ "оджакъ" , 1 },
{ "шубат" , 2 },
{ "март" , 3 },
{ "нисан" , 4 },
{ "майыс" , 5 },
{ "хазиран" , 6 },
{ "теммуз" , 7 },
{ "агъустос" , 8 },
{ "эйлул" , 9 },
{ "эким" , 10 },
{ "къасым" , 11 },
{ "аралыкъ" , 12 },
// Croatian | hrvatski
{ "siječanj" , 1 },
{ "veljača" , 2 },
{ "ožujak" , 3 },
{ "travanj" , 4 },
{ "svibanj" , 5 },
{ "lipanj" , 6 },
{ "srpanj" , 7 },
{ "kolovoz" , 8 },
{ "rujan" , 9 },
{ "listopad" , 10 },
{ "studeni" , 11 },
{ "prosinac" , 12 },
// Czech | čeština
{ "leden" , 1 },
{ "únor" , 2 },
{ "březen" , 3 },
{ "duben" , 4 },
{ "květen" , 5 },
{ "červen" , 6 },
{ "červenec" , 7 },
{ "srpen" , 8 },
{ "září" , 9 },
{ "říjen" , 10 },
{ "listopad" , 11 },
{ "prosinec" , 12 },
// Danish 1 | dansk
{ "januar" , 1 },
{ "februar" , 2 },
{ "marts" , 3 },
{ "april" , 4 },
{ "maj" , 5 },
{ "juni" , 6 },
{ "juli" , 7 },
{ "august" , 8 },
{ "september" , 9 },
{ "oktober" , 10 },
{ "november" , 11 },
{ "december" , 12 },
// Dari | دری (darī)
{ "جنوری" , 1 },
{ "فبرری" , 2 },
{ "مارچ" , 3 },
{ "اپریل" , 4 },
{ "مئ" , 5 },
{ "جون" , 6 },
{ "جولای" , 7 },
{ "اگست" , 8 },
{ "سپتمبر" , 9 },
{ "آکتوبر" , 10 },
{ "نومبر" , 11 },
{ "دسمبر" , 12 },
{ "janvarī" , 1 },
{ "febrarī" , 2 },
{ "mārč" , 3 },
{ "aprīl" , 4 },
{ "maʾi" , 5 },
{ "jūn" , 6 },
{ "jūlāy" , 7 },
{ "agast" , 8 },
{ "septambar" , 9 },
{ "āktōbar" , 10 },
{ "novambar" , 11 },
{ "disambar" , 12 },
// Delaware | Lënape
{ "enikwsi kishux" , 1 },
{ "chkwali kishux" , 2 },
{ "xamokhwite kishux" , 3 },
{ "kwetayoxe kishux" , 4 },
{ "tainipën" , 5 },
{ "kichinipën" , 6 },
{ "yakatamwe kishux" , 7 },
{ "lainipën" , 7 },
{ "sakayoxe kishux" , 8 },
{ "winaminge" , 8 },
{ "kichitahkok kishux", 9 },
{ "puksit kishux" , 10 },
{ "wini kishux" , 11 },
{ "xakhokwe kishux" , 12 },
{ "kichiluwàn" , 12 },
{ "muxkòtae kishux" , 12 },
// Divehi | ދިވެހިބަސް (divehibas)
{ "ޖެނުއަރީ" , 1 },
{ "ފެބުރުއަރީ", 2 },
{ "މާޗް" , 3 },
{ "އެޕްރިލް" , 4 },
{ "މޭ" , 5 },
{ "ޖޫން" , 6 },
{ "ޖުލައި" , 7 },
{ "އޯގަސްޓް" , 8 },
{ "ސެޕްޓެމްބަރު", 9 },
{ "އޮކްޓޯބަރު", 10 },
{ "ނޮވެމްބަރު", 11 },
{ "ޑިސެމްބަރު", 12 },
{ "jenu'arī" , 1 },
{ "feburu'arī" , 2 },
{ "māč" , 3 },
{ "epril" , 4 },
{ "mē" , 5 },
{ "jūn" , 6 },
{ "jula'i" , 7 },
{ "ōgasṫ" , 8 },
{ "sepṫembaru" , 9 },
{ "okṫōbaru" , 10 },
{ "novembaru" , 11 },
{ "ḋisembaru" , 12 },
// Dogri | डोगरी (ḍogrī)
{ "जनवरी" , 1 },
{ "फरवरी" , 2 },
{ "मार्च" , 3 },
{ "अप्रैल", 4 },
{ "मेई" , 5 },
{ "जून" , 6 },
{ "जुलाई" , 7 },
{ "अगस्त" , 8 },
{ "सतम्बर", 9 },
{ "अक्तूबर", 10 },
{ "नवम्बर", 11 },
{ "दसम्बर", 12 },
{ "janvarī" , 1 },
{ "pʰarvarī" , 2 },
{ "mārč" , 3 },
{ "apræl" , 4 },
{ "meī" , 5 },
{ "jūn" , 6 },
{ "julāī" , 7 },
{ "agast" , 8 },
{ "satambar" , 9 },
{ "aktūbar" , 10 },
{ "navambar" , 11 },
{ "dasambar" , 12 },
// Dutch 1 | Nederlands
{ "januari" , 1 },
{ "februari" , 2 },
{ "maart" , 3 },
{ "april" , 4 },
{ "mei" , 5 },
{ "juni" , 6 },
{ "juli" , 7 },
{ "augustus" , 8 },
{ "september" , 9 },
{ "oktober" , 10 },
{ "november" , 11 },
{ "december" , 12 },
// Dzongkha | རྫོང་ཁ་ (rdzoṅ.kʰa.)
{ "སྤྱི་ཟླཝ་དང་པོ་", 1 },
{ "སྤྱི་ཟླཝ་གཉིས་པ་", 2 },
{ "སྤྱི་ཟླཝ་གསུམ་པ་", 3 },
{ "སྤྱི་ཟླཝ་བཞི་པ་", 4 },
{ "སྤྱི་ཟླཝ་ལྔ་པ་", 5 },
{ "སྤྱི་ཟླཝ་དྲུག་པ་", 6 },
{ "སྤྱི་ཟླཝ་བདུན་པ་", 7 },
{ "སྤྱི་ཟླཝ་བརྒྱད་པ་", 8 },
{ "སྤྱི་ཟླཝ་དགུ་པ་", 9 },
{ "སྤྱི་ཟླཝ་བཅུ་པ་", 10 },
{ "སྤྱི་ཟླཝ་བཅུ་གཅིག་པ་", 11 },
{ "སྤྱི་ཟླཝ་བཅུ་གཉིས་པ་", 12 },
{ "spyi.zlaw.daṅ.po.", 1 },
{ "spyi.zlaw.gñis.pa.", 2 },
{ "spyi.zlaw.gsum.pa.", 3 },
{ "spyi.zlaw.bži.pa.", 4 },
{ "spyi.zlaw.lṅa.pa.", 5 },
{ "spyi.zlaw.drug.pa.", 6 },
{ "spyi.zlaw.bdun.pa.", 7 },
{ "spyi.zlaw.brgyad.pa.", 8 },
{ "spyi.zlaw.dgu.pa." , 9 },
{ "spyi.zlaw.bču.pa.", 10 },
{ "spyi.zlaw.bču.gčig.pa.", 11 },
{ "spyi.zlaw.bču.gñis.pa.", 12 },
// English
{ "january" , 1 },
{ "february" , 2 },
{ "march" , 3 },
{ "april" , 4 },
{ "may" , 5 },
{ "june" , 6 },
{ "july" , 7 },
{ "august" , 8 },
{ "september" , 9 },
{ "october" , 10 },
{ "november" , 11 },
{ "december" , 12 },
// Esperanto
{ "januaro" , 1 },
{ "februaro" , 2 },
{ "marto" , 3 },
{ "aprilo" , 4 },
{ "majo" , 5 },
{ "junio" , 6 },
{ "julio" , 7 },
{ "aŭgusto" , 8 },
{ "septembro" , 9 },
{ "oktobro" , 10 },
{ "novembro" , 11 },
{ "decembro" , 12 },
// Estonian 1 | eesti
{ "jaanuar" , 1 },
{ "veebruar" , 2 },
{ "märts" , 3 },
{ "aprill" , 4 },
{ "mai" , 5 },
{ "juuni" , 6 },
{ "juuli" , 7 },
{ "august" , 8 },
{ "september" , 9 },
{ "oktoober" , 10 },
{ "november" , 11 },
{ "detsember" , 12 },
// Even 1 | эвэды (ėvėdy) see rus
{ "тугэни хэе", 1 },
{ "эври мир" , 2 },
{ "эври ечэн" , 3 },
{ "эври билэн", 4 },
{ "эври унма" , 5 },
{ "эври чордакич", 6 },
{ "дюгани хэе", 7 },
{ "ойчири чордакич", 8 },
{ "ойчири унма", 9 },
{ "ойчири билэн", 10 },
{ "ойчири ечэн", 11 },
{ "ойчири мир", 12 },
{ "tugėni ĥėje" , 1 },
{ "ėvri mir" , 2 },
{ "ėvri ječėn" , 3 },
{ "ėvri bilėn" , 4 },
{ "ėvri unma" , 5 },
{ "ėvri čordakič" , 6 },
{ "djugani ĥėje" , 7 },
{ "ojčiri čordakič", 8 },
{ "ojčiri unma" , 9 },
{ "ojčiri bilėn" , 10 },
{ "ojčiri ječėn" , 11 },
{ "ojčiri mir" , 12 },
// Evenki | эвэды (ėvėdy)
{ "январь" , 1 },
{ "мирэ" , 1 },
{ "февраль" , 2 },
{ "мирэкэн" , 2 },
{ "гиравун" , 2 },
{ "мирэ" , 2 },
{ "март" , 3 },
{ "эктэӈкирэ", 3 },
{ "апрель" , 4 },
{ "май" , 5 },
{ "июнь" , 6 },
{ "мучун" , 6 },
{ "июль" , 7 },
{ "иркин" , 7 },
{ "август" , 8 },
{ "иркин" , 8 },
{ "сентябрь" , 9 },
{ "октябрь" , 10 },
{ "ноябрь" , 11 },
{ "хугдарпи" , 11 },
{ "декабрь" , 12 },
{ "хэгдыг" , 12 },
{ "janvar'" , 1 },
{ "mirė" , 1 },
{ "fevral'" , 2 },
{ "mirėkėn" , 2 },
{ "giravun" , 2 },
{ "mirė" , 2 },
{ "mart" , 3 },
{ "ėktėṅkirė" , 3 },
{ "aprel'" , 4 },
{ "maj" , 5 },
{ "ijun'" , 6 },
{ "mučun" , 6 },
{ "ijul'" , 7 },
{ "irkin" , 7 },
{ "avgust" , 8 },
{ "irkin" , 8 },
{ "sentjabr'" , 9 },
{ "oktjabr'" , 10 },
{ "nojabr'" , 11 },
{ "ĥugdarpi" , 11 },
{ "dekabr'" , 12 },
{ "ĥėgdyg" , 12 },
// Ewe 1 | Ɛʋɛgbɛ
{ "dzove" , 1 },
{ "dzodze" , 2 },
{ "tedoxe" , 3 },
{ "afɔfiɛ" , 4 },
{ "damɛ" , 5 },
{ "masa" , 6 },
{ "siamlɔm" , 7 },
{ "dasiamime" , 8 },
{ "anyɔnyɔ" , 9 },
{ "kele" , 10 },
{ "adeɛmekpɔxe" , 11 },
{ "dzome" , 12 },
// Faroese | føroyskt
{ "januar" , 1 },
{ "februar" , 2 },
{ "mars" , 3 },
{ "apríl" , 4 },
{ "mai" , 5 },
{ "juni" , 6 },
{ "juli" , 7 },
{ "august" , 8 },
{ "september" , 9 },
{ "oktober" , 10 },
{ "november" , 11 },
{ "desember" , 12 },
// Fijian | vosa Vakaviti
{ "janueri" , 1 },
{ "feperueri" , 2 },
{ "maji" , 3 },
{ "epereli" , 4 },
{ "me" , 5 },
{ "june" , 6 },
{ "julai" , 7 },
{ "okosita" , 8 },
{ "sepiteba" , 9 },
{ "okotova" , 10 },
{ "noveba" , 11 },
{ "tiseba" , 12 },
// Finnish | suomi
{ "tammikuu" , 1 },
{ "helmikuu" , 2 },
{ "maaliskuu" , 3 },
{ "huhtikuu" , 4 },
{ "toukokuu" , 5 },
{ "kesäkuu" , 6 },
{ "heinäkuu" , 7 },
{ "elokuu" , 8 },
{ "syyskuu" , 9 },
{ "lokakuu" , 10 },
{ "marraskuu" , 11 },
{ "joulukuu" , 12 },
// Francoprovençal | arpitan
{ "janviér" , 1 },
{ "fevriér" , 2 },
{ "mârs" , 3 },
{ "avril" , 4 },
{ "mê" , 5 },
{ "jouen" , 6 },
{ "julyèt" , 7 },
{ "oût" , 8 },
{ "septembro" , 9 },
{ "octobro" , 10 },
{ "novembro" , 11 },
{ "dècembro" , 12 },
// French | français
{ "janvier" , 1 },
{ "février" , 2 },
{ "mars" , 3 },
{ "avril" , 4 },
{ "mai" , 5 },
{ "juin" , 6 },
{ "juillet" , 7 },
{ "août" , 8 },
{ "septembre" , 9 },
{ "octobre" , 10 },
{ "novembre" , 11 },
{ "décembre" , 12 },
// Frisian 1 | Frysk
{ "jannewaris" , 1 },
{ "febrewaris" , 2 },
{ "maart" , 3 },
{ "april" , 4 },
{ "maaie" , 5 },
{ "juny" , 6 },
{ "july" , 7 },
{ "augustus" , 8 },
{ "septimber" , 9 },
{ "oktober" , 10 },
{ "novimber" , 11 },
{ "desimber" , 12 },
// Friulian | furlan
{ "genâr" , 1 },
{ "fevrâr" , 2 },
{ "març" , 3 },
{ "avrîl" , 4 },
{ "mai" , 5 },
{ "jugn" , 6 },
{ "lui" , 7 },
{ "avost" , 8 },
{ "setembar" , 9 },
{ "otubar" , 10 },
{ "novembar" , 11 },
{ "decembar" , 12 },
// Gagauz | gagauz / гагауз
{ "yanvar" , 1 },
{ "fevral" , 2 },
{ "mart" , 3 },
{ "april" , 4 },
{ "may" , 5 },
{ "iyün" , 6 },
{ "iyül" , 7 },
{ "avgust" , 8 },
{ "sentäbri" , 9 },
{ "oktäbri" , 10 },
{ "noyabri" , 11 },
{ "dekabri" , 12 },
{ "январ" , 1 },
{ "феврал" , 2 },
{ "март" , 3 },
{ "април" , 4 },
{ "май" , 5 },
{ "ийӱн" , 6 },
{ "ийӱл" , 7 },
{ "август" , 8 },
{ "сентӓбри" , 9 },
{ "октӓбри" , 10 },
{ "ноябри" , 11 },
{ "декабри" , 12 },
// Gallegan | galego
{ "xaneiro" , 1 },
{ "febreiro" , 2 },
{ "marzo" , 3 },
{ "abril" , 4 },
{ "maio" , 5 },
{ "xuño" , 6 },
{ "xullo" , 7 },
{ "agosto" , 8 },
{ "setembro" , 9 },
{ "outubro" , 10 },
{ "novembro" , 11 },
{ "decembro" , 12 },
// Georgian | ქართული (ḳarṭuli)
{ "იანვარი", 1 },
{ "თებერვალი", 2 },
{ "მარტი" , 3 },
{ "აპრილი", 4 },
{ "მაისი" , 5 },
{ "ივნისი", 6 },
{ "ივლისი", 7 },
{ "აგვისტო", 8 },
{ "სექტემბერი", 9 },
{ "ოქტომბერი", 10 },
{ "ნოემბერი", 11 },
{ "დეკემბერი", 12 },
{ "ianvari" , 1 },
{ "ṭebervali" , 2 },
{ "marti" , 3 },
{ "aprili" , 4 },
{ "maisi" , 5 },
{ "ivnisi" , 6 },
{ "ivlisi" , 7 },
{ "agvisto" , 8 },
{ "seḳtemberi" , 9 },
{ "oḳtomberi" , 10 },
{ "noemberi" , 11 },
{ "dekemberi" , 12 },
// German 1 | Deutsch / Deutſch
{ "januar" , 1 },
{ "jänner" , 1 },
{ "februar" , 2 },
{ "feber" , 2 },
{ "märz" , 3 },
{ "april" , 4 },
{ "mai" , 5 },
{ "juni" , 6 },
{ "juli" , 7 },
{ "august" , 8 },
{ "september" , 9 },
{ "oktober" , 10 },
{ "november" , 11 },
{ "dezember" , 12 },
{ "januar" , 1 },
{ "jänner" , 1 },
{ "februar" , 2 },
{ "feber" , 2 },
{ "märz" , 3 },
{ "april" , 4 },
{ "mai" , 5 },
{ "juni" , 6 },
{ "juli" , 7 },
{ "auguſt" , 8 },
{ "september" , 9 },
{ "oktober" , 10 },
{ "november" , 11 },
{ "dezember" , 12 },
// Greek, Katharevousa | ελληνικά, καθαρεύουσα (ellīniká, kaṯareýoysa)
{ "Ἰανουάριος", 1 },
{ "Φεβρουάριος", 2 },
{ "Μάρτιος" , 3 },
{ "Ἀπρίλιος" , 4 },
{ "Μάϊος" , 5 },
{ "Ἰούνιος" , 6 },
{ "Ἰούλιος" , 7 },
{ "Αὔγουστος", 8 },
{ "Σεπτέμβριος", 9 },
{ "Ὀκτώβριος", 10 },
{ "Νοέμβριος", 11 },
{ "Δεκέμβριος", 12 },
{ "ianoyários" , 1 },
{ "fevroyários" , 2 },
{ "mártios" , 3 },
{ "aprílios" , 4 },
{ "máïos" , 5 },
{ "ioýnios" , 6 },
{ "ioýlios" , 7 },
{ "aúgoystos" , 8 },
{ "septémvrios" , 9 },
{ "oktṓvrios" , 10 },
{ "noémvrios" , 11 },
{ "dekémvrios" , 12 },
// Greenlandic | kalaallisut
{ "januari" , 1 },
{ "februari" , 2 },
{ "martsi" , 3 },
{ "aprili" , 4 },
{ "maji" , 5 },
{ "juni" , 6 },
{ "juli" , 7 },
{ "augustusi" , 8 },
{ "septemberi" , 9 },
{ "oktoberi" , 10 },
{ "novemberi" , 11 },
{ "decemberi" , 12 },
// Gujarati | ગુજરાતી (gujrātī)
{ "જાન્યુઆરી", 1 },
{ "ફેબ્રુઆરી", 2 },
{ "માર્ચ" , 3 },
{ "એપ્રિલ", 4 },
{ "મે" , 5 },
{ "જૂન" , 6 },
{ "જુલાઈ" , 7 },
{ "ઑગસ્ટ" , 8 },
{ "સપ્ટેમ્બર", 9 },
{ "ઑક્ટ્બર", 10 },
{ "નવેમ્બર", 11 },
{ "ડિસેમ્બર", 12 },
{ "jānyuārī" , 1 },
{ "pʰebruārī" , 2 },
{ "mārč" , 3 },
{ "epril" , 4 },
{ "me" , 5 },
{ "jūn" , 6 },
{ "julāī" , 7 },
{ "ogasṭ" , 8 },
{ "sapṭembar" , 9 },
{ "okṭobar" , 10 },
{ "navembar" , 11 },
{ "ḍisembar" , 12 },
// Haitian Creole | krèyol
{ "janvye" , 1 },
{ "fevriye" , 2 },
{ "mas" , 3 },
{ "avril" , 4 },
{ "me" , 5 },
{ "jen" , 6 },
{ "juyè" , 7 },
{ "out" , 8 },
{ "septanm" , 9 },
{ "oktòb" , 10 },
{ "novanm" , 11 },
{ "desanm" , 12 },
// Hausa | Hausa / حَوْسَ (ḥausa)
{ "janairu" , 1 },
{ "fabrairu" , 2 },
{ "fabara’ir" , 2 },
{ "maris" , 3 },
{ "afril" , 4 },
{ "afrilu" , 4 },
{ "mayu" , 5 },
{ "mayibi" , 5 },
{ "yuni" , 6 },
{ "yunihi" , 6 },
{ "jun" , 6 },
{ "yuli" , 7 },
{ "yulizi" , 7 },
{ "agusta" , 8 },
{ "angusta" , 8 },
{ "angushat" , 8 },
{ "satumba" , 9 },
{ "sitamba" , 9 },
{ "shatumbar" , 9 },
{ "oktoba" , 10 },
{ "akatubar" , 10 },
{ "nuwamba" , 11 },
{ "nuwambar" , 11 },
{ "dizamba" , 12 },
{ "disamba" , 12 },
{ "dijambar" , 12 },
{ "dujambar" , 12 },
{ "جَنَيْرُ" , 1 },
{ "ڢَبْرَيْرُ ؛ ڢَبَرَاءِر", 2 },
{ "مَارِسْ" , 3 },
{ "ٲڢْرِلْ ؛ ٲڢْرِيلُ", 4 },
{ "مَايُ ؛ مَايِبِ", 5 },
{ "يُونِ ؛ يُونِحِ ؛ جُنْ", 6 },
{ "يُولِ ؛ يُولِزِ", 7 },
{ "ٲغُسْتَ ؛ ٲنْغُسْتَ ؛ ٲنْغُشَتْ", 8 },
{ "سَتُمْبَ ؛ سِتَمْبَ ؛ شَاتُمْبَرْ", 9 },
{ "أُكْتوُبَ ؛ ٲكَتُوبَر", 10 },
{ "نُوَمْبَ ؛ نُوَمْبَرْ", 11 },
{ "دِيزَمْبَ ؛ دِيسَمْبَ ؛ دِجَمْبَرْ ؛ دُجَمْبَرْ", 12 },
// Hawaiian | ʻōlelo Hawaiʻi
{ "ianuali" , 1 },
{ "pepeluali" , 2 },
{ "malaki" , 3 },
{ "ʻapelila" , 4 },
{ "mei" , 5 },
{ "iune" , 6 },
{ "iulai" , 7 },
{ "ʻaukake" , 8 },
{ "kepakemapa" , 9 },
{ "ʻokakopa" , 10 },
{ "nowemapa" , 11 },
{ "kekemapa" , 12 },
// Hebrew | עברית (ʿiṿrît)
{ "ינואר" , 1 },
{ "יאנואר" , 1 },
{ "פברואר" , 2 },
{ "מרץ" , 3 },
{ "מרס" , 3 },
{ "מארס" , 3 },
{ "אפריל" , 4 },
{ "מי" , 5 },
{ "מאי" , 5 },
{ "יוני" , 6 },
{ "יולי" , 7 },
{ "אבגוסט" , 8 },
{ "אוגוסט" , 8 },
{ "ספטמבר" , 9 },
{ "אוקטובר" , 10 },
{ "נובמבר" , 11 },
{ "דצמבר" , 12 },
{ "yanûʾar" , 1 },
{ "yânûʾar" , 1 },
{ "febrûʾar" , 2 },
{ "merts" , 3 },
{ "mars" , 3 },
{ "mârs" , 3 },
{ "aprîl" , 4 },
{ "may" , 5 },
{ "mây" , 5 },
{ "yûnî" , 6 },
{ "yûlî" , 7 },
{ "aṿgûsṭ" , 8 },
{ "ôgûsṭ" , 8 },
{ "sepṭember" , 9 },
{ "ôqṭôber" , 10 },
{ "nôṿember" , 11 },
{ "detsember" , 12 },
// Hindi | हिंदी (hiṁdī)
{ "जनवरी" , 1 },
{ "फ़रवरी", 2 },
{ "मार्च" , 3 },
{ "अप्रैल", 4 },
{ "मे" , 5 },
{ "जून" , 6 },
{ "जुलाई" , 7 },
{ "अगस्त" , 8 },
{ "सितंबर", 9 },
{ "अकटूबर", 10 },
{ "नवंबर" , 11 },
{ "दिसंबर", 12 },
{ "janvarī" , 1 },
{ "farvarī" , 2 },
{ "mārč" , 3 },
{ "apræl" , 4 },
{ "me" , 5 },
{ "jūn" , 6 },
{ "julāī" , 7 },
{ "agast" , 8 },
{ "sitaṁbar" , 9 },
{ "aktūbar" , 10 },
{ "navaṁbar" , 11 },
{ "disaṁbar" , 12 },
// Hmong | / Hmoob
{ " / ", 1 },
{ " / ", 2 },
{ " / ", 3 },
{ " / ", 4 },
{ " / ", 5 },
{ " / ", 6 },
{ " / ", 7 },
{ " / ", 8 },
{ " / ", 9 },
{ " / ", 10 },
{ " / ", 11 },
{ " / ", 12 },
{ "ib hlis ntuj" , 1 },
{ "ob hlis ntuj" , 2 },
{ "peb hlis ntuj" , 3 },
{ "plaub hlis ntuj" , 4 },
{ "tsib hlis ntuj" , 5 },
{ "rau hlis ntuj" , 6 },
{ "xya hlis ntuj" , 7 },
{ "yim hlis ntuj" , 8 },
{ "cuaj hlis ntuj" , 9 },
{ "kaum hlis ntuj" , 10 },
{ "kaum ib hlis ntuj" , 11 },
{ "kaum ob hlis ntuj" , 12 },
// Hungarian 1 | magyar
{ "január" , 1 },
{ "február" , 2 },
{ "március" , 3 },
{ "április" , 4 },
{ "május" , 5 },
{ "június" , 6 },
{ "július" , 7 },
{ "augusztus" , 8 },
{ "szeptember" , 9 },
{ "október" , 10 },
{ "november" , 11 },
{ "december" , 12 },
// Icelandic | íslenska
{ "janúar" , 1 },
{ "febrúar" , 2 },
{ "mars" , 3 },
{ "apríl" , 4 },
{ "maí" , 5 },
{ "júní" , 6 },
{ "júlí" , 7 },
{ "ágúst" , 8 },
{ "september" , 9 },
{ "október" , 10 },
{ "nóvember" , 11 },
{ "desember" , 12 },
// Ido
{ "januaro" , 1 },
{ "februaro" , 2 },
{ "marto" , 3 },
{ "aprilo" , 4 },
{ "mayo" , 5 },
{ "junio" , 6 },
{ "julio" , 7 },
{ "agosto" , 8 },
{ "septembro" , 9 },
{ "oktobro" , 10 },
{ "novembro" , 11 },
{ "decembro" , 12 },
// Inari Sami | anarâškielâ
{ "uđđâivemáánu" , 1 },
{ "kuovâmáánu" , 2 },
{ "njuhčâmáánu" , 3 },
{ "cuáŋuimáánu" , 4 },
{ "vyesimáánu" , 5 },
{ "kesimáánu" , 6 },
{ "syeinimáánu" , 7 },
{ "porgemáánu" , 8 },
{ "čohčâmáánu" , 9 },
{ "roovvâdmáánu" , 10 },
{ "skammâmáánu" , 11 },
{ "juovlâmáánu" , 12 },
// Indonesian | bahasa Indonesia / بهاس ايندونيسيا
{ "januari" , 1 },
{ "februari" , 2 },
{ "maret" , 3 },
{ "april" , 4 },
{ "mei" , 5 },
{ "juni" , 6 },
{ "juli" , 7 },
{ "agustus" , 8 },
{ "september" , 9 },
{ "oktober" , 10 },
{ "november" , 11 },
{ "desember" , 12 },
{ "جانواري" , 1 },
{ "فيبرواري" , 2 },
{ "ماريت" , 3 },
{ "اڨريل" , 4 },
{ "مي" , 5 },
{ "جوني" , 6 },
{ "جولي" , 7 },
{ "اڬوستوس" , 8 },
{ "سيڨتيمبر" , 9 },
{ "اوكتوبر" , 10 },
{ "نوۏيمبر" , 11 },
{ "ديسيمبر" , 12 },
// Interlingua
{ "januario" , 1 },
{ "februario" , 2 },
{ "martio" , 3 },
{ "april" , 4 },
{ "maio" , 5 },
{ "junio" , 6 },
{ "julio" , 7 },
{ "augusto" , 8 },
{ "septembre" , 9 },
{ "october" , 10 },
{ "novembre" , 11 },
{ "decembre" , 12 },
// Inuktitut | ᐃᓄᒃᑎᑐᑦ / inuktitut
{ "ᔭᓄᐊᕆ" , 1 },
{ "ᕕᐳᐊᕆ" , 2 },
{ "ᒫᕐᓯ" , 3 },
{ "ᐊᐃᐳᕆᓪ" , 4 },
{ "ᒪᐃ" , 5 },
{ "ᔪᓂ" , 6 },
{ "ᔪᓚᐃ" , 7 },
{ "ᐊᐅᒍᔅ" , 8 },
{ "ᓯᑎᒻᐳᕆ" , 9 },
{ "ᐅᒃᑐᐳᕆ" , 10 },
{ "ᓄᕕᒻᐳᕆ" , 11 },
{ "ᑎᓯᒻᐳᕆ" , 12 },
{ "januari" , 1 },
{ "vipuari" , 2 },
{ "maarsi" , 3 },
{ "aipuril" , 4 },
{ "mai" , 5 },
{ "juni" , 6 },
{ "julai" , 7 },
{ "augus" , 8 },
{ "sitimpuri" , 9 },
{ "uktupuri" , 10 },
{ "nuvimpuri" , 11 },
{ "tisimpuri" , 12 },
// Irish Gaelic | Gaeilge / Gaeilge
{ "eanáir" , 1 },
{ "feabhra" , 2 },
{ "márta" , 3 },
{ "aibreán" , 4 },
{ "bealtaine" , 5 },
{ "meitheamh" , 6 },
{ "iúil" , 7 },
{ "lúnasa" , 8 },
{ "meán fómhair" , 9 },
{ "deireadh fómhair" , 10 },
{ "samhain" , 11 },
{ "nollaig" , 12 },
{ "eanáir" , 1 },
{ "feaḃra" , 2 },
{ "márta" , 3 },
{ "aibreán" , 4 },
{ "bealtaine" , 5 },
{ "meiṫeaṁ" , 6 },
{ "iúil" , 7 },
{ "lúnasa" , 8 },
{ "meán fóṁair" , 9 },
{ "deireaḋ fóṁair", 10 },
{ "saṁain" , 11 },
{ "nollaig" , 12 },
// Italian | italiano
{ "gennaio" , 1 },
{ "febbraio" , 2 },
{ "marzo" , 3 },
{ "aprile" , 4 },
{ "maggio" , 5 },
{ "giugno" , 6 },
{ "luglio" , 7 },
{ "agosto" , 8 },
{ "settembre" , 9 },
{ "ottobre" , 10 },
{ "novembre" , 11 },
{ "dicembre" , 12 },
// Japanese 1 | 日本語 (nihongo)
{ "1月" , 1 },
{ "2月" , 2 },
{ "3月" , 3 },
{ "4月" , 4 },
{ "5月" , 5 },
{ "6月" , 6 },
{ "7月" , 7 },
{ "8月" , 8 },
{ "9月" , 9 },
{ "10月" , 10 },
{ "11月" , 11 },
{ "12月" , 12 },
{ "ä¸æ" , 1 },
{ "äºæ" , 2 },
{ "ä¸æ" , 3 },
{ "åæ" , 4 },
{ "äºæ" , 5 },
{ "å
æ" , 6 },
{ "ä¸æ" , 7 },
{ "å
«æ" , 8 },
{ "ä¹æ" , 9 },
{ "åæ" , 10 },
{ "åä¸æ" , 11 },
{ "åäºæ" , 12 },
{ "ichigatsu" , 1 },
{ "nigatsu" , 2 },
{ "sangatsu" , 3 },
{ "shigatsu" , 4 },
{ "gogatsu" , 5 },
{ "rokugatsu" , 6 },
{ "shichigatsu" , 7 },
{ "hachigatsu" , 8 },
{ "kugatsu" , 9 },
{ "jūgatsu" , 10 },
{ "jūichigatsu" , 11 },
{ "jūnigatsu" , 12 },
// Javanese | basa Jawa
{ "januari" , 1 },
{ "pébruari" , 2 },
{ "maret" , 3 },
{ "april" , 4 },
{ "mèi" , 5 },
{ "juni" , 6 },
{ "juli" , 7 },
{ "agustus" , 8 },
{ "sèptèmber" , 9 },
{ "oktober" , 10 },
{ "nopèmber" , 11 },
{ "Ḍésèmber" , 12 },
// Kalmyk | хальмг (ĥal'mg)
{ "туула" , 1 },
{ "лу" , 2 },
{ "моһа" , 3 },
{ "мөрн" , 4 },
{ "хөн" , 5 },
{ "мөчн" , 6 },
{ "така" , 7 },
{ "ноха" , 8 },
{ "һаха" , 9 },
{ "хулһн" , 10 },
{ "үкр" , 11 },
{ "бар" , 12 },
{ "tuula" , 1 },
{ "lu" , 2 },
{ "moġa" , 3 },
{ "mörn" , 4 },
{ "ĥön" , 5 },
{ "möčn" , 6 },
{ "taka" , 7 },
{ "noĥa" , 8 },
{ "ġaĥa" , 9 },
{ "ĥulġn" , 10 },
{ "ükr" , 11 },
{ "bar" , 12 },
// Kalé Romani | Romanó Kaló
{ "enerín" , 1 },
{ "ibraín" , 2 },
{ "kirdaré" , 3 },
{ "alpandy" , 4 },
{ "kindalé" , 5 },
{ "ňutivé" , 6 },
{ "ňuntivé" , 7 },
{ "kerosto" , 8 },
{ "xetava" , 9 },
{ "oktorbar" , 10 },
{ "ňudikoy" , 11 },
{ "kendebré" , 12 },
// Kannada | ಕನ್ನಡ (kannaḍa)
{ "ಜನವರೀ" , 1 },
{ "ಫೆಬ್ರವರೀ", 2 },
{ "ಮಾರ್ಚ್", 3 },
{ "ಎಪ್ರಿಲ್", 4 },
{ "ಮೆ" , 5 },
{ "ಜೂನ್" , 6 },
{ "ಜುಲೈ" , 7 },
{ "ಆಗಸ್ಟ್", 8 },
{ "ಸಪ್ಟೆಂಬರ್", 9 },
{ "ಅಕ್ಟೋಬರ್", 10 },
{ "ನವೆಂಬರ್", 11 },
{ "ಡಿಸೆಂಬರ್", 12 },
{ "janvarī" , 1 },
{ "pʰebravarī" , 2 },
{ "mārč" , 3 },
{ "epril" , 4 },
{ "me" , 5 },
{ "jūn" , 6 },
{ "julai" , 7 },
{ "āgasṭ" , 8 },
{ "sapṭeṁbar" , 9 },
{ "akṭōbar" , 10 },
{ "naveṁbar" , 11 },
{ "ḍiseṁbar" , 12 },
// Kapampangan
{ "eneru" , 1 },
{ "pebreru" , 2 },
{ "marsu" , 3 },
{ "abril" , 4 },
{ "mayu" , 5 },
{ "juniu" , 6 },
{ "juliu" , 7 },
{ "agostu" , 8 },
{ "septiembri" , 9 },
{ "octubri" , 10 },
{ "nobiembri" , 11 },
{ "disiembri" , 12 },
// Karaim | karaj
{ "artarych aj" , 1 },
{ "kural aj" , 2 },
{ "baškuschan aj" , 3 },
{ "jaz aj" , 4 },
{ "ulah aj" , 5 },
{ "čirik aj" , 6 },
{ "ajrychsy aj" , 7 },
{ "kiuź aj" , 8 },
{ "sohum aj" , 9 },
{ "kyš aj" , 10 },
{ "karakyš aj" , 11 },
{ "siuviuńč aj" , 12 },
// Karakalpak | qaraqalpaq / қарақалпақ
{ "yanvar" , 1 },
{ "fevral" , 2 },
{ "mart" , 3 },
{ "aprel" , 4 },
{ "may" , 5 },
{ "iyun" , 6 },
{ "iyul" , 7 },
{ "avgust" , 8 },
{ "sentyabr" , 9 },
{ "oktyabr" , 10 },
{ "noyabr" , 11 },
{ "dekabr" , 12 },
{ "январь" , 1 },
{ "февраль" , 2 },
{ "март" , 3 },
{ "апрель" , 4 },
{ "май" , 5 },
{ "июнь" , 6 },
{ "июль" , 7 },
{ "август" , 8 },
{ "сентябрь" , 9 },
{ "октябрь" , 10 },
{ "ноябрь" , 11 },
{ "декабрь" , 12 },
// Kashmiri | کٲشُر (kạ̄šur) / कॉशुर (kọ̄šur)
{ "جَنْوَرى" , 1 },
{ "پھَرْوَرى", 2 },
{ "مارٕچ" , 3 },
{ "اَپْرِل ؛ اَپْريل", 4 },
{ "مے" , 5 },
{ "جٷن" , 6 },
{ "جُلَے" , 7 },
{ "اَگَسْت" , 8 },
{ "سِتَمْبَر", 9 },
{ "اۆکْتؤبَر", 10 },
{ "نَوَمْبَر", 11 },
{ "دِسَمْبَر", 12 },
{ "जन्वरी", 1 },
{ "फर्वरी", 2 },
{ "मारु'च" , 3 },
{ "अप्रिल", 4 },
{ "अप्रेल", 4 },
{ "मे" , 5 },
{ "जून" , 6 },
{ "जुलय" , 7 },
{ "अगस्त" , 8 },
{ "सितम्बर", 9 },
{ "ओ'क्तोबर", 10 },
{ "नवम्बर", 11 },
{ "दिसम्बर", 12 },
{ "janvarī" , 1 },
{ "pʰarvarī" , 2 },
{ "mārụč" , 3 },
{ "april" , 4 },
{ "aprēl" , 4 },
{ "mē" , 5 },
{ "jūn" , 6 },
{ "julay" , 7 },
{ "agast" , 8 },
{ "sitambar" , 9 },
{ "oktōbar" , 10 },
{ "navambar" , 11 },
{ "disambar" , 12 },
// Kashubian | kaszëbsczi
{ "stëcznik" , 1 },
{ "gromicznik" , 2 },
{ "strëmiannik" , 3 },
{ "łżëkwiat" , 4 },
{ "môj" , 5 },
{ "czerwińc" , 6 },
{ "lëpińc" , 7 },
{ "zélnik" , 8 },
{ "séwnik" , 9 },
{ "rujan" , 10 },
{ "lëstopadnik" , 11 },
{ "gòdnik" , 12 },
// Kazakh 1 | қазақ / qazaq / قازاق
{ "Қаңтар" , 1 },
{ "Ақпан" , 2 },
{ "Наурыз" , 3 },
{ "Сәуір" , 4 },
{ "Көкек" , 4 },
{ "Мамыр" , 5 },
{ "Маусым" , 6 },
{ "Шілде" , 7 },
{ "Тамыз" , 8 },
{ "Қыркүйек" , 9 },
{ "Қазан" , 10 },
{ "Қараша" , 11 },
{ "Желтоқсан", 12 },
{ "qañtar" , 1 },
{ "aqpan" , 2 },
{ "nawrız" , 3 },
{ "säwir" , 4 },
{ "kökek" , 4 },
{ "mamır" , 5 },
{ "mawsım" , 6 },
{ "Şilde" , 7 },
{ "tamız" , 8 },
{ "qırküyek" , 9 },
{ "qazan" , 10 },
{ "qaraşa" , 11 },
{ "jeltoqsan" , 12 },
{ "قاڭتار" , 1 },
{ "اقپان" , 2 },
{ "ناۋرىز" , 3 },
{ "ءساۋىر ؛ كوكەك", 4 },
{ "مامىر" , 5 },
{ "ماۋسىم" , 6 },
{ "شىلدە" , 7 },
{ "تامىز" , 8 },
{ "قىركۇيەك" , 9 },
{ "قازان" , 10 },
{ "قاراشا" , 11 },
{ "جەلتوقسان", 12 },
// Khalkha Mongolian | монгол (mongol) / ᠮᠣᠩᠭᠣᠯ (moṅgol)
{ "1 дүгээр сар", 1 },
{ "2 дугаар сар", 2 },
{ "3 дугаар сар", 3 },
{ "4 дүгээр сар", 4 },
{ "5 дугаар сар", 5 },
{ "6 дугаар сар", 6 },
{ "7 дугаар сар", 7 },
{ "8 дугаар сар", 8 },
{ "9 дүгээр сар", 9 },
{ "10 дугаар сар", 10 },
{ "11 дүгээр сар", 11 },
{ "12 дугаар сар", 12 },
{ "нэгдүгээр сар", 1 },
{ "хоёрдугаар сар", 2 },
{ "гуравдугаар сар", 3 },
{ "дөрөвдүгээр сар", 4 },
{ "тавдугаар сар", 5 },
{ "зургадугаар сар", 6 },
{ "долдугаар сар", 7 },
{ "наймдугаар сар", 8 },
{ "есдүгээр сар", 9 },
{ "аравдугаар сар", 10 },
{ "арван нэгдүгээр сар", 11 },
{ "арван хоёрдугаар сар", 12 },
{ "nägdügäär sar" , 1 },
{ "ĥoërdugaar sar" , 2 },
{ "guravdugaar sar" , 3 },
{ "dörövdügäär sar", 4 },
{ "tavdugaar sar" , 5 },
{ "dzurgadugaar sar" , 6 },
{ "doldugaar sar" , 7 },
{ "najmdugaar sar" , 8 },
{ "jesdügäär sar" , 9 },
{ "aravdugaar sar" , 10 },
{ "arvan nägdügäär sar", 11 },
{ "arvan ĥoërdugaar sar", 12 },
{ "ᠨᠢᠭᠡᠳᠦᠭᠡᠷ ᠰᠠᠷᠠ", 1 },
{ "ᠬᠣᠶᠠᠷᠳᠤᠭᠠᠷ ᠰᠠᠷᠠ", 2 },
{ "ᠭᠤᠷᠪᠠᠳᠤᠭᠠᠷ ᠰᠠᠷᠠ", 3 },
{ "ᠳᠥᠷᠪᠡᠳᠦᠭᠡᠷ ᠰᠠᠷᠠ", 4 },
{ "ᠲᠠᠪᠤᠳᠤᠭᠠᠷ ᠰᠠᠷᠠ", 5 },
{ "ᠵᠢᠷᠭᠤᠳᠤᠭᠠᠷ ᠰᠠᠷᠠ", 6 },
{ "ᠳᠣᠯᠤᠳᠤᠭᠠᠷ ᠰᠠᠷᠠ", 7 },
{ "ᠨᠠᠢᠮᠠᠳᠤᠭᠠᠷ ᠰᠠᠷᠠ", 8 },
{ "ᠶᠢᠰᠦᠳᠦᠭᠡᠷ ᠰᠠᠷᠠ", 9 },
{ "ᠠᠷᠪᠠᠳᠤᠭᠠᠷ ᠰᠠᠷᠠ", 10 },
{ "ᠠᠷᠪᠠᠨ ᠨᠢᠭᠡᠳᠦᠭᠡᠷ ᠰᠠᠷᠠ", 11 },
{ "ᠠᠷᠪᠠᠨ ᠬᠣᠶᠠᠷᠳᠤᠭᠠᠷ ᠰᠠᠷᠠ", 12 },
{ "nigedüger sar-a" , 1 },
{ "qoyardugar sar-a" , 2 },
{ "gurbadugar sar-a" , 3 },
{ "dörbedüger sar-a", 4 },
{ "tabudugar sar-a" , 5 },
{ "jirgudugar sar-a" , 6 },
{ "doludugar sar-a" , 7 },
{ "naimadugar sar-a" , 8 },
{ "yisüdüger sar-a" , 9 },
{ "arbadugar sar-a" , 10 },
{ "arban nigedüger sar-a", 11 },
{ "arban qoyardugar sar-a", 12 },
// Khmer | ភាសាខ្មែរ (pʰāsā kʰmær)
{ "យ៉ាំងវីយ៉េ", 1 },
{ "ហ្វេវ្រីយេ", 2 },
{ "ម៉ាស" , 3 },
{ "អាវ្រិល់", 4 },
{ "ម៉េ" , 5 },
{ "យូវ៉ាំង", 6 },
{ "ស៊ូយេត", 7 },
{ "អ៊ូត" , 8 },
{ "សេតមប្រ", 9 },
{ "អុកតូប្រ", 10 },
{ "ណូវ៉មប្រ", 11 },
{ "ដេសមប្រ", 12 },
{ "yāṁṅvīye" , 1 },
{ "hvevrīye" , 2 },
{ "mās" , 3 },
{ "āvrĭl" , 4 },
{ "me" , 5 },
{ "yūvāṁṅ" , 6 },
{ "sūyet" , 7 },
{ "ūt" , 8 },
{ "setamb[r]" , 9 },
{ "uktūb[r]" , 10 },
{ "ṇūvamb[r]" , 11 },
{ "desamb[r]" , 12 },
// Klallam | nəxʷsƛ̕áy̕əm̕
{ "x̣aʔwəsčiʔánəŋ", 1 },
{ "č̕aʔyéʔəɬ ɬqáy̕əč", 2 },
{ "x̣áƛ̕ ɬqáy̕əč", 3 },
{ "čən̕máʔəxʷ" , 4 },
{ "čən̕lílu" , 5 },
{ "čən̕kʷítšən", 6 },
{ "čən̕q̕ə́čqs", 7 },
{ "čən̕t̕áqaʔ" , 8 },
{ "čən̕hə́nən" , 9 },
{ "sx̣ʷúpč" , 10 },
{ "čən̕háʔnəŋ" , 11 },
{ "x̣əp̕sčiʔánəŋ", 12 },
// Komi 1 | коми (komi)
{ "январ" , 1 },
{ "февраль" , 2 },
{ "март" , 3 },
{ "апрель" , 4 },
{ "май" , 5 },
{ "июнь" , 6 },
{ "июль" , 7 },
{ "август" , 8 },
{ "сентябр" , 9 },
{ "октябр" , 10 },
{ "ноябр" , 11 },
{ "декабр" , 12 },
{ "janvar" , 1 },
{ "fevral'" , 2 },
{ "mart" , 3 },
{ "aprel'" , 4 },
{ "maj" , 5 },
{ "ijun'" , 6 },
{ "ijul'" , 7 },
{ "avgust" , 8 },
{ "sentjabr" , 9 },
{ "oktjabr" , 10 },
{ "nojabr" , 11 },
{ "dekabr" , 12 },
// Konkani | कोंकणी (koṁkaṇī)
{ "जानेवारी", 1 },
{ "फ़ेब्रुवारी", 2 },
{ "मार्च" , 3 },
{ "एप्रिल", 4 },
{ "मे" , 5 },
{ "जून" , 6 },
{ "जुलै" , 7 },
{ "ओगस्ट" , 8 },
{ "सेप्टेंबर", 9 },
{ "ओक्टोबर", 10 },
{ "नोव्हेंबर", 11 },
{ "डिसेंबर", 12 },
{ "jānevārī" , 1 },
{ "februvārī" , 2 },
{ "mārč" , 3 },
{ "epril" , 4 },
{ "me" , 5 },
{ "jūn" , 6 },
{ "julæ" , 7 },
{ "ogasṭ" , 8 },
{ "sepṭeṁbar" , 9 },
{ "okṭobar" , 10 },
{ "novʰeṁbar" , 11 },
{ "ḍiseṁbar" , 12 },
// Korean | 한국어 (hangukeo)
{ "1ì" , 1 },
{ "2ì" , 2 },
{ "3ì" , 3 },
{ "4ì" , 4 },
{ "5ì" , 5 },
{ "6ì" , 6 },
{ "7ì" , 7 },
{ "8ì" , 8 },
{ "9ì" , 9 },
{ "10ì" , 10 },
{ "11ì" , 11 },
{ "12ì" , 12 },
{ "ì¼ì" , 1 },
{ "ì´ì" , 2 },
{ "ì¼ì" , 3 },
{ "ì¬ì" , 4 },
{ "ì¤ì" , 5 },
{ "ì ì" , 6 },
{ "ì¹ ì" , 7 },
{ "íì" , 8 },
{ "구ì" , 9 },
{ "ìì" , 10 },
{ "ìì¼ì" , 11 },
{ "ìì´ì" , 12 },
{ "ilweol" , 1 },
{ "iweol" , 2 },
{ "samweol" , 3 },
{ "saweol" , 4 },
{ "oweol" , 5 },
{ "yuweol" , 6 },
{ "chilweol" , 7 },
{ "palweol" , 8 },
{ "guweol" , 9 },
{ "siweol" , 10 },
{ "sipilweol" , 11 },
{ "sipiweol" , 12 },
// Kurdish (Sorani) 1 | کوردی / kurdî, سۆرانی / soranî
{ "رێبهندان", 1 },
{ "رهشهمێ", 2 },
{ "نهورۆز" , 3 },
{ "گولان" , 4 },
{ "جۆزهردان", 5 },
{ "گهرماجمان ؛ پووشپهڕ", 6 },
{ "خهرمانان", 7 },
{ "گهلاوێژ" , 8 },
{ "ڕهزبهر", 9 },
{ "گهلاڕێزان", 10 },
{ "سهرماوهز", 11 },
{ "بهفرانبار", 12 },
{ "rêbendan" , 1 },
{ "reşemê" , 2 },
{ "newroz" , 3 },
{ "gulan" , 4 },
{ "cozerdan" , 5 },
{ "germaciman" , 6 },
{ "pûşpeṟ" , 6 },
{ "xermanan" , 7 },
{ "gelawêj" , 8 },
{ "ṟezber" , 9 },
{ "gelaṟêzan" , 10 },
{ "sermawez" , 11 },
{ "befranbar" , 12 },
// Kurdish 1 | kurdî / کوردی, kurmancî / کورمانجی
{ "çile" , 1 },
{ "sibat" , 2 },
{ "adar" , 3 },
{ "nîsan" , 4 },
{ "gulan" , 5 },
{ "hezîran" , 6 },
{ "tîrmeh" , 7 },
{ "tebax" , 8 },
{ "îlon" , 9 },
{ "cotmeh" , 10 },
{ "mijdar" , 11 },
{ "kanûn" , 12 },
{ "چله" , 1 },
{ "سبات" , 2 },
{ "ئادار" , 3 },
{ "نیسان" , 4 },
{ "گولان" , 5 },
{ "ههزیران" , 6 },
{ "تیرمهه" , 7 },
{ "تهباخ" , 8 },
{ "ئیلۆن" , 9 },
{ "جۆتمهه" , 10 },
{ "مژدار" , 11 },
{ "کانوون" , 12 },
// Ladin (Gardena) | ladin de Gherdëina
{ "jené" , 1 },
{ "fauré" , 2 },
{ "merz" , 3 },
{ "auril" , 4 },
{ "mei" , 5 },
{ "juni" , 6 },
{ "lugio" , 7 },
{ "agost" , 8 },
{ "setëmber" , 9 },
{ "utober" , 10 },
{ "nuvëmber" , 11 },
{ "dezëmber" , 12 },
// Ladino | ג'ודיאו-איספאנייול / djudeo-espanyol
{ "אינירו" , 1 },
{ "פ'יברירו" , 2 },
{ "מארסו" , 3 },
{ "אב'ריל" , 4 },
{ "מאייו" , 5 },
{ "ג'וניו" , 6 },
{ "ג'וליו" , 7 },
{ "אגוסטו" , 8 },
{ "סיפטימברי", 9 },
{ "אוקטוברי" , 10 },
{ "נוב'ימברי" , 11 },
{ "דיסיימברי", 12 },
{ "enero" , 1 },
{ "febrero" , 2 },
{ "marso" , 3 },
{ "avril" , 4 },
{ "mayo" , 5 },
{ "djunio" , 6 },
{ "djulio" , 7 },
{ "agosto" , 8 },
{ "septembre" , 9 },
{ "oktobre" , 10 },
{ "novembre" , 11 },
{ "desiembre" , 12 },
// Lao 1 | ພາສາລາວ (pʰāsā lāw)
{ "ມັງກອນ", 1 },
{ "ກຸມພາ" , 2 },
{ "ມີນາ" , 3 },
{ "ເມສາ" , 4 },
{ "ພຶດສະພາ", 5 },
{ "ມິຖຸນາ", 6 },
{ "ກໍລະກົດ", 7 },
{ "ສິງຫາ" , 8 },
{ "ກັນຍາ" , 9 },
{ "ຕຸລາ" , 10 },
{ "ພະຈິກ" , 11 },
{ "ທັນວາ" , 12 },
{ "mâṅkɔ̄n" , 1 },
{ "kumpʰā" , 2 },
{ "mīnā" , 3 },
{ "mēsā" , 4 },
{ "pʰʉtsapʰā" , 5 },
{ "mitʰunā" , 6 },
{ "kaṁlakôt" , 7 },
{ "siṅhā" , 8 },
{ "kânñā" , 9 },
{ "tulā" , 10 },
{ "pʰačik" , 11 },
{ "tʰânvā" , 12 },
// Latin | latine
{ "ianuarius" , 1 },
{ "februarius" , 2 },
{ "martius" , 3 },
{ "aprilis" , 4 },
{ "maius" , 5 },
{ "iunius" , 6 },
{ "iulius" , 7 },
{ "augustus" , 8 },
{ "september" , 9 },
{ "october" , 10 },
{ "november" , 11 },
{ "december" , 12 },
// Latvian | latviešu
{ "janvāris" , 1 },
{ "februāris" , 2 },
{ "marts" , 3 },
{ "aprīlis" , 4 },
{ "maijs" , 5 },
{ "jūnijs" , 6 },
{ "jūlijs" , 7 },
{ "augusts" , 8 },
{ "septembris" , 9 },
{ "oktobris" , 10 },
{ "novembris" , 11 },
{ "decembris" , 12 },
// Lezgi 1 | лезги (lezgi) see rus
{ "гьер" , 1 },
{ "эхем" , 2 },
{ "ибне" , 3 },
{ "нава" , 4 },
{ "тӀул" , 5 },
{ "къамуг" , 6 },
{ "чиле" , 7 },
{ "пахун" , 8 },
{ "мара" , 9 },
{ "баскӀум" , 10 },
{ "цӀехуьл" , 11 },
{ "фандукӀ" , 12 },
{ "her" , 1 },
{ "ėḫem" , 2 },
{ "ibne" , 3 },
{ "nava" , 4 },
{ "ṭul" , 5 },
{ "q̄amug" , 6 },
{ "čile" , 7 },
{ "paḫun" , 8 },
{ "mara" , 9 },
{ "basḳum" , 10 },
{ "c̣eḫül" , 11 },
{ "fanduḳ" , 12 },
// Ligurian | líguru
{ "zenná" , 1 },
{ "frevá" , 2 },
{ "marsu" , 3 },
{ "arví" , 4 },
{ "mazzu" , 5 },
{ "zûgnu" , 6 },
{ "lûggiu" , 7 },
{ "agustu" , 8 },
{ "settembre" , 9 },
{ "ottubre" , 10 },
{ "nuvembre" , 11 },
{ "dexembre" , 12 },
// Limburgish | Limburgs
{ "jannewarie" , 1 },
{ "fibberwarie" , 2 },
{ "miert" , 3 },
{ "eprèl" , 4 },
{ "meij" , 5 },
{ "junie" , 6 },
{ "julie" , 7 },
{ "augustus" , 8 },
{ "september" , 9 },
{ "oktober" , 10 },
{ "november" , 11 },
{ "desember" , 12 },
// Lingala | lingála
{ "sánzá ya libosó", 1 },
{ "sánzá ya míbalé", 2 },
{ "sánzá ya mísáto", 3 },
{ "sánzá ya mínéi", 4 },
{ "sánzá ya mítáno", 5 },
{ "sánzá ya motóbá", 6 },
{ "sánzá ya nsambo" , 7 },
{ "sánzá ya mwambe" , 8 },
{ "sánzá ya libwa" , 9 },
{ "sánzá ya zómí" , 10 },
{ "sánzá ya zómí na mɔ̌kɔ́", 11 },
{ "sánzá ya zómí na míbalé", 12 },
// Lithuanian | lietuvių
{ "sausis" , 1 },
{ "vasaris" , 2 },
{ "kovas" , 3 },
{ "balandis" , 4 },
{ "gegužė" , 5 },
{ "birželis" , 6 },
{ "liepa" , 7 },
{ "rugpjūtis" , 8 },
{ "rugsėjis" , 9 },
{ "spalis" , 10 },
{ "lapkritis" , 11 },
{ "gruodis" , 12 },
// Livonian | līvõ
{ "janvār" , 1 },
{ "februar" , 2 },
{ "märts" , 3 },
{ "april" , 4 },
{ "maij" , 5 },
{ "jūnij" , 6 },
{ "jūlij" , 7 },
{ "ougust" , 8 },
{ "septembõr" , 9 },
{ "oktōbõr" , 10 },
{ "novembõr" , 11 },
{ "detsembõr" , 12 },
// Low German 1 | Plattdüütsch / Plattdüütſch
{ "januoor" , 1 },
{ "januwoor" , 1 },
{ "februoor" , 2 },
{ "feberwoor" , 2 },
{ "märz" , 3 },
{ "märzmaand" , 3 },
{ "april" , 4 },
{ "mai" , 5 },
{ "maimaand" , 5 },
{ "juni" , 6 },
{ "juli" , 7 },
{ "aust" , 8 },
{ "augst" , 8 },
{ "september" , 9 },
{ "oktober" , 10 },
{ "november" , 11 },
{ "dezember" , 12 },
{ "januoor" , 1 },
{ "januwoor" , 1 },
{ "februoor" , 2 },
{ "feberwoor" , 2 },
{ "märz" , 3 },
{ "märzmaand" , 3 },
{ "april" , 4 },
{ "mai" , 5 },
{ "maimaand" , 5 },
{ "juni" , 6 },
{ "juli" , 7 },
{ "auſt" , 8 },
{ "augſt" , 8 },
{ "september" , 9 },
{ "oktober" , 10 },
{ "november" , 11 },
{ "dezember" , 12 },
// Lower Sorbian | dolnoserbšćina
{ "januar" , 1 },
{ "februar" , 2 },
{ "měrc" , 3 },
{ "pózymski" , 3 },
{ "apryl" , 4 },
{ "maj" , 5 },
{ "junij" , 6 },
{ "julij" , 7 },
{ "awgust" , 8 },
{ "september" , 9 },
{ "oktober" , 10 },
{ "nowember" , 11 },
{ "december" , 12 },
// Lule Sami | julevsámegiella
{ "ådåjakmánno" , 1 },
{ "guovvamánno" , 2 },
{ "sjnjuktjamánno" , 3 },
{ "vuoratjismánno" , 4 },
{ "moarmesmánno" , 5 },
{ "biehtsemánno" , 6 },
{ "sjnjilltjamánno" , 7 },
{ "bårggemánno" , 8 },
{ "ragátmánno" , 9 },
{ "gålgådismánno" , 10 },
{ "basádismánno" , 11 },
{ "javllamánno" , 12 },
// Luxembourgish 1 | Lëtzebuergesch / Lëtzebuergeſch
{ "januar" , 1 },
{ "februar" , 2 },
{ "mäerz" , 3 },
{ "aprëll" , 4 },
{ "mee" , 5 },
{ "juni" , 6 },
{ "juli" , 7 },
{ "august" , 8 },
{ "september" , 9 },
{ "oktober" , 10 },
{ "november" , 11 },
{ "dezember" , 12 },
{ "januar" , 1 },
{ "februar" , 2 },
{ "mäerz" , 3 },
{ "aprëll" , 4 },
{ "mee" , 5 },
{ "juni" , 6 },
{ "juli" , 7 },
{ "auguſt" , 8 },
{ "september" , 9 },
{ "oktober" , 10 },
{ "november" , 11 },
{ "dezember" , 12 },
// Macedonian | македонски (makedonski)
{ "јануари" , 1 },
{ "февруари" , 2 },
{ "март" , 3 },
{ "април" , 4 },
{ "мај" , 5 },
{ "јуни" , 6 },
{ "јули" , 7 },
{ "август" , 8 },
{ "септември", 9 },
{ "октомври" , 10 },
{ "ноември" , 11 },
{ "декември" , 12 },
{ "januari" , 1 },
{ "fevruari" , 2 },
{ "mart" , 3 },
{ "april" , 4 },
{ "maj" , 5 },
{ "juni" , 6 },
{ "juli" , 7 },
{ "avgust" , 8 },
{ "septemvri" , 9 },
{ "oktomvri" , 10 },
{ "noemvri" , 11 },
{ "dekemvri" , 12 },
// Malagasy
{ "janoary" , 1 },
{ "febroary" , 2 },
{ "marsa" , 3 },
{ "martsa" , 3 },
{ "avrily" , 4 },
{ "mey" , 5 },
{ "may" , 5 },
{ "jiona" , 6 },
{ "jona" , 6 },
{ "jolay" , 7 },
{ "aogositra" , 8 },
{ "septambra" , 9 },
{ "ôktôbra" , 10 },
{ "nôvambra" , 11 },
{ "desambra" , 12 },
// Malay | bahasa Melayu / بهاس ملايو
{ "januari" , 1 },
{ "februari" , 2 },
{ "mac" , 3 },
{ "april" , 4 },
{ "mei" , 5 },
{ "jun" , 6 },
{ "julai" , 7 },
{ "ogos" , 8 },
{ "september" , 9 },
{ "oktober" , 10 },
{ "november" , 11 },
{ "disember" , 12 },
{ "جانواري" , 1 },
{ "فيبرواري" , 2 },
{ "مچ" , 3 },
{ "اڨريل" , 4 },
{ "مي" , 5 },
{ "جون" , 6 },
{ "جولاي" , 7 },
{ "اوڬوس" , 8 },
{ "سيڨتيمبر" , 9 },
{ "اوكتوبر" , 10 },
{ "نوۏيمبر" , 11 },
{ "ديسيمبر" , 12 },
// Malayalam | മലയാളം (malayāḷaṁ)
{ "ജനുവരി", 1 },
{ "ഫെബ്രുവരി", 2 },
{ "മാര്ച്ച്", 3 },
{ "ഏപ്രില്", 4 },
{ "മേയ്" , 5 },
{ "ജൂണ്" , 6 },
{ "ജൂലൈ" , 7 },
{ "ആഗസ്റ്റ്", 8 },
{ "സെപ്റ്റംബര്", 9 },
{ "ഒക്ടോബര്", 10 },
{ "നവംബര്", 11 },
{ "ഡിസംബര്", 12 },
{ "januvari" , 1 },
{ "pʰebruvari" , 2 },
{ "mārčč" , 3 },
{ "ēpril" , 4 },
{ "mēy" , 5 },
{ "jūṇ" , 6 },
{ "jūlai" , 7 },
{ "āgasṟṟ" , 8 },
{ "sepṟṟaṁbar" , 9 },
{ "okṭōbar" , 10 },
{ "navaṁbar" , 11 },
{ "ḍisaṁbar" , 12 },
// Maltese | Malti
{ "jannar" , 1 },
{ "frar" , 2 },
{ "marzu" , 3 },
{ "april" , 4 },
{ "mejju" , 5 },
{ "Ġunju" , 6 },
{ "lulju" , 7 },
{ "awissu" , 8 },
{ "settembru" , 9 },
{ "ottubru" , 10 },
{ "novembru" , 11 },
{ "diċembru" , 12 },
// Manipuri / Meitei | mYtYloN / মৈইতৈইলোন (maitailon)
{ "wacciq" , 1 },
{ "pairel" , 2 },
{ "lmta" , 3 },
{ "sjibu" , 4 },
{ "kalen" , 5 },
{ "iq" , 6 },
{ "iqel" , 7 },
{ "twan" , 8 },
{ "laqbn" , 9 },
{ "mera" , 10 },
{ "hiyaqgy" , 11 },
{ "poinu" , 12 },
{ "wākčiṅ" , 1 },
{ "pʰāírel" , 2 },
{ "lamtā" , 3 },
{ "sajibu" , 4 },
{ "kālen" , 5 },
{ "íṅa" , 6 },
{ "íṅel" , 7 },
{ "tʰawān" , 8 },
{ "lāṅban" , 9 },
{ "merā" , 10 },
{ "hiyāṅgai" , 11 },
{ "poínu" , 12 },
{ "যাকচিঙ", 1 },
{ "ফাইরেল", 2 },
{ "লমতা" , 3 },
{ "সজিবু" , 4 },
{ "কালেন" , 5 },
{ "ইঙা" , 6 },
{ "ইঙেল" , 7 },
{ "থযান" , 8 },
{ "লাঙবন" , 9 },
{ "মেরা" , 10 },
{ "হিয়ঙগৈই", 11 },
{ "পোইনু" , 12 },
{ "wākčiṅ" , 1 },
{ "pʰāirel" , 2 },
{ "lamtā" , 3 },
{ "sajibu" , 4 },
{ "kālen" , 5 },
{ "iṅā" , 6 },
{ "iṅel" , 7 },
{ "tʰawān" , 8 },
{ "lāṅban" , 9 },
{ "merā" , 10 },
{ "hiyāṅgai" , 11 },
{ "poinu" , 12 },
// Manx | Gaelg
{ "jerrey-geuree" , 1 },
{ "toshiaght-arree" , 2 },
{ "mayrnt" , 3 },
{ "averil" , 4 },
{ "boaldyn" , 5 },
{ "mean-souree" , 6 },
{ "jerrey-souree" , 7 },
{ "luanistyn" , 8 },
{ "mean-fouyir" , 9 },
{ "jerrey-fouyir" , 10 },
{ "mee houney" , 11 },
{ "mee ny nollick" , 12 },
// Maori 1 | reo Māori
{ "hānuere" , 1 },
{ "pepuere" , 2 },
{ "maehe" , 3 },
{ "Āperira" , 4 },
{ "mei" , 5 },
{ "hune" , 6 },
{ "hūrae" , 7 },
{ "Ākuheta" , 8 },
{ "hepetema" , 9 },
{ "oketopa" , 10 },
{ "noema" , 11 },
{ "tīhema" , 12 },
// Marathi | मराठी (marāṭʰī)
{ "जानेवारी", 1 },
{ "फ़ेबृवारी", 2 },
{ "मार्च" , 3 },
{ "एप्रिल", 4 },
{ "मे" , 5 },
{ "जून" , 6 },
{ "जुलै" , 7 },
{ "ओगस्ट" , 8 },
{ "सेप्टेंबर", 9 },
{ "ओक्टोबर", 10 },
{ "नोव्हेंबर", 11 },
{ "डिसेंबर", 12 },
{ "jānevārī" , 1 },
{ "febravārī" , 2 },
{ "mārč" , 3 },
{ "epril" , 4 },
{ "me" , 5 },
{ "jūn" , 6 },
{ "julæ" , 7 },
{ "ogasṭ" , 8 },
{ "sepṭeṁbar" , 9 },
{ "okṭobar" , 10 },
{ "novʰeṁbar" , 11 },
{ "ḍiseṁbar" , 12 },
// Mauritius Creole | morisyin
{ "zanvye" , 1 },
{ "fevriye" , 2 },
{ "mars" , 3 },
{ "avril" , 4 },
{ "me" , 5 },
{ "zin" , 6 },
{ "ziyet" , 7 },
{ "ut" , 8 },
{ "septam" , 9 },
{ "oktob" , 10 },
{ "novam" , 11 },
{ "desam" , 12 },
// Meadow Mari 1 | олык марий (olyk marij) see rus
{ "шорыкйолтылзе", 1 },
{ "пугыжтылзе", 2 },
{ "ӱйарнятылзе", 3 },
{ "вӱдшортылзе", 4 },
{ "агатылзе" , 5 },
{ "пеледыштылзе", 6 },
{ "сӱремтылзе", 7 },
{ "сорлатылзе", 8 },
{ "идымтылзе", 9 },
{ "шыжатылзе", 10 },
{ "кылметылзе", 11 },
{ "телетылзе", 12 },
{ "šorykjoltylze" , 1 },
{ "pugyžtylze" , 2 },
{ "üjarnjatylze" , 3 },
{ "vüdšortylze" , 4 },
{ "agatylze" , 5 },
{ "peledyštylze" , 6 },
{ "süremtylze" , 7 },
{ "sorlatylze" , 8 },
{ "idymtylze" , 9 },
{ "šyžatylze" , 10 },
{ "kylmetylze" , 11 },
{ "teletylze" , 12 },
// Miami | myaamia
{ "ayaapia kiilhswa" , 1 },
{ "mahkwa kiilhswa" , 2 },
{ "mahkoonsa kiilhswa", 3 },
{ "aanteekwa kiilhswa", 4 },
{ "cecaahkwa kiilhswa", 5 },
{ "wiihkoowia kiilhswa", 6 },
{ "paaphsaahka niipinwiki", 7 },
{ "kišiinkwia kiilhswa", 8 },
{ "mihšiiwia kiilhswa", 9 },
{ "šaašakaayolia kiilhswa", 10 },
{ "kiiyolia kiilhswa" , 11 },
{ "ayaapeensa kiilhswa", 12 },
// Micmac | Mi’gmaq
{ "pnamujuigu’s" , 1 },
{ "apignajit" , 2 },
{ "si’gowigu’s" , 3 },
{ "penatmuigu’s" , 4 },
{ "sqoljuigu’s" , 5 },
{ "nipnigu’s" , 6 },
{ "ps’guigu’s" , 7 },
{ "gisigwegewigu’s" , 8 },
{ "wigumgewigu’s" , 9 },
{ "wigewigu’s" , 10 },
{ "gept’gewigu’s" , 11 },
{ "gesigewigu’s" , 12 },
// Moldavian | moldovenească / молдовеняскэ
{ "ianuarie" , 1 },
{ "februarie" , 2 },
{ "faur" , 2 },
{ "martie" , 3 },
{ "aprilie" , 4 },
{ "mai" , 5 },
{ "iunie" , 6 },
{ "iulie" , 7 },
{ "august" , 8 },
{ "septembrie" , 9 },
{ "octombrie" , 10 },
{ "noiembrie" , 11 },
{ "decembrie" , 12 },
{ "януарие" , 1 },
{ "фебруарие", 2 },
{ "фаур" , 2 },
{ "мартие" , 3 },
{ "априлие" , 4 },
{ "май" , 5 },
{ "юние" , 6 },
{ "юлие" , 7 },
{ "аугуст" , 8 },
{ "септембрие", 9 },
{ "октомбрие", 10 },
{ "ноембрие" , 11 },
{ "дечембрие", 12 },
// Nahuatl | nahuatlahtolli
{ "tlacenti" , 1 },
{ "tlaonti" , 2 },
{ "tlayeti" , 3 },
{ "tlanauhti" , 4 },
{ "tlamacuilti" , 5 },
{ "tlachicuazti" , 6 },
{ "tlachiconti" , 7 },
{ "tlachicueiti" , 8 },
{ "tlachicnauhti" , 9 },
{ "tlamatlacti" , 10 },
{ "tlamactlihuanceti" , 11 },
{ "tlamactlihuanonti" , 12 },
// Neapolitan | nnapulitano
{ "jennaro" , 1 },
{ "frevaro" , 2 },
{ "màrzo" , 3 },
{ "abbrile" , 4 },
{ "maggio" , 5 },
{ "giùgno" , 6 },
{ "luglio" , 7 },
{ "aùsto" , 8 },
{ "settembre" , 9 },
{ "ottovre" , 10 },
{ "nuvembre" , 11 },
{ "dicembre" , 12 },
// Nenets 1 | ненэця” (nenėcjaʿ) see rus
{ "лимбя ирий", 1 },
{ "яре ирий" , 2 },
{ "сие ниць ирий", 3 },
{ "ненэй ниць ирий", 4 },
{ "ты’ саполана ирий", 4 },
{ "нёвды ирий", 5 },
{ "неняӈг’ ирий", 6 },
{ "нявды ирий", 6 },
{ "пилё ирий" , 7 },
{ "таӈы ирий" , 7 },
{ "яв’халы’ ирий", 8 },
{ "пилю’ ирий", 8 },
{ "сельбе ирий", 9 },
{ "вэба ирий" , 9 },
{ "ӈэрёй ирий", 10 },
{ "хор’ ирий", 10 },
{ "носиндалава ирий", 10 },
{ "нюдя пэвдей", 11 },
{ "ӈарка пэвдей", 12 },
{ "limbja irij" , 1 },
{ "jare irij" , 2 },
{ "sije nic' irij" , 3 },
{ "nenėj nic' irij" , 4 },
{ "ty’ sapolana irij", 4 },
{ "nëvdy irij" , 5 },
{ "nenjaṅg’ irij" , 6 },
{ "njavdy irij" , 6 },
{ "pilë irij" , 7 },
{ "taṅy irij" , 7 },
{ "jav’ĥaly’ irij", 8 },
{ "pilju’ irij" , 8 },
{ "sel'be irij" , 9 },
{ "vėba irij" , 9 },
{ "ṅėrëj irij" , 10 },
{ "ĥor’ irij" , 10 },
{ "nosindalava irij" , 10 },
{ "njudja pėvdej" , 11 },
{ "ṅarka pėvdej" , 12 },
// Neo
{ "janar" , 1 },
{ "febrar" , 2 },
{ "mars" , 3 },
{ "april" , 4 },
{ "mey" , 5 },
{ "yunyo" , 6 },
{ "yul" , 7 },
{ "agost" , 8 },
{ "septem(bro)" , 9 },
{ "oktob(bro)" , 10 },
{ "novem(bro)" , 11 },
{ "decem(bro)" , 12 },
// Nepali | नेपाली (nepālī)
{ "जन्वरी", 1 },
{ "फेब्रुअरी", 2 },
{ "मार्च" , 3 },
{ "अप्रिल", 4 },
{ "मई" , 5 },
{ "जून" , 6 },
{ "जूलाई" , 7 },
{ "अगस्त" , 8 },
{ "सितेम्बर", 9 },
{ "अक्टोबर", 10 },
{ "नोभेम्बर", 11 },
{ "डिसेम्बर", 12 },
{ "dzanvarī" , 1 },
{ "pʰebruarī" , 2 },
{ "mārc" , 3 },
{ "april" , 4 },
{ "maī" , 5 },
{ "dzūn" , 6 },
{ "dzūlāī" , 7 },
{ "agast" , 8 },
{ "sitembar" , 9 },
{ "akṭobar" , 10 },
{ "nobʰembar" , 11 },
{ "ḍisembar" , 12 },
// Norman French | nouormand
{ "jaunvyi" , 1 },
{ "févryi" , 2 },
{ "mâr" , 3 },
{ "avri" , 4 },
{ "mouai" , 5 },
{ "juin" , 6 },
{ "juilet" , 7 },
{ "âot" , 8 },
{ "s’tembe" , 9 },
{ "octobe" , 10 },
{ "novembe" , 11 },
{ "décembe" , 12 },
// Northern Sami | davvisámegiella
{ "ođđajagemánnu" , 1 },
{ "guovvamánnu" , 2 },
{ "njukčamánnu" , 3 },
{ "cuoŋománnu" , 4 },
{ "miessemánnu" , 5 },
{ "geassemánnu" , 6 },
{ "suoidnemánnu" , 7 },
{ "borgemánnu" , 8 },
{ "čakčamánnu" , 9 },
{ "golggotmánnu" , 10 },
{ "skábmamánnu" , 11 },
{ "juovlamánnu" , 12 },
// Norwegian | norsk
{ "januar" , 1 },
{ "februar" , 2 },
{ "mars" , 3 },
{ "april" , 4 },
{ "mai" , 5 },
{ "juni" , 6 },
{ "juli" , 7 },
{ "august" , 8 },
{ "september" , 9 },
{ "oktober" , 10 },
{ "november" , 11 },
{ "desember" , 12 },
// Novial
{ "januare" , 1 },
{ "februare" , 2 },
{ "marte" , 3 },
{ "aprile" , 4 },
{ "maye" , 5 },
{ "june" , 6 },
{ "julie" , 7 },
{ "auguste" , 8 },
{ "septembre" , 9 },
{ "oktobre" , 10 },
{ "novembre" , 11 },
{ "desembre" , 12 },
// Occitan | occitan
{ "genièr" , 1 },
{ "febrièr" , 2 },
{ "març" , 3 },
{ "abrial" , 4 },
{ "mai" , 5 },
{ "junh" , 6 },
{ "julhet" , 7 },
{ "agóst" , 8 },
{ "setembre" , 9 },
{ "octobre" , 10 },
{ "novembre" , 11 },
{ "decembre" , 12 },
// Old Church Slavonic | словѣньскъ (slověnĭskŭ)
{ "ианѹарии" , 1 },
{ "феврѹарии", 2 },
{ "мартъ" , 3 },
{ "априль" , 4 },
{ "маи" , 5 },
{ "июнии" , 6 },
{ "июлии" , 7 },
{ "аугѹстъ" , 8 },
{ "септѧбрь" , 9 },
{ "октобрь" , 10 },
{ "ноѩбрь" , 11 },
{ "декѧбрь" , 12 },
{ "ianuarii" , 1 },
{ "fevruarii" , 2 },
{ "martŭ" , 3 },
{ "aprilĭ" , 4 },
{ "mai" , 5 },
{ "ijunii" , 6 },
{ "ijulii" , 7 },
{ "avgustŭ" , 8 },
{ "septębrĭ" , 9 },
{ "oktobrĭ" , 10 },
{ "nojębrĭ" , 11 },
{ "dekębrĭ" , 12 },
// Old English | Englisc
{ "se æfterra gēola", 1 },
{ "solmōnaþ" , 2 },
{ "hreþmōnaþ" , 3 },
{ "Ēastermōnaþ" , 4 },
{ "Þrimilcemōnaþ" , 5 },
{ "sēremōnaþ" , 6 },
{ "mǣdmōnaþ" , 7 },
{ "wēodmōnaþ" , 8 },
{ "hāligmōnaþ" , 9 },
{ "winterfylleþ" , 10 },
{ "blōtmōnaþ" , 11 },
{ "gēolmōnaþ" , 12 },
// Oriya | ଓଡ଼ିଆ (oṛiā)
{ "ଜାନୁଆରି", 1 },
{ "ଫେବ୍ରୁଆରି", 2 },
{ "ମାର୍ଚ୍ଚ", 3 },
{ "ଏପ୍ରିଲ", 4 },
{ "ମେ" , 5 },
{ "ଜୁନ" , 6 },
{ "ଜୁଲାଇ" , 7 },
{ "ଅଗଷ୍ଟ" , 8 },
{ "ସେପ୍ଟେମ୍ବର", 9 },
{ "ଅକ୍ଟୋବର", 10 },
{ "ନଭେମ୍ବର", 11 },
{ "ଡିସେମ୍ବର", 12 },
{ "jānuāri" , 1 },
{ "pʰebruāri" , 2 },
{ "mārčč" , 3 },
{ "epril" , 4 },
{ "me" , 5 },
{ "jun" , 6 },
{ "julāi" , 7 },
{ "ôgôṣṭ" , 8 },
{ "sepṭembôr" , 9 },
{ "ôkṭobôr" , 10 },
{ "nôbʰembôr" , 11 },
{ "ḍisembôr" , 12 },
// Oromo | oromoo
{ "amajjii" , 1 },
{ "guraandhala" , 2 },
{ "bitooteessa" , 3 },
{ "elba" , 4 },
{ "caamsa" , 5 },
{ "waxabajjii" , 6 },
{ "adooleessa" , 7 },
{ "hagayya" , 8 },
{ "fuulbana" , 9 },
{ "onkololeessa" , 10 },
{ "sadaasa" , 11 },
{ "muddee" , 12 },
// Ossetian | ирон (iron)
{ "январь" , 1 },
{ "февраль" , 2 },
{ "март" , 3 },
{ "апрель" , 4 },
{ "май" , 5 },
{ "июнь" , 6 },
{ "июль" , 7 },
{ "август" , 8 },
{ "сентябрь" , 9 },
{ "октябрь" , 10 },
{ "ноябрь" , 11 },
{ "декабрь" , 12 },
{ "janvar'" , 1 },
{ "fevral'" , 2 },
{ "mart" , 3 },
{ "aprel'" , 4 },
{ "maj" , 5 },
{ "ijun'" , 6 },
{ "ijul'" , 7 },
{ "avgust" , 8 },
{ "sentjabr'" , 9 },
{ "oktjabr'" , 10 },
{ "nojabr'" , 11 },
{ "dekabr'" , 12 },
// Papiamento | Papiamentu
{ "yanuari" , 1 },
{ "febrüari" , 2 },
{ "mart" , 3 },
{ "aprel" , 4 },
{ "mei" , 5 },
{ "yüni" , 6 },
{ "yüli" , 7 },
{ "ougùstùs" , 8 },
{ "sèptèmber" , 9 },
{ "òktober" , 10 },
{ "novèmber" , 11 },
{ "desèmber" , 12 },
// Pashto | پښتو (paŝto)
{ "جنوري" , 1 },
{ "فبروري" , 2 },
{ "مارچ" , 3 },
{ "اپريل" , 4 },
{ "مې ؛ مۍ" , 5 },
{ "جون" , 6 },
{ "جولاي" , 7 },
{ "اګست" , 8 },
{ "سپتمبر" , 9 },
{ "اکتوبر" , 10 },
{ "نومبر" , 11 },
{ "دسمبر" , 12 },
{ "janwarī" , 1 },
{ "fabrūarī" , 2 },
{ "mārč" , 3 },
{ "aprīl" , 4 },
{ "me" , 5 },
{ "məy" , 5 },
{ "jūn" , 6 },
{ "jūlāy" , 7 },
{ "agəst" , 8 },
{ "siptambər" , 9 },
{ "aktobər" , 10 },
{ "nuwambər" , 11 },
{ "disambər" , 12 },
// Pedi 1 | sePedi
{ "janaware" , 1 },
{ "feberware" , 2 },
{ "matšhe" , 3 },
{ "aporele" , 4 },
{ "mei" , 5 },
{ "june" , 6 },
{ "julae" , 7 },
{ "agostose" , 8 },
{ "setemere" , 9 },
{ "oktobore" , 10 },
{ "nofemere" , 11 },
{ "disemere" , 12 },
// Pennsylvania German | Pennsilfaani-Deitsch / Pennſilfaani-Deitſch
{ "yenner" , 1 },
{ "harning" , 2 },
{ "marz" , 3 },
{ "abril" , 4 },
{ "moi" , 5 },
{ "tschun" , 6 },
{ "tschulei" , 7 },
{ "aaguscht" , 8 },
{ "augscht" , 8 },
{ "september" , 9 },
{ "oktower" , 10 },
{ "nowember" , 11 },
{ "disember" , 12 },
{ "dezember" , 12 },
{ "yenner" , 1 },
{ "harning" , 2 },
{ "marz" , 3 },
{ "abril" , 4 },
{ "moi" , 5 },
{ "tſchun" , 6 },
{ "tſchulei" , 7 },
{ "aaguſcht" , 8 },
{ "augſcht" , 8 },
{ "september" , 9 },
{ "oktower" , 10 },
{ "nowember" , 11 },
{ "diſember" , 12 },
{ "dezember" , 12 },
// Persian 1 | فارسی (fārsī)
{ "ژانویه" , 1 },
{ "فوریه" , 2 },
{ "مارس" , 3 },
{ "آوریل" , 4 },
{ "مه" , 5 },
{ "ژوئن" , 6 },
{ "ژوئیه ؛ ژویه", 7 },
{ "اوت" , 8 },
{ "سپتامبر" , 9 },
{ "اکتبر" , 10 },
{ "نوامبر" , 11 },
{ "دسامبر" , 12 },
{ "žanvīye" , 1 },
{ "fevrīye" , 2 },
{ "mārs" , 3 },
{ "āvrīl" , 4 },
{ "me" , 5 },
{ "žūʾan" , 6 },
{ "žūʾīye" , 7 },
{ "žūye" , 7 },
{ "ūt" , 8 },
{ "septāmbr" , 9 },
{ "oktobr" , 10 },
{ "novāmbr" , 11 },
{ "desāmbr" , 12 },
// Plautdietsch / Plautdietſch
{ "jaunwoa" , 1 },
{ "febawoa" , 2 },
{ "moaz" , 3 },
{ "aprel" , 4 },
{ "mai" , 5 },
{ "jüni" , 6 },
{ "jüli" , 7 },
{ "august" , 8 },
{ "septamba" , 9 },
{ "oktoba" , 10 },
{ "novamba" , 11 },
{ "dezamba" , 12 },
{ "jaunwoa" , 1 },
{ "febawoa" , 2 },
{ "moaz" , 3 },
{ "aprel" , 4 },
{ "mai" , 5 },
{ "jüni" , 6 },
{ "jüli" , 7 },
{ "auguſt" , 8 },
{ "septamba" , 9 },
{ "oktoba" , 10 },
{ "novamba" , 11 },
{ "dezamba" , 12 },
// Polish | polski
{ "styczeń" , 1 },
{ "luty" , 2 },
{ "marzec" , 3 },
{ "kwiecień" , 4 },
{ "maj" , 5 },
{ "czerwiec" , 6 },
{ "lipiec" , 7 },
{ "sierpień" , 8 },
{ "wrzesień" , 9 },
{ "październik" , 10 },
{ "listopad" , 11 },
{ "grudzień" , 12 },
// Portuguese | português
{ "janeiro" , 1 },
{ "fevereiro" , 2 },
{ "março" , 3 },
{ "abril" , 4 },
{ "maio" , 5 },
{ "junho" , 6 },
{ "julho" , 7 },
{ "agosto" , 8 },
{ "setembro" , 9 },
{ "outubro" , 10 },
{ "novembro" , 11 },
{ "dezembro" , 12 },
// Provençal | prouvençau
{ "janvié" , 1 },
{ "febrié" , 2 },
{ "mars" , 3 },
{ "abriéu" , 4 },
{ "mai" , 5 },
{ "jun" , 6 },
{ "juliet" , 7 },
{ "avoust" , 8 },
{ "sètembre" , 9 },
{ "óutobre" , 10 },
{ "nouvèmbre" , 11 },
{ "desèmbre" , 12 },
// Punjabi (India) | ਪੰਜਾਬੀ (paṁjābī)
{ "ਜਨਵਰੀ" , 1 },
{ "ਫ਼ਰਵਰੀ" , 2 },
{ "ਮਾਰਚ" , 3 },
{ "ਅਪ੍ਰੈਲ", 4 },
{ "ਮਈ" , 5 },
{ "ਜੂਨ" , 6 },
{ "ਜੁਲਾਈ" , 7 },
{ "ਅਗਸਤ" , 8 },
{ "ਸਤੰਬਰ" , 9 },
{ "ਅਕਤੂਬਰ", 10 },
{ "ਨਵੰਬਰ" , 11 },
{ "ਦਸੰਬਰ" , 12 },
{ "janvarī" , 1 },
{ "farvarī" , 2 },
{ "mārč" , 3 },
{ "apræl" , 4 },
{ "maī" , 5 },
{ "jūn" , 6 },
{ "julāī" , 7 },
{ "agast" , 8 },
{ "sataṁbar" , 9 },
{ "aktūbar" , 10 },
{ "navaṁbar" , 11 },
{ "dasaṁbar" , 12 },
// Quechua 1 | Runasimi; Qhichwa
{ "qhulla puquy killa", 1 },
{ "hatun puquy killa" , 2 },
{ "pawqar waray killa", 3 },
{ "ayriway killa" , 4 },
{ "aymuray killa" , 5 },
{ "inti raymi killa" , 6 },
{ "anta situwa killa" , 7 },
{ "chakra yapuy killa", 8 },
{ "qhapaq situwa killa", 8 },
{ "tarpuy killa" , 9 },
{ "quya raymi killa" , 9 },
{ "uma raymi killa" , 9 },
{ "kantaray killa" , 10 },
{ "ayamarq’a killa" , 11 },
{ "qhapaq raymi killa", 12 },
// Raeto-Romance, Grisons | rumantsch grischun
{ "schaner" , 1 },
{ "favrer" , 2 },
{ "mars" , 3 },
{ "avrigl" , 4 },
{ "matg" , 5 },
{ "zercladur" , 6 },
{ "fanadur" , 7 },
{ "avust" , 8 },
{ "settember" , 9 },
{ "october" , 10 },
{ "november" , 11 },
{ "december" , 12 },
// Red Kurdish 1 (Caucasus) | kurdî / к’ӧрди / کوردی, kurmancî / кӧрманщи / کورمانجی
{ "kanûna sanî" , 1 },
{ "kanûna paşin" , 1 },
{ "şivat" , 2 },
{ "sibat" , 2 },
{ "adar" , 3 },
{ "nîsan" , 4 },
{ "gulan" , 5 },
{ "ḧezîran" , 6 },
{ "tîrmeh" , 7 },
{ "tîrme" , 7 },
{ "temûz" , 7 },
{ "tebax" , 8 },
{ "îlon" , 9 },
{ "îlûn" , 9 },
{ "teşrînê ewil" , 10 },
{ "teşrînê pêşin", 10 },
{ "çirîya ewil" , 10 },
{ "çirîya pêşin" , 10 },
{ "teşrînê sanî" , 11 },
{ "teşrînê paşin" , 11 },
{ "çirîya sanî" , 11 },
{ "çirîyapaşin" , 11 },
{ "kanûna ewil" , 12 },
{ "kanûna pêşin" , 12 },
{ "к’ануна сани", 1 },
{ "к’ануна пашьн", 1 },
{ "шьват" , 2 },
{ "сьбат" , 2 },
{ "адар" , 3 },
{ "нисан" , 4 },
{ "гӧлан" , 5 },
{ "һ’әзиран" , 6 },
{ "тирмәһ" , 7 },
{ "тирмә" , 7 },
{ "т’әмуз" , 7 },
{ "т’әбах" , 8 },
{ "илон" , 9 },
{ "илун" , 9 },
{ "т’әшрине әwьл", 10 },
{ "т’әшрине пешьн", 10 },
{ "чьрийа әwьл", 10 },
{ "чьрийа пешьн", 10 },
{ "т’әшрине сани", 11 },
{ "т’әшрине пашьн", 11 },
{ "чьрийа сани", 11 },
{ "чьрийа пашьн", 11 },
{ "к’ануна әwьл", 12 },
{ "к’ануна пешьн", 12 },
{ "کانوونا سانی ؛ کانوونا پاشن", 1 },
{ "شڤات ؛ سبات", 2 },
{ "ئادار" , 3 },
{ "نیسان" , 4 },
{ "گولان" , 5 },
{ "حهزیران" , 6 },
{ "تیرمهه ؛ تیرمه ؛ تهمووز", 7 },
{ "تهباخ" , 8 },
{ "ئیلۆن ؛ ئیلوون", 9 },
{ "تهشرینێ ئهول ؛ تهشرینێ پێشن ؛ چرییا ئهول ؛ چرییا پێشن", 10 },
{ "تهشرینێ سانی ؛ تهشرینێ پاشن ؛ چرییا سانی ؛ چرییا پاشن", 11 },
{ "کانوونا ئهول ؛ کانوونا پێشن", 12 },
// Romanian | română
{ "ianuarie" , 1 },
{ "februarie" , 2 },
{ "martie" , 3 },
{ "aprilie" , 4 },
{ "mai" , 5 },
{ "iunie" , 6 },
{ "iulie" , 7 },
{ "august" , 8 },
{ "septembrie" , 9 },
{ "octombrie" , 10 },
{ "noiembrie" , 11 },
{ "decembrie" , 12 },
// Rundi | kiRundi
{ "ukwambwere" , 1 },
{ "ukwakabiri" , 2 },
{ "ukwagatatu" , 3 },
{ "ukwakane" , 4 },
{ "ukwagatanu" , 5 },
{ "ukwagatandatu" , 6 },
{ "ukwindwi" , 7 },
{ "ukwumunani" , 8 },
{ "ukwicenda" , 9 },
{ "ukwicumi" , 10 },
{ "ukwicuminarimwe" , 11 },
{ "ukwicuminakabiri" , 12 },
// Russian | русский (russkij)
{ "январь" , 1 },
{ "февраль" , 2 },
{ "март" , 3 },
{ "апрель" , 4 },
{ "май" , 5 },
{ "июнь" , 6 },
{ "июль" , 7 },
{ "август" , 8 },
{ "сентябрь" , 9 },
{ "октябрь" , 10 },
{ "ноябрь" , 11 },
{ "декабрь" , 12 },
{ "janvar'" , 1 },
{ "fevral'" , 2 },
{ "mart" , 3 },
{ "aprel'" , 4 },
{ "maj" , 5 },
{ "ijun'" , 6 },
{ "ijul'" , 7 },
{ "avgust" , 8 },
{ "sentjabr'" , 9 },
{ "oktjabr'" , 10 },
{ "nojabr'" , 11 },
{ "dekabr'" , 12 },
// Saanich | xʷsenəčqən
{ "siʔsət" , 1 },
{ "ŋiʔŋənəʔ" , 2 },
{ "wəx̣əs" , 3 },
{ "pəx̣sisəŋ" , 4 },
{ "sx̣ʷeʔnəɬ" , 5 },
{ "pən̕exʷəŋ" , 6 },
{ "čən̕θəqəy̕" , 7 },
{ "čən̕hənən" , 8 },
{ "čən̕θew̕ən" , 9 },
{ "pəq̕əlenəxʷ" , 10 },
{ "xʷəsəlenəxʷ" , 11 },
{ "xʷsčəl̕kʷeʔsən", 12 },
// Saint Lucia Creole | kwéyòl
{ "janvyé" , 1 },
{ "févwiyé" , 2 },
{ "mas" , 3 },
{ "avwi" , 4 },
{ "mé" , 5 },
{ "jen" , 6 },
{ "jwiyèt" , 7 },
{ "au" , 8 },
{ "sèptanm" , 9 },
{ "òktòb" , 10 },
{ "novanm" , 11 },
{ "désanm" , 12 },
// Sakha / Yakut | саха (saĥa)
{ "тохсунньу", 1 },
{ "олунньу" , 2 },
{ "кулун тутар ый", 3 },
{ "муус устар ый", 4 },
{ "ыам ыйа" , 5 },
{ "бэс ыйа" , 6 },
{ "от ыйа" , 7 },
{ "атырдьах ыйа", 8 },
{ "балаҕан ыйа", 9 },
{ "алтынньы" , 10 },
{ "сэтинньи" , 11 },
{ "ахсынньы" , 12 },
{ "toĥsunn'u" , 1 },
{ "olunn'u" , 2 },
{ "kulun tutar yj" , 3 },
{ "muus ustar yj" , 4 },
{ "yam yja" , 5 },
{ "bäs yja" , 6 },
{ "ot yja" , 7 },
{ "atyrd'aĥ yja" , 8 },
{ "balaġan yja" , 9 },
{ "altynn'y" , 10 },
{ "sätinn'i" , 11 },
{ "aĥsynn'y" , 12 },
// Sango 1 | sängö
{ "zamviëe" , 1 },
{ "fevriëe" , 2 },
{ "mârsi" , 3 },
{ "avrîli" , 4 },
{ "mêe" , 5 },
{ "zuyën" , 6 },
{ "zuyêti" , 7 },
{ "ûti" , 8 },
{ "sëtâmbere" , 9 },
{ "ötôbere" , 10 },
{ "növâmbere" , 11 },
{ "dïsâmbere" , 12 },
// Sanskrit | संस्कृतम् (saṁskr̥tam)
{ "जनवरी" , 1 },
{ "फरवरी" , 2 },
{ "मार्च" , 3 },
{ "अप्रैल", 4 },
{ "मई" , 5 },
{ "जून" , 6 },
{ "जुलाई" , 7 },
{ "अगस्त" , 8 },
{ "सितम्बर", 9 },
{ "अक्तूबर", 10 },
{ "नवम्बर", 11 },
{ "दिसम्बर", 12 },
{ "janvarī" , 1 },
{ "pʰarvarī" , 2 },
{ "mārč" , 3 },
{ "apræl" , 4 },
{ "maī" , 5 },
{ "jūn" , 6 },
{ "julāī" , 7 },
{ "agast" , 8 },
{ "sitambar" , 9 },
{ "aktūbar" , 10 },
{ "navambar" , 11 },
{ "disambar" , 12 },
// Sardinian | sardu
{ "bennàlzu" , 1 },
{ "fiàrgiu" , 2 },
{ "màltu" , 3 },
{ "abríbi" , 4 },
{ "màgiu" , 5 },
{ "làmpadas" , 6 },
{ "alzòlas" , 7 },
{ "agústu" , 8 },
{ "cabidànne" , 9 },
{ "santigaíni" , 10 },
{ "santandría" , 11 },
{ "nadàbi" , 12 },
// Scots
{ "januar" , 1 },
{ "februar" , 2 },
{ "mairch" , 3 },
{ "apryle" , 4 },
{ "mey" , 5 },
{ "juin" , 6 },
{ "julie" , 7 },
{ "augist" , 8 },
{ "september" , 9 },
{ "october" , 10 },
{ "november" , 11 },
{ "dizember" , 12 },
// Scots Gaelic | Gàidhlig
{ "an faoilteach" , 1 },
{ "an gearran" , 2 },
{ "an màrt" , 3 },
{ "an giblean" , 4 },
{ "an ceitean" , 5 },
{ "an t-Òg-mhios" , 6 },
{ "an t-luchar" , 7 },
{ "an lùnasdal" , 8 },
{ "an t-sultain" , 9 },
{ "an dàmhair" , 10 },
{ "an t-samhain" , 11 },
{ "an dùbhlachd" , 12 },
// Serbian | српски / srpski
{ "јануар" , 1 },
{ "фебруар" , 2 },
{ "март" , 3 },
{ "април" , 4 },
{ "мај" , 5 },
{ "јуни" , 6 },
{ "јули" , 7 },
{ "август" , 8 },
{ "септембар", 9 },
{ "октобар" , 10 },
{ "новембар" , 11 },
{ "децембар" , 12 },
{ "januar" , 1 },
{ "februar" , 2 },
{ "mart" , 3 },
{ "april" , 4 },
{ "maj" , 5 },
{ "juni" , 6 },
{ "juli" , 7 },
{ "avgust" , 8 },
{ "septembar" , 9 },
{ "oktobar" , 10 },
{ "novembar" , 11 },
{ "decembar" , 12 },
// Seychelles Creole | seselwa
{ "zanvye" , 1 },
{ "fevriye" , 2 },
{ "mars" , 3 },
{ "avril" , 4 },
{ "me" , 5 },
{ "zen" , 6 },
{ "zilyet" , 7 },
{ "out" , 8 },
{ "septanm" , 9 },
{ "oktob" , 10 },
{ "novanm" , 11 },
{ "desanm" , 12 },
// Shona | chiShona
{ "ndira" , 1 },
{ "kukadzi" , 2 },
{ "kurume" , 3 },
{ "kubvumbi" , 4 },
{ "chivabvu" , 5 },
{ "chikumi" , 6 },
{ "chikunguru" , 7 },
{ "nyamavhuvhu" , 8 },
{ "gunyana" , 9 },
{ "gumiguru" , 10 },
{ "mbudzi" , 11 },
{ "zvita" , 12 },
// Sicilian | sicilianu
{ "jinnaru" , 1 },
{ "frivaru" , 2 },
{ "marzu" , 3 },
{ "aprili" , 4 },
{ "maiu" , 5 },
{ "giugnu" , 6 },
{ "giugnettu" , 7 },
{ "austu" , 8 },
{ "sittèmmiru" , 9 },
{ "uttùviru" , 10 },
{ "nuvèmmiru" , 11 },
{ "dicèmmiru" , 12 },
// Sindhi | سنڌي (sindʰī)
{ "جنوري" , 1 },
{ "فيبروري" , 2 },
{ "مارچ" , 3 },
{ "اپريل" , 4 },
{ "مئي" , 5 },
{ "جون" , 6 },
{ "جولاءِ" , 7 },
{ "آگسٽ" , 8 },
{ "سيپٽمبر" , 9 },
{ "آڪٽوبر" , 10 },
{ "نومبر" , 11 },
{ "ڊسمبر" , 12 },
{ "janvarī" , 1 },
{ "febravarī" , 2 },
{ "mārču" , 3 },
{ "aprīlu" , 4 },
{ "meʾī" , 5 },
{ "jūn" , 6 },
{ "jūlāʾi" , 7 },
{ "āgasṫu" , 8 },
{ "sepṫambaru" , 9 },
{ "ākṫobaru" , 10 },
{ "navambaru" , 11 },
{ "ḍisambaru" , 12 },
// Sinhalese | සිංහල (siṁhala)
{ "ජනවාරි", 1 },
{ "පෙබරවාරි", 2 },
{ "මාර්තු", 3 },
{ "අප්රේල්", 4 },
{ "මැයි" , 5 },
{ "ජූනි" , 6 },
{ "ජූලි" , 7 },
{ "අගෝස්තු", 8 },
{ "සැප්තැම්බර්", 9 },
{ "ඔක්තෝබර්", 10 },
{ "නොවැම්බර්", 11 },
{ "දෙසැම්බර්", 12 },
{ "janavāri" , 1 },
{ "pebaravāri" , 2 },
{ "mārtu" , 3 },
{ "aprēl" , 4 },
{ "mæyi" , 5 },
{ "jūni" , 6 },
{ "jūli" , 7 },
{ "agōstu" , 8 },
{ "sæptæmbar" , 9 },
{ "oktōbar" , 10 },
{ "novæmbar" , 11 },
{ "desæmbar" , 12 },
// Skolt Sami | sää´mǩiõll
{ "ođđee´jjmään" , 1 },
{ "tä´lvvmään" , 2 },
{ "pâ´zzlâšttammään", 3 },
{ "njuhččmään" , 4 },
{ "vue´ssmään" , 5 },
{ "ǩie´ssmään" , 6 },
{ "suei´nnmään" , 7 },
{ "på´rǧǧmään" , 8 },
{ "čõhččmään" , 9 },
{ "kålggmään" , 10 },
{ "skamm’mään" , 11 },
{ "rosttovmään" , 12 },
// Slovak | slovenčina
{ "január" , 1 },
{ "február" , 2 },
{ "marec" , 3 },
{ "apríl" , 4 },
{ "máj" , 5 },
{ "jún" , 6 },
{ "júl" , 7 },
{ "august" , 8 },
{ "september" , 9 },
{ "október" , 10 },
{ "november" , 11 },
{ "december" , 12 },
// Slovenian 1 | slovenščina
{ "januar" , 1 },
{ "februar" , 2 },
{ "marec" , 3 },
{ "april" , 4 },
{ "maj" , 5 },
{ "junij" , 6 },
{ "julij" , 7 },
{ "avgust" , 8 },
{ "september" , 9 },
{ "oktober" , 10 },
{ "november" , 11 },
{ "december" , 12 },
// Slovio | Slovio / Словио
{ "januar" , 1 },
{ "februar" , 2 },
{ "marc" , 3 },
{ "april" , 4 },
{ "mai" , 5 },
{ "juni" , 6 },
{ "juli" , 7 },
{ "august" , 8 },
{ "septembr" , 9 },
{ "oktobr" , 10 },
{ "novembr" , 11 },
{ "decembr" , 12 },
{ "Йануар" , 1 },
{ "Фебруар" , 2 },
{ "Марц" , 3 },
{ "Април" , 4 },
{ "Маи" , 5 },
{ "Йуни" , 6 },
{ "Йули" , 7 },
{ "Аугуст" , 8 },
{ "Септембр" , 9 },
{ "Октобр" , 10 },
{ "Новембр" , 11 },
{ "Децембр" , 12 },
// Somali | Soomaaliga
{ "jannaayo" , 1 },
{ "febraayo" , 2 },
{ "maarso" , 3 },
{ "abriil" , 4 },
{ "abriile" , 4 },
{ "maajo" , 5 },
{ "juunyo" , 6 },
{ "juun" , 6 },
{ "luulyo" , 7 },
{ "agoosto" , 8 },
{ "septembar" , 9 },
{ "setembar" , 9 },
{ "sibtambar" , 9 },
{ "oktoobar" , 10 },
{ "otoobar" , 10 },
{ "noofembar" , 11 },
{ "disembar" , 12 },
// Sotho | seSotho
{ "pherekgong" , 1 },
{ "hlakola" , 2 },
{ "hlakubele" , 3 },
{ "mmesa" , 4 },
{ "motsheanong" , 5 },
{ "phupjane" , 6 },
{ "phupu" , 7 },
{ "phato" , 8 },
{ "lwetse" , 9 },
{ "mphalane" , 10 },
{ "pudungwana" , 11 },
{ "tshitwe" , 12 },
// Southern Sami | åarjelsaemiengïele
{ "tsïengele" , 1 },
{ "goevte" , 2 },
{ "njoktje" , 3 },
{ "voerhtje" , 4 },
{ "suehpede" , 5 },
{ "ruffie" , 6 },
{ "snjaltje" , 7 },
{ "mïetske" , 8 },
{ "skïerede" , 9 },
{ "golke" , 10 },
{ "rahka" , 11 },
{ "goeve" , 12 },
// Spanish | español
{ "enero" , 1 },
{ "febrero" , 2 },
{ "marzo" , 3 },
{ "abril" , 4 },
{ "mayo" , 5 },
{ "junio" , 6 },
{ "julio" , 7 },
{ "agosto" , 8 },
{ "septiembre" , 9 },
{ "octubre" , 10 },
{ "noviembre" , 11 },
{ "diciembre" , 12 },
// Sundanese | basa Sunda
{ "januari" , 1 },
{ "pébruari" , 2 },
{ "maret" , 3 },
{ "april" , 4 },
{ "méi" , 5 },
{ "juni" , 6 },
{ "juli" , 7 },
{ "agustus" , 8 },
{ "séptémber" , 9 },
{ "oktober" , 10 },
{ "nopémber" , 11 },
{ "désémber" , 12 },
// Swahili | kiswahili
{ "januari" , 1 },
{ "februari" , 2 },
{ "machi" , 3 },
{ "aprili" , 4 },
{ "mei" , 5 },
{ "juni" , 6 },
{ "julai" , 7 },
{ "agosti" , 8 },
{ "septemba" , 9 },
{ "oktoba" , 10 },
{ "novemba" , 11 },
{ "desemba" , 12 },
// Swati | siSwati
{ "bhimbidvwane" , 1 },
{ "indlovana" , 2 },
{ "indlovu-lenkhulu" , 3 },
{ "mabasa" , 4 },
{ "inkhwekhweti" , 5 },
{ "inhlaba" , 6 },
{ "kholwane" , 7 },
{ "ingci" , 8 },
{ "inyoni" , 9 },
{ "imphala" , 10 },
{ "lidvuba" , 11 },
{ "lweti" , 11 },
{ "ingongoni" , 12 },
// Swedish 1 | svenska
{ "januari" , 1 },
{ "februari" , 2 },
{ "mars" , 3 },
{ "april" , 4 },
{ "maj" , 5 },
{ "juni" , 6 },
{ "juli" , 7 },
{ "augusti" , 8 },
{ "september" , 9 },
{ "oktober" , 10 },
{ "november" , 11 },
{ "december" , 12 },
// Syriac 1 (Common) | ܣܘܪܝܐܝܐ (sūryāyā)
{ "ܟܢܘܢ ܐܚܪܝ" , 1 },
{ "ܫܒܛ" , 2 },
{ "ܐܕܪ" , 3 },
{ "ܢܝܣܢ" , 4 },
{ "ܐܝܪ" , 5 },
{ "ܚܙܝܪܢ" , 6 },
{ "ܬܡܘܙ" , 7 },
{ "ܐܒ" , 8 },
{ "ܐܝܠܘܠ" , 9 },
{ "ܐܠܘܠ" , 9 },
{ "ܬܫܪܝܢ ܩܕܡ" , 10 },
{ "ܬܫܪܝܢ ܩܕܝܡ", 10 },
{ "ܬܫܪܝܢ ܐܚܪܝ", 11 },
{ "ܟܢܘܢ ܩܕܡ" , 12 },
{ "ܟܢܘܢ ܩܕܝܡ" , 12 },
{ "kānūn [’]ḥərāy", 1 },
{ "šəḇāṭ" , 2 },
{ "āḏār" , 3 },
{ "nīsān" , 4 },
{ "īyār" , 5 },
{ "ḥəzīrān" , 6 },
{ "tāmūz" , 7 },
{ "āḇ" , 8 },
{ "ẹ̄lūl" , 9 },
{ "tešrī[n] qəḏem", 10 },
{ "tešrī[n] qəḏīm", 10 },
{ "tešrī[n] [’]ḥərāy", 11 },
{ "kānūn qəḏem" , 12 },
{ "kānūn qəḏīm" , 12 },
// Tagalog
{ "enero" , 1 },
{ "pebrero" , 2 },
{ "marso" , 3 },
{ "abril" , 4 },
{ "mayo" , 5 },
{ "hunyo" , 6 },
{ "hulyo" , 7 },
{ "agosto" , 8 },
{ "setyembre" , 9 },
{ "oktubre" , 10 },
{ "nobyembre" , 11 },
{ "disyembre" , 12 },
// Tahitian | reo Tahiti
{ "tenuare" , 1 },
{ "fepuare" , 2 },
{ "māti" , 3 },
{ "ʻeperēra" , 4 },
{ "mē" , 5 },
{ "tiunu" , 6 },
{ "tiurai" , 7 },
{ "ʻātete" , 8 },
{ "tetepa" , 9 },
{ "ʻatopa" , 10 },
{ "noema" , 11 },
{ "novema" , 11 },
{ "titema" , 12 },
// Taiwanese | 台灣話 (tâi-oân-oē)
{ "ä¸æ" , 1 },
{ "äºæ" , 2 },
{ "ä¸æ" , 3 },
{ "åæ" , 4 },
{ "äºæ" , 5 },
{ "å
æ" , 6 },
{ "ä¸æ" , 7 },
{ "å
«æ" , 8 },
{ "ä¹æ" , 9 },
{ "åæ" , 10 },
{ "åä¸æ" , 11 },
{ "åäºæ" , 12 },
{ "it-gue̍h" , 1 },
{ "jī-gue̍h" , 2 },
{ "saⁿ-gue̍h" , 3 },
{ "sì-gue̍h" , 4 },
{ "gơ̅-gue̍h" , 5 },
{ "la̍k-gue̍h" , 6 },
{ "chit-gue̍h" , 7 },
{ "peh-gue̍h" , 8 },
{ "káu-gue̍h" , 9 },
{ "tsa̍p-gue̍h" , 10 },
{ "tsa̍p-it-gue̍h" , 11 },
{ "tsa̍p-jī-gue̍h" , 12 },
// Tajik | тоҷикӣ (toǧikī) / تاجیکی (tōjīkī)
{ "январ" , 1 },
{ "феврал" , 2 },
{ "март" , 3 },
{ "апрел" , 4 },
{ "май" , 5 },
{ "июн" , 6 },
{ "июл" , 7 },
{ "август" , 8 },
{ "сентябр" , 9 },
{ "октябр" , 10 },
{ "ноябр" , 11 },
{ "декабр" , 12 },
{ "janvar" , 1 },
{ "fevral" , 2 },
{ "mart" , 3 },
{ "aprel" , 4 },
{ "maj" , 5 },
{ "ijun" , 6 },
{ "ijul" , 7 },
{ "avgust" , 8 },
{ "sentjabr" , 9 },
{ "oktjabr" , 10 },
{ "nojabr" , 11 },
{ "dekabr" , 12 },
{ "ینور" , 1 },
{ "فیورل" , 2 },
{ "مرت" , 3 },
{ "اپریل" , 4 },
{ "می" , 5 },
{ "اییون" , 6 },
{ "اییول" , 7 },
{ "اوگوست" , 8 },
{ "سینتیبر" , 9 },
{ "آکتیبر" , 10 },
{ "نایبر" , 11 },
{ "دیکبر" , 12 },
{ "yanvar" , 1 },
{ "fevral" , 2 },
{ "mart" , 3 },
{ "aprel" , 4 },
{ "mai" , 5 },
{ "īyūn" , 6 },
{ "īyūl" , 7 },
{ "augūst" , 8 },
{ "sentyabr" , 9 },
{ "ōktyabr" , 10 },
{ "nōyabr" , 11 },
{ "dekabr" , 12 },
// Tamazight (Libya) | ⵜⴰⵎⴰⵣⵉⵖⵜ / tamaziγt
{ "ⵢⴻⵏⴰⵕ" , 1 },
{ "ⴼⵓⵕⴰⵕ" , 2 },
{ "ⵎⴰⵔⴻⵙ" , 3 },
{ "ⵢⴻⴱⵔⵉⵔ", 4 },
{ "ⵎⴰⵢⵓ" , 5 },
{ "ⵢⵓⵏⵢⵓ" , 6 },
{ "ⵢⵓⵍⵢⵓⵣ", 7 },
{ "ⵖⵓⵛⴻⵜ" , 8 },
{ "ⵛⵜⴻⵏⴱⴻⵕ", 9 },
{ "ⵜⵓⴱⴻⵕ" , 10 },
{ "ⵓⵡⴻⵏⴱⵉⵔ", 11 },
{ "ⴷⵉⵊⴻⵏⴱⵉⵔ", 12 },
{ "yenaṛ" , 1 },
{ "fuṛaṛ" , 2 },
{ "mares" , 3 },
{ "yebrir" , 4 },
{ "mayu" , 5 },
{ "yunyu" , 6 },
{ "yulyuz" , 7 },
{ "γušet" , 8 },
{ "štenbeṛ" , 9 },
{ "tubeṛ" , 10 },
{ "uwenbir" , 11 },
{ "diženbir" , 12 },
// Tamil | தமிழ் (tamiḻ)
{ "ஜனவரி" , 1 },
{ "பெப்ரவரி", 2 },
{ "பிப்ரவரி", 2 },
{ "பெப்ருவரி", 2 },
{ "மார்ச்", 3 },
{ "மார்சு", 3 },
{ "ஏப்ரல்", 4 },
{ "ஏப்ரில்", 4 },
{ "மே" , 5 },
{ "ஜூன்" , 6 },
{ "ஜூலை" , 7 },
{ "ஆகஸ்ட்", 8 },
{ "ஆகஸ்டு", 8 },
{ "செப்டம்பர்", 9 },
{ "செப்டெம்பர்", 9 },
{ "அக்டோபர்", 10 },
{ "நவம்பர்", 11 },
{ "நொவம்பர்", 11 },
{ "டிசம்பர்", 12 },
{ "jaṉavari" , 1 },
{ "pepravari" , 2 },
{ "pipravari" , 2 },
{ "pepruvari" , 2 },
{ "mārč" , 3 },
{ "mārču" , 3 },
{ "ēpral" , 4 },
{ "ēpril" , 4 },
{ "mē" , 5 },
{ "jūṉ" , 6 },
{ "jūlai" , 7 },
{ "ākasṭ" , 8 },
{ "ākasṭu" , 8 },
{ "čepṭampar" , 9 },
{ "čepṭempar" , 9 },
{ "akṭōpar" , 10 },
{ "navampar" , 11 },
{ "novampar" , 11 },
{ "ṭičampar" , 12 },
// Tatar | татарча / tatarça
{ "гыйнвар" , 1 },
{ "февраль" , 2 },
{ "март" , 3 },
{ "апрель" , 4 },
{ "май" , 5 },
{ "июнь" , 6 },
{ "июль" , 7 },
{ "август" , 8 },
{ "сентябрь" , 9 },
{ "октябрь" , 10 },
{ "ноябрь" , 11 },
{ "декабрь" , 12 },
{ "ğıynvar" , 1 },
{ "fevral" , 2 },
{ "mart" , 3 },
{ "aprel" , 4 },
{ "may" , 5 },
{ "iyun" , 6 },
{ "iyul" , 7 },
{ "avgust" , 8 },
{ "sentyabr" , 9 },
{ "oktyabr" , 10 },
{ "noyabr" , 11 },
{ "dekabr" , 12 },
// Telugu | తెలుగు (telugu)
{ "జనవరి" , 1 },
{ "ఫిబ్రవరి", 2 },
{ "మార్చి", 3 },
{ "ఏప్రిల్", 4 },
{ "మే" , 5 },
{ "జూన్" , 6 },
{ "జూలై" , 7 },
{ "ఆగస్టు", 8 },
{ "సెప్టెంబర్", 9 },
{ "అక్టోబర్", 10 },
{ "నవంబర్", 11 },
{ "డిసెంబర్", 12 },
{ "janvari" , 1 },
{ "pʰibravari" , 2 },
{ "mārči" , 3 },
{ "ēpril" , 4 },
{ "mē" , 5 },
{ "jūn" , 6 },
{ "jūlai" , 7 },
{ "āgasṭu" , 8 },
{ "sepṭeṁbar" , 9 },
{ "akṭōbar" , 10 },
{ "navaṁbar" , 11 },
{ "ḍisaṁbar" , 12 },
// Tetum | tetun
{ "janeiru" , 1 },
{ "fevereiru" , 2 },
{ "marsu" , 3 },
{ "abríl" , 4 },
{ "maiu" , 5 },
{ "juñu" , 6 },
{ "jullu" , 7 },
{ "agostu" , 8 },
{ "setembru" , 9 },
{ "outubru" , 10 },
{ "novembru" , 11 },
{ "dezembru" , 12 },
// Thai 1 | ภาษาไทย (pʰāsā tʰai[y])
{ "มกราคม", 1 },
{ "กุมภาพันธ์", 2 },
{ "มีนาคม", 3 },
{ "เมษายน", 4 },
{ "พฤษภาคม", 5 },
{ "มิถุนายน", 6 },
{ "กรกฎาคม", 7 },
{ "สิงหาคม", 8 },
{ "กันยายน", 9 },
{ "ตุลาคม", 10 },
{ "พฤศจิกายน", 11 },
{ "ธันวาคม", 12 },
{ "mokarākʰom" , 1 },
{ "kumpāpʰân[t]" , 2 },
{ "mīnākʰom" , 3 },
{ "mēsāyon" , 4 },
{ "prʉtpʰākʰom" , 5 },
{ "mitʰunāyon" , 6 },
{ "karakadākʰom" , 7 },
{ "siṅhākʰom" , 8 },
{ "kânyāyon" , 9 },
{ "tulākʰom" , 10 },
{ "prʉtčikāyon" , 11 },
{ "tʰânwākʰom" , 12 },
// Tibetan | བོད་སྐད་ (bod.skad.)
{ "ཟླ་དང་པོ་", 1 },
{ "ཟླ་གཉིས་པ་", 2 },
{ "ཟླ་གསུམ་པ་", 3 },
{ "ཟླ་བཞི་པ་", 4 },
{ "ཟླ་ལྔ་པ་", 5 },
{ "ཟླ་དྲུག་པ་", 6 },
{ "ཟླ་བདུན་པ་", 7 },
{ "ཟླ་བརྒྱད་པ་", 8 },
{ "ཟླ་དགུ་པ་", 9 },
{ "ཟླ་བཅུ་པ་", 10 },
{ "ཟླ་བཅུ་གཅིག་པ་", 11 },
{ "ཟླ་བཅུ་གཉིས་པ་", 12 },
{ "zla.daṅ.po." , 1 },
{ "zla.gñis.pa." , 2 },
{ "zla.gsum.pa." , 3 },
{ "zla.bži.pa." , 4 },
{ "zla.lṅa.pa." , 5 },
{ "zla.drug.pa." , 6 },
{ "zla.bdun.pa." , 7 },
{ "zla.brgyad.pa." , 8 },
{ "zla.dgu.pa." , 9 },
{ "zla.bču.pa." , 10 },
{ "zla.bču.gčig.pa.", 11 },
{ "zla.bču.gñis.pa.", 12 },
// Tok Pisin
{ "janueri" , 1 },
{ "februeri" , 2 },
{ "mas" , 3 },
{ "epril" , 4 },
{ "me" , 5 },
{ "jun" , 6 },
{ "julai" , 7 },
{ "ogas" , 8 },
{ "septemba" , 9 },
{ "oktoba" , 10 },
{ "novemba" , 11 },
{ "disemba" , 12 },
// Tongan | faka-Tonga
{ "sanuali" , 1 },
{ "fepueli" , 2 },
{ "maʻasi" , 3 },
{ "ʻepeleli" , 4 },
{ "me" , 5 },
{ "sune" , 6 },
{ "siulai" , 7 },
{ "ʻaokosi" , 8 },
{ "sepitema" , 9 },
{ "ʻokatopa" , 10 },
{ "novema" , 11 },
{ "tisema" , 12 },
// Tsonga 1 | xiTsonga
{ "janiwari" , 1 },
{ "febriwari" , 2 },
{ "machi" , 3 },
{ "apireli" , 4 },
{ "meyi" , 5 },
{ "juni" , 6 },
{ "julayi" , 7 },
{ "agoste" , 8 },
{ "septembere" , 9 },
{ "oktoba" , 10 },
{ "novhemba" , 11 },
{ "disemba" , 12 },
// Turkish | Türkçe
{ "ocak" , 1 },
{ "şubat" , 2 },
{ "mart" , 3 },
{ "nisan" , 4 },
{ "mayıs" , 5 },
{ "haziran" , 6 },
{ "temmuz" , 7 },
{ "ağustos" , 8 },
{ "eylül" , 9 },
{ "ekim" , 10 },
{ "kasım" , 11 },
{ "aralık" , 12 },
// Turkmen 1 | türkmen / түркмен
{ "türkmenbaşy" , 1 },
{ "baýdak" , 2 },
{ "nowruz" , 3 },
{ "gurbansoltan" , 4 },
{ "magtymguly" , 5 },
{ "oguz" , 6 },
{ "gorkut" , 7 },
{ "alparslan" , 8 },
{ "ruhnama" , 9 },
{ "garaşsyzlyk" , 10 },
{ "sanjar" , 11 },
{ "bitaraplyk" , 12 },
{ "түркменбашы", 1 },
{ "байдак" , 2 },
{ "новруз" , 3 },
{ "гурбансолтан", 4 },
{ "магтымгулы", 5 },
{ "огуз" , 6 },
{ "горкут" , 7 },
{ "алпарслан", 8 },
{ "рухнама" , 9 },
{ "гарашсызлык", 10 },
{ "санҗар" , 11 },
{ "битараплык", 12 },
// Tuvan 1 | тыва (tyva) see rus
{ "бир ай" , 1 },
{ "ийи ай" , 2 },
{ "үш ай" , 3 },
{ "дөрт ай" , 4 },
{ "беш ай" , 5 },
{ "алды ай" , 6 },
{ "чеди ай" , 7 },
{ "сес ай" , 8 },
{ "тос ай" , 9 },
{ "он ай" , 10 },
{ "он бир ай" , 11 },
{ "он ийи ай" , 12 },
{ "bir aj" , 1 },
{ "iji aj" , 2 },
{ "üš aj" , 3 },
{ "dört aj" , 4 },
{ "beš aj" , 5 },
{ "aldy aj" , 6 },
{ "čedi aj" , 7 },
{ "ses aj" , 8 },
{ "tos aj" , 9 },
{ "on aj" , 10 },
{ "on bir aj" , 11 },
{ "on iji aj" , 12 },
// Ukrainian | українська (ukraïns'ka)
{ "січень" , 1 },
{ "лютий" , 2 },
{ "березень" , 3 },
{ "квітень" , 4 },
{ "травень" , 5 },
{ "червень" , 6 },
{ "липень" , 7 },
{ "серпень" , 8 },
{ "вересень" , 9 },
{ "жовтень" , 10 },
{ "листопад" , 11 },
{ "грудень" , 12 },
{ "sičen'" , 1 },
{ "ljutyj" , 2 },
{ "berezen'" , 3 },
{ "kviten'" , 4 },
{ "traven'" , 5 },
{ "červen'" , 6 },
{ "lypen'" , 7 },
{ "serpen'" , 8 },
{ "veresen'" , 9 },
{ "žovten'" , 10 },
{ "lystopad" , 11 },
{ "hruden'" , 12 },
// Upper Sorbian 1 | hornjoserbšćina
{ "wulki róžk" , 1 },
{ "mały róžk" , 2 },
{ "nalětnik" , 3 },
{ "jutrownik" , 4 },
{ "róžownik" , 5 },
{ "smažnik" , 6 },
{ "pražnik" , 7 },
{ "žnjenc" , 8 },
{ "požnjenc" , 9 },
{ "winowc" , 10 },
{ "nazymnik" , 11 },
{ "hodownik" , 12 },
// Urdu | اردو (urdū)
{ "جنوری" , 1 },
{ "فروری" , 2 },
{ "مارچ" , 3 },
{ "اپریل" , 4 },
{ "مئ" , 5 },
{ "جون" , 6 },
{ "جولائی" , 7 },
{ "اگست" , 8 },
{ "ستمبر" , 9 },
{ "اکتوبر" , 10 },
{ "نومبر" , 11 },
{ "دسمبر" , 12 },
{ "janvarī" , 1 },
{ "farvarī" , 2 },
{ "mārč" , 3 },
{ "aprīl" , 4 },
{ "maʾi" , 5 },
{ "jūn" , 6 },
{ "jūlāʾī" , 7 },
{ "agast" , 8 },
{ "sitambar" , 9 },
{ "aktūbar" , 10 },
{ "navambar" , 11 },
{ "disambar" , 12 },
// Uyghur | ئۇيغۇرچە / uyghurche
{ "يانۋار" , 1 },
{ "فېۋرال" , 2 },
{ "مارت" , 3 },
{ "ئاپرېل" , 4 },
{ "ماي" , 5 },
{ "ئىيۇن" , 6 },
{ "ئىيۇل" , 7 },
{ "ئاۋغۇست" , 8 },
{ "سېنتەبىر" , 9 },
{ "ئۆكتەبىر" , 10 },
{ "نويابىر" , 11 },
{ "دېكابىر" , 12 },
{ "yanwar" , 1 },
{ "féwral" , 2 },
{ "mart" , 3 },
{ "aprél" , 4 },
{ "may" , 5 },
{ "iyun" , 6 },
{ "iyul" , 7 },
{ "awghust" , 8 },
{ "séntebir" , 9 },
{ "öktebir" , 10 },
{ "noyabir" , 11 },
{ "dékabir" , 12 },
// Uzbek | oʻzbek / ўзбек
{ "yanvar" , 1 },
{ "fevral" , 2 },
{ "mart" , 3 },
{ "aprel" , 4 },
{ "may" , 5 },
{ "iyun" , 6 },
{ "iyul" , 7 },
{ "avgust" , 8 },
{ "sentyabr" , 9 },
{ "oktyabr" , 10 },
{ "noyabr" , 11 },
{ "dekabr" , 12 },
{ "январь" , 1 },
{ "февраль" , 2 },
{ "март" , 3 },
{ "апрель" , 4 },
{ "май" , 5 },
{ "июнь" , 6 },
{ "июль" , 7 },
{ "август" , 8 },
{ "сентябрь" , 9 },
{ "октябрь" , 10 },
{ "ноябрь" , 11 },
{ "декабрь" , 12 },
// Venda | tshiVenḓa
{ "phando" , 1 },
{ "luhuhi" , 2 },
{ "Ṱhafamuhwe" , 3 },
{ "lambamai" , 4 },
{ "shundunthule" , 5 },
{ "fulwi" , 6 },
{ "fulwana" , 7 },
{ "Ṱhangule" , 8 },
{ "khubvumedzi" , 9 },
{ "tshimedzi" , 10 },
{ "Ḽara" , 11 },
{ "nyendavhusiku" , 12 },
// Venedic | Wenedyk
{ "jąwarz" , 1 },
{ "fiewrarz" , 2 },
{ "marć" , 3 },
{ "oprzyl" , 4 },
{ "maj" , 5 },
{ "juń" , 6 },
{ "jul" , 7 },
{ "ugust" , 8 },
{ "sieciębierz" , 9 },
{ "ocębierz" , 10 },
{ "nowiębierz" , 11 },
{ "dzieczębierz" , 12 },
// Vietnamese | tiếng Việt
{ "tháng một" , 1 },
{ "tháng hai" , 2 },
{ "tháng ba" , 3 },
{ "tháng tư" , 4 },
{ "tháng năm" , 5 },
{ "tháng sáu" , 6 },
{ "tháng bảy" , 7 },
{ "tháng tám" , 8 },
{ "tháng chín" , 9 },
{ "tháng mười" , 10 },
{ "tháng mười một", 11 },
{ "tháng mười hai", 12 },
// Volapük 1
{ "balul" , 1 },
{ "telul" , 2 },
{ "kilul" , 3 },
{ "folul" , 4 },
{ "lulul" , 5 },
{ "mälul" , 6 },
{ "velul" , 7 },
{ "jölul" , 8 },
{ "zülul" , 9 },
{ "balsul" , 10 },
{ "babul" , 11 },
{ "balsebalul" , 11 },
{ "batul" , 12 },
{ "balsetelul" , 12 },
// Voro 1 | võro
{ "vahtsõaastakuu" , 1 },
{ "radokuu" , 2 },
{ "urbõkuu" , 3 },
{ "mahlakuu" , 4 },
{ "lehekuu" , 5 },
{ "piimäkuu" , 6 },
{ "hainakuu" , 7 },
{ "põimukuu" , 8 },
{ "süküskuu" , 9 },
{ "rehekuu" , 10 },
{ "märtekuu" , 11 },
{ "joulukuu" , 12 },
// Walloon | walon
{ "djanvî" , 1 },
{ "fevrî" , 2 },
{ "måss" , 3 },
{ "avri" , 4 },
{ "may" , 5 },
{ "djun" , 6 },
{ "djulete" , 7 },
{ "awousse" , 8 },
{ "setimbe" , 9 },
{ "octôbe" , 10 },
{ "nôvimbe" , 11 },
{ "decimbe" , 12 },
// Welsh | Cymraeg
{ "ionawr" , 1 },
{ "chwefror" , 2 },
{ "mawrth" , 3 },
{ "ebrill" , 4 },
{ "mai" , 5 },
{ "mehefin" , 6 },
{ "gorffennaf" , 7 },
{ "awst" , 8 },
{ "medi" , 9 },
{ "hydref" , 10 },
{ "tachwedd" , 11 },
{ "rhagfyr" , 12 },
// Wolof | wolof
{ "samfiyee" , 1 },
{ "feebarye" , 2 },
{ "mars" , 3 },
{ "awril" , 4 },
{ "meey" , 5 },
{ "suwee" , 6 },
{ "yuuliyoo" , 7 },
{ "waxset" , 8 },
{ "ut" , 8 },
{ "sàttumbar" , 9 },
{ "oktoobar" , 10 },
{ "nofàmbar" , 11 },
{ "desàmbar" , 12 },
// Xhosa 1 | isiXhosa
{ "ujanuwari" , 1 },
{ "ufebhruwari" , 2 },
{ "ufebruwari" , 2 },
{ "umatshi" , 3 },
{ "uepreli" , 4 },
{ "uaprili" , 4 },
{ "umeyi" , 5 },
{ "ujuni" , 6 },
{ "ujulayi" , 7 },
{ "uagasti" , 8 },
{ "useptemba" , 9 },
{ "uoktobha" , 10 },
{ "unovemba" , 11 },
{ "udisemba" , 12 },
// Yiddish | ייִדיש (yidiš)
{ "יאַנואַר" , 1 },
{ "פֿעברואַר", 2 },
{ "מאַרץ" , 3 },
{ "אַפּריל" , 4 },
{ "מײַ" , 5 },
{ "יוני" , 6 },
{ "יולי" , 7 },
{ "אױגוסט" , 8 },
{ "סעפּטעמבער", 9 },
{ "אָקטאָבער", 10 },
{ "נאָװעמבער", 11 },
{ "דעצעמבער" , 12 },
{ "yanuar" , 1 },
{ "februar" , 2 },
{ "marts" , 3 },
{ "april" , 4 },
{ "may" , 5 },
{ "yuni" , 6 },
{ "yuli" , 7 },
{ "oygust" , 8 },
{ "september" , 9 },
{ "oktober" , 10 },
{ "november" , 11 },
{ "detsember" , 12 },
// Zazaki | zazaki
{ "çele" , 1 },
{ "gucige" , 2 },
{ "adare" , 3 },
{ "nisane" , 4 },
{ "gulane" , 5 },
{ "hezirane" , 6 },
{ "temmuze" , 7 },
{ "tebaxe" , 8 },
{ "keşkelun" , 9 },
{ "tişrino verên" , 10 },
{ "tişrino peyên" , 11 },
{ "gağande" , 12 },
// Zulu 1 | isiZulu
{ "ujanuwari" , 1 },
{ "ufebruwari" , 2 },
{ "umashi" , 3 },
{ "uephuleli" , 4 },
{ "uapreli" , 4 },
{ "uaphrili" , 4 },
{ "umeyi" , 5 },
{ "ujuni" , 6 },
{ "ujulayi" , 7 },
{ "uagasti" , 8 },
{ "usebutemba" , 9 },
{ "uokthoba" , 10 },
{ "uoktoba" , 10 },
{ "unovemba" , 11 },
{ "udisemba" , 12 },
{ "\0" , 0 }};
*/
// hash table of months
static HashTableX s_mt;
static char s_mbuf [ 6000 ];
// . maps the 64-bit hash of a month name ("wid") to the month value stored
//   for that name in the months[] table
// . on the first call it fills the static table s_mt (backed by the
//   fixed buffer s_mbuf) by hashing every name in months[] with
//   hash64Lower_utf8(); the scan stops at the entry whose name is empty
// . returns -1 if "wid" is not a valid month
// . also returns -1 if the one-time table setup fails; s_init12 is left
//   false in that case so the next call attempts the setup again
char getMonth ( int64_t wid ) {
	// only init the table once
	static bool s_init12 = false;
	// set up the month name hashtable
	if ( ! s_init12 ) {
		// set the keysize to 8 and month size to 1 byte
		// . this used to "return false" (i.e. 0) on failure, which
		//   is not the documented "no match" value, so report -1
		if ( ! s_mt.set( 8,1,300,s_mbuf,6000,false,0,"months"))
			return -1;
		// load month names and their values into hashtable from above
		for ( int32_t i = 0 ; *months[i].month ; i++ ) {
			char    *m    = months[i].month;
			int32_t  mlen = gbstrlen(m);
			uint64_t h    = hash64Lower_utf8(m,mlen);
			// add should always be success since we are pre-alloc,
			// so core if it is not
			if ( ! s_mt.addKey(&h,&months[i].value)){
				char*xx=NULL;*xx=0;}
		}
		// do not repeat this
		s_init12 = true;
	}
	// look up the 1-byte value stored for this hash
	char *month = (char *)s_mt.getValue64 ( wid );
	// bail if no match
	if ( ! month ) return -1;
	// otherwise, return it
	return *month;
}
#define MAX_INTERVALS 30000
// . called by Events.cpp to store all the intervals for a particular date
// . TODO: we basically do this once for hashing and once for the call to
//   Events::getEventsData(), so try to fix that
// . constrain intervals to [year0,year1) year range
// . steps, in order:
//   1. generate the intervals of "dp" with addIntervals()
//   2. if dp has DF_ASSUMED_YEAR, intersect them with
//      [dp->m_minStartFocus,dp->m_maxStartFocus)
//   3. subtract the intervals of each date in "closeDates"
//   4. for a plain tod (no tod range, no timepoint range) set each
//      interval's end point equal to its start point
//   5. shift the end points by the "timeZone" offset (hours) and, if
//      "useDST" is set, by one more hour inside the DST window of the year
//   6. append the resulting Interval array to "sb"
// . "timeZone" must be in [-13,13] and "useDST" must be 0 or 1 or we core
// . "words" is only stored into m_words if m_words is not set yet
// . returns false on error, otherwise returns what sb->safeMemcpy() returns
bool Dates::getIntervals2 ( Date *dp ,
			    SafeBuf *sb,
			    int32_t year0 ,
			    int32_t year1 ,
			    Date **closeDates ,
			    int32_t numCloseDates ,
			    char timeZone ,
			    char useDST ,
			    Words *words ) {
	// sanity
	if ( timeZone < -13 || timeZone > 13 ) { char *xx=NULL;*xx=0; }
	if ( useDST != 0 && useDST != 1 ) { char *xx=NULL;*xx=0; }
	// set it i guess
	if ( ! m_words ) m_words = words;
	m_year0 = year0;
	m_year1 = year1;
	// . NOTE: an assumed year (DF_ASSUMED_YEAR) does not narrow
	//   m_year0/m_year1 here. older code did:
	//     m_year0 = dp->m_year; m_year1 = dp->m_year+1;
	//   and a later version returned empty if dp->m_year + 1 < m_year0.
	//   both are disabled; the restriction is now done below by
	//   intersecting with the start focus window.
	// . we also used to switch to dp->m_telescope here, also disabled

	// fill in the final set of intervals for this date
	Interval finalInt [ MAX_INTERVALS + 1 ];
	// . add in the intervals for this date
	// . each Interval is a range of time_t's like [a,b), closed on
	//   the left and open on the right.
	// . many dates in m_datePtrs are ranges, and this takes care of it
	int32_t ni = addIntervals ( dp , 0 , finalInt , 0 , dp );
	// this would have set this to -1 and g_errno on error
	if ( ni == -1 ) return false;
	// the return ptr
	Interval *retInt = finalInt;
	int32_t   retni  = ni;
	// store result here
	Interval int3 [ MAX_INTERVALS + 1 ];
	// . init ptrs
	// . arg1 is always the current accumulated set and arg3 the scratch
	//   output; they are swapped after every intersect3() call
	Interval *arg1 = finalInt;
	Interval *arg3 = int3;
	// this is int1
	int32_t ni1 = ni;
	int32_t ni3 = 0;
	// . now intersect with our assumed range
	// . this used to be a window starting at dp->m_minPubDate
	//   (from SpiderRequest::m_parentPrevSpiderTime, used to estimate
	//   our pub date if this page's outlink was added to its parent
	//   since the last time the parent was spidered) and lasting
	//   DAYLIMIT days
	// . NOTE: i changed 365 to 90 days since much more than 90 days and
	//   people usually put a year in the date
	// . now the window is simply [m_minStartFocus,m_maxStartFocus)
	if ( (dp->m_flags & DF_ASSUMED_YEAR) ) { //dp->m_minPubDate > 0 ) {
		// wtf?
		if ( dp->m_minStartFocus == 0 ) { char *xx=NULL;*xx=0; }
		// a simple interval
		Interval simple[1];
		//simple[0].m_a = dp->m_minPubDate;
		simple[0].m_a = dp->m_minStartFocus;
		//simple[0].m_b = dp->m_minPubDate + DAYLIMIT*24*3600;
		simple[0].m_b = dp->m_maxStartFocus;
		// int1 INTERSECT simple and stored into int3.
		ni3 = intersect3 ( arg1,simple,arg3,ni1,1,0 , false,false);
		// error?
		if ( ni3 == -1 ) return false;
		// store result in case we return
		retInt = arg3;
		retni  = ni3;
		// swap for next iteration, if we do it
		Interval *tmp = arg1;
		arg1 = arg3;
		arg3 = tmp;
		ni1  = ni3;
	}
	// subtract the close dates
	for ( int32_t i = 0 ; i < numCloseDates ; i++ ) {
		// shortcut
		Date *cd = closeDates[i];
		// sanity check
		if ( ! ( cd->m_flags & DF_CLOSE_DATE )){char *xx=NULL;*xx=0; }
		// fill this up
		Interval int2 [ MAX_INTERVALS + 1 ];
		// get the intervals of the close date
		int32_t ni2 = addIntervals ( cd,0,int2,0,cd);
		// . error? same convention as the addIntervals() call above
		// . without this check a -1 count was handed to intersect3()
		if ( ni2 == -1 ) return false;
		// int1 - int2 and stored into int3. subtract = true
		ni3 = intersect3 ( arg1,int2,arg3,ni1,ni2,0 , true,false);
		// error?
		if ( ni3 == -1 ) return false;
		// just for debugging unm.edu!!
		//if ( ni3 == ni1 ) { char *xx=NULL;*xx=0; }
		// store result in case we return
		retInt = arg3;
		retni  = ni3;
		// swap for next iteration, if we do it
		Interval *tmp = arg1;
		arg1 = arg3;
		arg3 = tmp;
		ni1  = ni3;
	}
	// . if date has a tod but no tod range, then do not default
	//   Interval::m_b to midnight; set it equal to Interval::m_a
	//   (this used to store -1 instead)
	// . this way Events.cpp knows not to set the EV_STORE_HOURS bit if
	//   it has no ending time. and to set the EV_STORE_HOURS bit if it
	//   does have an ending tod.
	if ( (dp->m_hasType & DT_TOD) &&
	     !(dp->m_hasType & DT_RANGE_TOD) &&
	     // fix "October 25, 2011 4:00pm End: October 25, 2011 8:00 pm"
	     // for http://www.seattle24x7.com/calendar/calendar.htm
	     !(dp->m_hasType & DT_RANGE_TIMEPOINT) ) {
		// loop over every interval
		for ( int32_t i = 0 ; i < retni ; i++ ) {
			// breathe
			QUICKPOLL(m_niceness);
			// collapse the end point onto the start point
			retInt[i].m_b = retInt[i].m_a; // -1;
		}
	}
	// if we do not have store hours or substore hours set then
	// make the end point equal the start point so it is treat as
	// an event that starts at that time, and is pointless to showup
	// in the middle of it.
	// NO! this causes us to lose some events for rateclubs.com that
	// has some event from x to y and also has it with before 9:30
	// so that the endpoint is different. but now that the endpoint
	// was nuked from this code here, it got SPECIAL_DUP'ed out! and
	// really even though the rateclubs.com thing was not labelled
	// by us as store hours, it really was, it was substore hours, and
	// you could show up at any time...
	/*
	dateflags_t mask = DF_STORE_HOURS | DF_SUBSTORE_HOURS;
	if ( !(dp->m_flags & mask) ) {
		// loop over every interval and nuke the end time
		for ( int32_t i = 0 ; i < retni ; i++ ) {
			// breathe
			QUICKPOLL(m_niceness);
			// set to -1
			retInt[i].m_b = retInt[i].m_a; // -1;
		}
	}
	*/
	//
	// now convert the interval times from local times into UTC
	//
	// . i walks the start points and j the end points
	// . both persist across the year loop so each point is converted
	//   exactly once, in the year it falls into
	int32_t i = 0;
	int32_t j = 0;
	// timeZone is in hours, usually -5,-6,-7,-8 (EST/CST/MST/PST)
	int32_t tzoff = timeZone * 3600;
	// . now convert from local time into utc... and handle dst
	// . set then dst intervals for each year
	for ( int32_t y = m_year0 ; y <= m_year1 ; y++ ) {
		// not if facebook though! they are already in utc
		//if ( isFacebook ) break;
		// set daylight start for UTC
		int32_t daylightStart ;
		int32_t daylightEnd   ;
		// . this function is in Address.cpp above getIsDst()
		// . daylightStart is time_t in UTC when daylight savings time
		//   starts for this year.
		getDSTInterval ( y , &daylightStart, &daylightEnd );
		// get year range
		int32_t ystart = getYearMonthStart(y  ,1);
		int32_t yend   = getYearMonthStart(y+1,1);
		// breathe
		QUICKPOLL(m_niceness);
		// now scan the intervals that fall into this year
		for ( ; i < retni ; i++ ) {
			// breathe
			QUICKPOLL(m_niceness);
			// stop if beyond year and advance year
			if ( retInt[i].m_a >= yend ) break;
			// . sanity check
			// . this hurts "10am to 6pm - 7 days a week" for
			//   www.corsicanastorage.com
			// . retInt[i].m_b is >= ystart but the 10am part
			//   is from the previous day. so it spans the years
			//   so i added retInt[i].m_b <= ystart here
			if ( retInt[i].m_a <  ystart &&
			     retInt[i].m_b <= ystart ) { char *xx=NULL;*xx=0;}
			// convert to UTC
			retInt[i].m_a -= tzoff;
			// convert the a point (in UTC!)
			if ( useDST &&
			     retInt[i].m_a >= daylightStart &&
			     retInt[i].m_a <  daylightEnd )
				// remove DST's additional hour
				retInt[i].m_a -= 3600;
		}
		// same for b
		for ( ; j < retni ; j++ ) {
			// breathe
			QUICKPOLL(m_niceness);
			// stop if beyond year and advance year
			if ( retInt[j].m_b >= yend ) break;
			// convert to UTC
			retInt[j].m_b -= tzoff;
			// convert the b point (in UTC!)
			if ( useDST &&
			     retInt[j].m_b >= daylightStart &&
			     retInt[j].m_b <  daylightEnd )
				// remove DST's additional hour
				retInt[j].m_b -= 3600;
		}
	}
	// copy into there
	return sb->safeMemcpy ( (char *)retInt , retni * sizeof(Interval) ) ;
}
#define MIN_UNBOUNDED 1
#define MAX_UNBOUNDED 2
//#define _DLOG_ 1
// . returns -1 and sets g_errno on error
// . otherwise returns # of intervals stored into "retInt" array
// . every date generates time intervals whose endpoints are in seconds since
//   the epoch (jan 1, 1970)
// . we restrict to the spidered year and the following year to save resources
//   so if someone said "every wednesday" we'd only add up to two years worth
//   of "wednesday intervals"
// . (older note said "adds intervals to m_tree"; the code below writes them
//   into "retInt")
// . this is a wrapper around addIntervalsB() which does the real work. if
//   "di" has SF_NON set in m_suppFlags (the word "non" preceded the date)
//   the intervals are replaced by their complement over [0,0x7fffffff)
// . "hflag", "depth" and "orig" are just passed through to addIntervalsB()
// . TODO: SUPPORT: "nov 1 7pm - nov 3 8pm" will have an end tod > 1 day
// . TODO: SUPPORT: "dev 11,12, 15 jan 4" (list of two MONTH|DAYNUMs where
//   one has a list of daynums...
int32_t Dates::addIntervals ( Date *di ,
			      char hflag ,
			      // fill up this buffer with the intervals
			      Interval *retInt ,
			      int32_t depth ,
			      Date *orig ) {
	int32_t ni = addIntervalsB ( di,hflag,retInt,depth,orig);
	// . now if we had the word "non" before the date then we must
	//   complement the intervals.
	// . fixes "non-holiday mondays" for collectorsguide.com
	if ( ! (di->m_suppFlags & SF_NON) ) return ni;
	// . return if none, and pass an error (-1) straight through
	// . a negative count used to reach the gbmemcpy() below
	if ( ni <= 0 ) return ni;
	// copy into this buffer
	Interval buf[MAX_INTERVALS];
	// . the complement of ni intervals is ni+1 intervals
	// . the buffers callers in this file pass as "retInt" hold as few
	//   as MAX_INTERVALS entries, so keep one slot free for the extra
	//   interval instead of writing one past the end
	if ( ni > MAX_INTERVALS - 1 ) ni = MAX_INTERVALS - 1;
	gbmemcpy(buf,retInt,ni*sizeof(Interval));
	// store here, overwriting the original intervals
	Interval *dst = retInt;
	int32_t j = 0;
	// complement them
	for ( int32_t i = 0 ; i < ni ; i++ ) {
		// breathe
		QUICKPOLL(m_niceness);
		// the leading gap: from time 0 to the first start point
		if ( i == 0 ) {
			dst[j].m_a = 0;
			dst[j].m_b = buf[i].m_a;
			j++;
		}
		// the gap after interval i starts where interval i ends
		dst[j].m_a = buf[i].m_b;
		// and runs to the start of the next one, or to the max
		// time if interval i was the last one
		if ( i + 1 < ni ) dst[j].m_b = buf[i+1].m_a;
		else              dst[j].m_b = 0x7fffffff; // MAX_DATE;
		j++;
	}
	return j;
}
//int32_t nd = s_numDaysInMonth[m-1];
// days in each month of a non-leap year, indexed by month 0 to 11 (0=jan)
char s_numDaysInMonth [] = {31,28,31,30,31,30,31, 31,30,31,30,31};
// . returns the number of days in "month" of "year"
// . month = 0 to 11. 0=jan 1=feb ...
// . year is the full year, like 1900+, and is used for the leap year rule
// . cores if month is not in [0,11] or if year is < 100
int32_t getNumDaysInMonth ( int32_t month , int32_t year ) {
	// . sanity. month is 0 to 11, not 1 to 12
	// . also reject negatives, they would index before the table
	if ( month < 0 || month >= 12 ) { char *xx=NULL;*xx=0; }
	// sanity more. year is like 1900+
	if ( year < 100 ) { char *xx=NULL;*xx=0; }
	// get days in month
	int32_t nd = s_numDaysInMonth[month];
	// are we a leap year?
	bool isLeapYear = ( (year % 4) == 0 );
	// but every century we skip a leap year
	// -- unless divisble by 400... (wikipedia)
	if ( (year % 100) == 0 && (year % 400) != 0 )
		isLeapYear = false;
	// feb and a leap year?
	if ( month == 1 && isLeapYear ) nd++;
	return nd;
}
int32_t Dates::addIntervalsB ( Date *di ,
char hflag ,
// fill up this buffer with the intervals
Interval *retInt ,
int32_t depth ,
Date *orig ) {
// int16_tcut
char *u = ""; if ( m_url ) u = m_url->getUrl();
// each simple date type uses this to store its intervals before
// intersecting
Interval tmp1[MAX_INTERVALS];
Interval *int1 = tmp1;
int32_t ni1 = 0;
// and we intersect those with m_int2, the accumulator
Interval tmp2[MAX_INTERVALS];
Interval *int2 = tmp2;
int32_t ni2 = 0;
// . and store intersection into m_int3
// . and right after swap m_int2 with m_int3
Interval *int3 = retInt;
int32_t ni3 = 0;
// . quick skip if totally wrong year
// . fixes obits.abqjournal.com some
if ( di->m_year >= 0 && di->m_year < m_year0 ) return 0;
if ( di->m_year >= 0 && di->m_year > m_year1 ) return 0;
// this range algo only works on simple date types for now
datetype_t simpleFlags = DT_DOW|DT_DAYNUM|DT_MONTH|DT_YEAR;
#ifdef _DLOG_
// debug log
char *ps = "unknown";
if ( di->m_type == DT_TOD ) ps = "tod";
if ( di->m_type == DT_DAYNUM ) ps = "daynum";
if ( di->m_type == DT_MONTH ) ps = "month";
if ( di->m_type == DT_YEAR ) ps = "year";
if ( di->m_type == DT_DOW ) ps = "dow";
if ( di->m_type == DT_HOLIDAY ) ps = "holiday";
if ( di->m_type == DT_SUBDAY ) ps = "subday";
if ( di->m_type == DT_SUBWEEK ) ps = "subweek";
if ( di->m_type == DT_SUBMONTH ) ps = "submonth";
if ( di->m_type == DT_EVERY_DAY ) ps = "everyday";
if ( di->m_type == DT_SEASON ) ps = "season";
if ( di->m_type == DT_ALL_HOLIDAYS ) ps = "allholidays";
if ( di->m_type == DT_TIMESTAMP ) ps = "timestamp";
if ( di->m_type == DT_RANGE ) ps = "range";
if ( di->m_type == DT_LIST_DAYNUM ) ps = "listdaynum";
if ( di->m_type == DT_LIST_MONTH ) ps = "listmonth";
if ( di->m_type == DT_LIST_MONTHDAY ) ps = "listmonthday";
if ( di->m_type == DT_LIST_DOW ) ps = "listdow";
if ( di->m_type == DT_LIST_TOD ) ps = "listtod";
if ( di->m_type == DT_LIST_OTHER ) ps = "listother";
if ( di->m_type == DT_COMPOUND ) ps = "compound";
if ( di->m_type == DT_TELESCOPE ) ps = "telescope";
if ( di->m_type == DT_RANGE_TOD ) ps = "rangetod";
if ( di->m_type == DT_RANGE_DOW ) ps = "rangedow";
if ( di->m_type == DT_RANGE_YEAR ) ps = "rangeyear";
// a depth indicator
char ds[40];
for ( int32_t k = 0 ; k < depth ; k++ ) ds[k]='-';
ds[depth]='\0';
logf(LOG_DEBUG,"dates: %s adding intervals for date type %s num=%"INT32"",
ds,ps,di->m_num);
#endif
// . do range intersections ourselves since they are tricky
// . "july 2nd 8pm - dec 3rd 3pm" (complex range)
// . "8pm - 9pm" (simple range)
// . "2008 - 2010" (simple range)
if ( di->m_type & DT_RANGE_ANY ) {
// must have just two ptrs to make a range
if ( di->m_numPtrs != 2 ) { char *xx=NULL;*xx=0; }
// add in the associated intervals for this complex date
// into either int1 or int2, if int1 is occupied
ni1 = addIntervals ( di->m_ptrs[0], 0, int1 , depth+1, orig);
ni2 = addIntervals ( di->m_ptrs[1], 0, int2 , depth+1, orig);
// point to the two sets of intervals
Interval *tmp1 = int1;
Interval *tmp2 = int2;
// . fix "Tuesday-Sunday"
// . swap the two sets if range is backwards
// . no! that would be the complement of what we want!
if ( di->m_ptrs[0]->m_num > di->m_ptrs[1]->m_num ) {
//tmp1 = int2;
//tmp2 = int1;
// add this to the right end point
//addOff = 7*3600*24;
}
// fix oct 15 - march 15
if ( di->m_ptrs[0]->m_month > di->m_ptrs[1]->m_month &&
di->m_ptrs[0]->m_year == -1 &&
di->m_ptrs[1]->m_year == -1 ) {
// add this to the right end point
//addOff = yoff1;
}
// correction? for boundary conditions
int32_t corr = 0;
// if endpoint of first time interval of tmp2 array
// is less than endpoint of first time interval of
// tmp1 array, then skip the first time interval
// of the tmp2 array. tmp2 might be like "friday"
// and tmp1 might be "monday" for a date range like
// "monday - friday" and it turns out that for m_year0
// friday is on jan 1, so we want to ignore tmp2[0]'s
// interval which is the time range for that friday
// in seconds since the epoch. this is kinda an
// "off by one offset" error.
//
// this also fixes "oct 15 - march 15" because it
// will skip the first oct 15, year0 interval which
// is an interval of 86400 seconds.
if ( ni2>0 && ni1>0 && tmp2[0].m_b0 && ni1>0 &&
// int2 might have only had one interval which
// got "removed" because it was before int1!
// so make sure its valid now...
// . seems like this is triggered by
// "last friday of each month from 11pm to saturday 6am"
// by http://hrweb.brevard.k12.fl.us/
// basically many end times to one start time
corr < ni2 &&
tmp2[corr].m_b < tmp1[0].m_b ) {
// so because of hrweb.brevard.k12.fl.us just
// nuke this whole thing rather than core
return 0;
// ignore the bogus end time then
char *xx=NULL;*xx=0; }
// set this
bool simple = (di->m_ptrs[0]->m_type & simpleFlags);
// if no TOD use the simple algo
if ( ! ( di->m_hasType & DT_TOD ) ) simple = true;
// reset
ni3 = 0;
// get year constraints
int32_t yoff1 = getYearMonthStart ( m_year0 , 1 );
int32_t yoff2 = getYearMonthStart ( m_year1 , 1 );
// if end point intervals are first, then correct that
if ( corr == 1 ) {
int3[ni3].m_a = yoff1;
if ( simple ) int3[ni3].m_b = tmp2[0].m_b;
else int3[ni3].m_b = tmp2[0].m_a;
ni3++;
}
// PROBLEM: ni1 is zero because it is before m_year0 but
// ni2 is > 0.
// fixes "2010-11" when m_year0 is 2011 and m_year1 is 2013 for
// http://www.zvents.com/san-jose-ca/events/show/159288785-in-
// the-mood-a-1940s-musical-revue
if ( ni1 == 0 &&
ni2 > 0 &&
di->m_ptrs[0]->m_type == DT_YEAR &&
di->m_ptrs[0]->m_year > 1 &&
di->m_ptrs[0]->m_year < m_year0 ) {
for ( int32_t k = 0 ; k < ni2 ; k++ ) {
int3[ni3].m_a = yoff1;
int3[ni3].m_b = int2[k].m_b;
ni3++;
}
}
// loop over intervals
for ( int32_t k = 0 ; k < ni1 ; k++ ) {
int3[ni3].m_a = tmp1[k].m_a;
// if ran out of intervals in tmp2,use year end
if ( k+corr >= ni2 ) {
int3[ni3].m_b = yoff2;
continue;
}
// if a date is a DT_TOD (timeofday) then its
// interval is like [9pm,midnight] in seconds
// since the epoch, and we want to use its
// START point, "9pm" as the endpoint for the
// range and not "midnight". Otherwise, we
// use the actual END point of the range.
if ( simple) int3[ni3].m_b = tmp2[k+corr].m_b;
else int3[ni3].m_b = tmp2[k+corr].m_a;
// sanity check
if ( int3[ni3].m_b < int3[ni3].m_a) {
// this happened for "4pm - 12pm" for
// http://www.newmexico.org/calendar/events/index.php?com=detail&eID=22948&year=2011&month=01
// so let's just give up on such things
log("dates: bad date intersection for %s",u);
return 0;
//char*xx=NULL;*xx=0; }
}
ni3++;
}
// wrap it up
return ni3;
}
int32_t dcount = 0;
// scan ptrs if we are a complex date type
for ( int32_t x = 0 ; x < di->m_numPtrs ; x++ ) {
// breathe
QUICKPOLL ( m_niceness );
// get done
Date *dx = di->m_ptrs[x];
// if dx is a season but di already has a month range
// then ignore it! fixes "Summer Hours: March 15 - Oct. 15"
// for unm.edu so that is not hurt
if ( dx->m_type == DT_SEASON &&
( di->m_hasType & DT_RANGE_DAYNUM ||
di->m_hasType & DT_MONTH ||
di->m_hasType & DT_DAYNUM ) )
continue;
// if we are a range, we must join the points we just
// added for dx, with the next ptr
/*
char hflag2 = 0;
if ( di->m_type == DT_RANGE &&
((di->m_ptrs[0]->m_type) & simple) &&
((di->m_ptrs[1]->m_type) & simple) ) {
// sanity check
if ( di->m_numPtrs != 2 ) { char *xx=NULL;*xx=0; }
if ( x == 0 ) hflag2 = MAX_UNBOUNDED;
else hflag2 = MIN_UNBOUNDED;
}
*/
// . if int1 is occupied, then put these intervals into "int2"
// . TODO: union the list intervals together... set
// "add" to true (like "subtract")
bool swap = ( dcount > 0 ); // && di->m_type != DT_LIST );
dcount++;
// sanity check -- these must be empty at this point
if ( ni2 || ni3 ) { char *xx=NULL; *xx=0; }
// add in the associated intervals for this complex date
// into either int1 or int2, if int1 is occupied
if ( ! swap ) ni1 = addIntervals ( dx, 0, int1 , depth+1,orig);
else ni2 = addIntervals ( dx, 0, int2 , depth+1,orig);
// return -1 on error with g_errno set
if ( ni1 == -1 ) return -1;
if ( ni2 == -1 ) return -1;
// . if this is a weak dow and we have a strong, ignore it
// . another part of the fix for southgatehouse.com which
// has a band called "Sunday Valley" which needs to telescope
// to a date with "Friday" in it. so we end up with a
// telescoped date with two different DOWs and this should
// resolve them.
if ( dx->m_type == DT_DOW &&
(dx->m_flags & DF_HAS_WEAK_DOW) &&
(di->m_flags & DF_HAS_STRONG_DOW) ) {
// HACK: this means intersection should be full
if ( ! swap ) {
ni1 = 1;
int1[0].m_a = (time_t)0;
int1[0].m_b = (time_t)0x7fffffff;
}
else {
ni2 = 1;
int2[0].m_a = (time_t)0;
int2[0].m_b = (time_t)0x7fffffff;
}
}
// if we do not have int2 occupied, keep chugging so we can
// get something to intersect
if ( ! swap ) continue;
// union int1 and int2 together instead of intersecting?
bool unionOp = (di->m_type & DT_LIST_ANY);
// . intersect int1 and int2 and put into int3
// . returns # of intervals stored into int3
ni3 = intersect3(int1,int2,int3,ni1,ni2,depth+1,false,unionOp);
// error? g_errno should be set
if ( ni3 == -1 ) return -1;
// "int1" is the accumulator in case we are intersecting
// more than two sets of intervals
Interval *tmp;
tmp = int1;
int1 = int3;
int3 = tmp;
ni1 = ni3;
ni3 = 0;
ni2 = 0;
// stop if intersection was empty, no need to go further
if ( ni1 <= 0 ) break;
}
// all done if we were a compound, list or range
if ( di->m_numPtrs > 0 ) {
// copy results to the requested buffer, but if
// we are already using that as the accumulator, return now
if ( int1 == retInt ) return ni1;
// ok, do the copy
gbmemcpy ( retInt , int1 , ni1 * sizeof(Interval) );
// return how many intervals are in "retInt"
return ni1;
}
// int16_tcut
int32_t num = di->m_num;
// sanity check
if ( num < 0 ) { char*xx=NULL;*xx=0; }
datetype_t dt = di->m_type;
suppflags_t sfmask =
SF_FIRST|
SF_LAST|
SF_SECOND|
SF_THIRD|
SF_FOURTH|
SF_FIFTH;
// int16_tcut
suppflags_t sflags = di->m_suppFlags;
// . deal with "first thursday of the month", "second tuesday"
// . we also use m_supp to indicate presence of "every" or a plural
// form of the DOW for our algo that sets DF_BAD_RECURRING_DOW bit
// above, so only do this if supp <= 17 now
if ( dt == DT_DOW && (sflags & sfmask) ) {
// cycle through the years
for ( int32_t y = m_year0 ; y < m_year1 ; y++ ) {
// loop over months
for ( int32_t m = 1 ; m < 13 ; m++ ) {
// get year/month start
int32_t ym1 = getYearMonthStart ( y , m );
int32_t ym2 = getYearMonthStart ( y , m+1 );
// . get dow for the first of that month
// . dow goes from 1 to 7
int32_t dow = getDOW ( ym1 + 1 );
// start it at 1
int32_t count = 1;
// reset this
int32_t lastMatch = -1;
// count out "sup occruences"
for ( int32_t d = ym1 ; d < ym2 ; d += 3600*24 , dow++ ) {
// wrap it
if ( dow > 7 ) dow = 1;
// skip if not our day
if ( dow != num ) continue;
// are we a match?
bool match = false;
// use a mask instead
if ( count == 1 && (sflags & SF_FIRST) )
match = true;
if ( count == 2 && (sflags & SF_SECOND) )
match = true;
if ( count == 3 && (sflags & SF_THIRD) )
match = true;
if ( count == 4 && (sflags & SF_FOURTH) )
match = true;
if ( count == 5 && (sflags & SF_FIFTH) )
match = true;
count++;
// "last monday of the month"?
if ( sflags & SF_LAST ) {
lastMatch = d;
continue;
}
// skip if no match
if ( ! match ) continue;
// got a match
int32_t a = d;
int32_t b = d + 3600*24;
if ( ! addInterval(a,b,retInt,&ni3,depth))
return -1;
// that was it!
//break;
}
// the month is over, if had "last monday of the month"
// we now have to add that...
if ( lastMatch > 0 ) {
int32_t a = lastMatch;
int32_t b = lastMatch + 3600*24;
if ( ! addInterval(a,b,retInt,&ni3,depth))
return -1;
}
} // end month loop
} // end year loop
}
// "wednesday" or "every tuesday"
else if ( dt == DT_DOW ) {
// get start of year in seconds since epoch (month=jan=1)
int32_t yoff1 = getYearMonthStart ( m_year0 , 1 );
int32_t yoff2 = getYearMonthStart ( m_year1 , 1 );
// . get dow (day of week) at that time (first day of year)
// . just do a mod of 24*3600
// . dow goes from 1 to 7
int32_t dow = getDOW ( yoff1 + 1 );
// get first day then
int32_t diff = num - dow;
// if before us, catch up
if ( diff < 0 ) diff += 7;
// make it into seconds
int32_t sdiff = diff * 24*3600;
// to end of 2nd year in seconds since start of 1st year
//int32_t dend = yoff + 24*3600* 366*2;
// . step through the next two years, one week a a time
// . TODO: ignore dows for "Wed nov 17 2009 - thurs dec 3 2009"
// type things, since we are bounding by weeks!
for ( int32_t d = yoff1 ; d < yoff2 ; d += 24*3600*7 ) {
// adjust min/max for ranges
int32_t a = d + sdiff;
int32_t b = a + 3600 * 24;
//if ( hflag == MIN_UNBOUNDED ) a = d;
//if ( hflag == MAX_UNBOUNDED ) b = d + 7 * 24*3600;
// . add interval [a,b) to m_tree1
// . "yearOff0" is the time_t for start of the year
// . cycle=week,
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
}
// "dec"
if ( dt == DT_MONTH ) {
// cycle through the years
for ( int32_t y = m_year0 ; y < m_year1 ; y++ ) {
// get year/month start
// . "num" is the month and goes from 1-12
int32_t a = getYearMonthStart ( y , num );
int32_t b = getYearMonthStart ( y , num + 1 );
// just paint the middle 10 days or so for
// mid-novemeber for villr.com
if ( di->m_suppFlags & SF_MID ) {
a += 10*86400;
b -= 10*86400;
}
// do special ranges like "through April"
//if ( di->m_flags & DF_ONGOING )
// a = getYearMonthStart ( y , 1 );
//if ( hflag == MIN_UNBOUNDED ) a = 0;
//if ( hflag == MAX_UNBOUNDED ) b = 0x7fffffff;
// just intersect m_tree1 directly
if ( ! addInterval (a,b,retInt,&ni3,depth) ) return -1;
}
}
// "12th"
if ( dt == DT_DAYNUM ) {
// cycle through the years
for ( int32_t y = m_year0 ; y < m_year1 ; y++ ) {
// loop over months
for ( int32_t m = 1 ; m < 13 ; m++ ) {
// get year/month start
int32_t ym = getYearMonthStart ( y , m );
// fix for "Mar 31" -- some months may not have
// that day #, like Feb might only have 28 days...
if ( num >= 29 ) {
// get days in month
int32_t nd = getNumDaysInMonth(m-1,y);
// skip if overflow
if ( num > nd ) continue;
}
// add the day to it
int32_t a = ym + (num - 1 )* 24 * 3600;
int32_t b = ym + (num )* 24 * 3600;
// TODO: fix for leap year!!
//if ( hflag==MIN_UNBOUNDED) a=ym;
//if( hflag==MAX_UNBOUNDED) b=getYearMonthStart(y,m+1);
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
}
}
// "7pm" (every day assumed)
if ( dt == DT_TOD ) {
int32_t a = num;
int32_t b = 24*3600;
// do special ranges like "through 1pm" or "before 3pm"
if ( di->m_flags & DF_ONGOING ) {
a = 0;
b = num;
// but if it was like "until 12:30 am" then
// b - a > 86400 so let's adjust "a" to prevent
// overlap! this was a problem for
// http://www.bentfestival.org/
if ( num > 86400 )
a = num - 86400;
}
// if TOD is like 1am, then it is > 24*3600, so we need to
// fix it here! the custom range intersection logic above
// will only take the left or right end point of this tod
// anyway hopefully. i.e. "simple" is false.
// actually if events starts at 1:30 am assume it lasts for
// one hour, so add 3600 seconds to the endpoint.
// if a is <= b then we assume event lasts until midnight
// that day, i.e. b = 24*3600.
// MDW: i made this a >= b because bentfestival.org was
// starting at "12am" (also i upped 3600 to 3600*3).
// but we also have to define "night" as going to like 3am.
if ( a >=b )
b = a + 3600*3;
// loop over all days in year0 up to and including year1
int32_t ym0 = getYearMonthStart ( m_year0 , 1 );
int32_t ym1 = getYearMonthStart ( m_year1 , 1 );
// fix for "8 [[]] Monday nights" and "9:30 [[]] Monday nights"
// for salsapower.com
if ( ! (di->m_suppFlags & SF_HAD_AMPM) &&
(orig->m_suppFlags & SF_NIGHT) &&
a < 12*3600 ) {
a += 12*3600;
// we had "before 6:30" and "evenings" for a date in
// http://www.restaurantica.com/va/arlington/
// bonsai-grill/23375901/ (DF_ONGOING was set)
if ( a > b ) b += 12*3600;
}
// . fix for mrmovietimes.com
// . "10:20am, 5:10" (list of tods, only "am" is given)
if ( ! (di->m_suppFlags & SF_HAD_AMPM) &&
(di->m_suppFlags & SF_PM_BY_LIST) &&
a < 12*3600 ) {
a += 12*3600;
// see note above (DF_ONGOING was set)
if ( a > b ) b += 12*3600;
}
// sanity check
//if ( (ym0 % (24*3600) ) != 0 ) { char *xx=NULL;*xx=0;}
// loop over every day (assume leap year, 366 days)
for ( int32_t d = ym0 ; d < ym1 ; d += 24*3600 ) {
int32_t A = a+d;
int32_t B = b+d;
//if ( hflag == MIN_UNBOUNDED ) A = 0 + d;
//if ( hflag == MAX_UNBOUNDED ) B = 24*3600 + d;
// add it in with yearly offsets in seconds since epoch
// fasle->DO NOT use dayShift since we are the TOD!
if ( ! addInterval(A,B,retInt,&ni3,depth,0))return -1;
}
}
// years
if ( dt == DT_YEAR ) {
int32_t a = getYearMonthStart ( num , 1 );
int32_t b = getYearMonthStart ( num + 1 , 1 );
//if ( hflag == MIN_UNBOUNDED ) a = 0;
//if ( hflag == MAX_UNBOUNDED ) b = 0x7fffffff;
if ( ! addInterval ( a , b,retInt,&ni3,depth ) ) return -1;
}
datetype_t specialTypes =
DT_HOLIDAY | // thanksgiving
DT_SUBDAY | // mornings
DT_SUBWEEK | // weekends
DT_SUBMONTH | // last day of month
DT_EVERY_DAY | // 7 days a week
DT_SEASON | // summers
DT_ALL_HOLIDAYS ; // "holidays"
// all done if not holiday
if ( ! ( dt & specialTypes ) ) return ni3;
int32_t a;
int32_t b;
// scan the years
for ( int32_t y = m_year0 ; y < m_year1 ; y++ ) {
// jan 1st, all day
if ( num == HD_NEW_YEARS_DAY || num == HD_HOLIDAYS ) {
a = getYearMonthStart ( y,1 );
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// 3rd monday of january, all day
if ( num == HD_MARTIN_DAY || num == HD_HOLIDAYS ) {
a = getDOWStart ( y,1,2,3); // Monday=2, get 3rd monday
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// feb 2, all day
if ( num == HD_GROUNDHOG_DAY ) {
a = getYearMonthStart (y,2) + 24*3600;
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// 1st sunday of february
if ( num == HD_SUPERBOWL ) {
a = getDOWStart ( y,2,1,1);
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// feb 14
if ( num == HD_VALENTINES ) {
a = getYearMonthStart (y,2) + 13*24*3600;
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// feb 15
if ( num == HD_PRESIDENTS || num == HD_HOLIDAYS ) {
a = getYearMonthStart (y,2) + 14*24*3600;
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// wednesday before palm sunday
if ( num == HD_ASH_WEDNESDAY ) {
a = getDOWStart ( y,4,1,1) - 11*24*3600;
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// mar 17
if ( num == HD_ST_PATRICKS ) {
a = getYearMonthStart (y,3) + 16*24*3600;
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// sunday before easter
if ( num == HD_PALM_SUNDAY ) {
a = getDOWStart ( y,4,1,1) - 7*24*3600;
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// movable, mar 29
//if ( num == HD_FIRST_PASSOVER ) {
// a = getYearMonthStart (y,3) + 28*24*3600;
// b = a + 24*3600;
//}
// april 1
if ( num == HD_APRIL_FOOLS ) {
a = getYearMonthStart (y,4) ;
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// friday before easter
if ( num == HD_GOOD_FRIDAY ) {
a = getDOWStart ( y,4,1,1) - 2*24*3600;
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// easter, first sunday in april?
if ( num == HD_EASTER_SUNDAY || num == HD_HOLIDAYS ) {
a = getDOWStart ( y,4,1,1);
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
if ( num == HD_EASTER_MONDAY ) {
a = getDOWStart ( y,4,1,1) + 24*3600;
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
//if ( num == HD_LAST_PASSOVER ) {
//}
// 3rd monday of april
if ( num == HD_PATRIOTS_DAY ) {
a = getDOWStart ( y,4,2,3);
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// wednesday of the last FULL week in april
if ( num == HD_EARTH_DAY || num == HD_SECRETARY_DAY ) {
// either the 3rd or 4th wednesday of april
int32_t a1 = getDOWStart (y,4,4,3);
int32_t a2 = getDOWStart (y,4,4,4);
// get start of may
int32_t a3 = getYearMonthStart(y,5);
// add thursday+friday+saturday to it
// and if still in april, it's good!
if ( a2 + 3*24*3600 < a3 ) a = a2;
// otherwise, use the 3rd wedsnesday
else a = a1;
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// last friday of april (4th or 5th friday)
if ( num == HD_ARBOR_DAY ) {
// either the 4th or 5th friday
int32_t a1 = getDOWStart (y,4,6,4);
int32_t a2 = getDOWStart (y,4,6,5);
// get start of may
int32_t a3 = getYearMonthStart(y,5);
// and if still in april, it's good!
if ( a2 < a3 ) a = a2;
else a = a1;
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// 5th of may
if ( num == HD_CINCO_DE_MAYO ) {
a = getYearMonthStart(y,5) + 4*24*3600;
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// 2nd sunday of may
if ( num == HD_MOTHERS_DAY ) {
a = getDOWStart ( y,5,1,2);
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// 49 days after easter
if ( num == HD_PENTECOST_SUN ) {
a = getDOWStart ( y,4,1,1) + 48*24*3600;
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// last monday of may
if ( num == HD_MEMORIAL_DAY || num == HD_HOLIDAYS ) {
// either the 4th or 5th monday
int32_t a1 = getDOWStart (y,5,2,4);
int32_t a2 = getDOWStart (y,5,2,5);
// get start of june
int32_t a3 = getYearMonthStart(y,6);
// and if still in may, it's good!
if ( a2 < a3 ) a = a2;
else a = a1;
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// jun 14
if ( num == HD_FLAG_DAY ) {
a = getYearMonthStart(y,6) + 13*24*3600;
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// 3rd sunday of june
if ( num == HD_FATHERS_DAY ) {
a = getDOWStart ( y,6,1,3);
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// jul 4
if ( num == HD_INDEPENDENCE || num == HD_HOLIDAYS ) {
a = getYearMonthStart(y,7) + 3*24*3600;
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// first monday of sep
if ( num == HD_LABOR_DAY || num == HD_HOLIDAYS ) {
a = getDOWStart ( y,9,2,1);
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// oct 9
if ( num == HD_LEIF_ERIKSON ) {
a = getYearMonthStart(y,10) + 8*24*3600;
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// 2nd monday of october
if ( num == HD_COLUMBUS_DAY ) {
a = getDOWStart ( y,10,2,2);
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// oct 30
if ( num == HD_MISCHIEF_NIGHT ) {
a = getYearMonthStart(y,10) + 29*24*3600;
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// oct 31
if ( num == HD_HALLOWEEN ) {
a = getYearMonthStart(y,10) + 30*24*3600;
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// nov 1
if ( num == HD_ALL_SAINTS ) {
a = getYearMonthStart(y,11);
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// nov 11
if ( num == HD_VETERANS || num == HD_HOLIDAYS ) {
a = getYearMonthStart(y,11) + 10*24*3600;
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// 4th thursday of nov
if ( num == HD_THANKSGIVING || num == HD_HOLIDAYS ) {
a = getDOWStart ( y,11,5,4);
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// friday after thanksgiving
if ( num == HD_BLACK_FRIDAY ) {
a = getDOWStart ( y,11,5,4) + 24*3600;
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// dec 7
if ( num == HD_PEARL_HARBOR ) {
a = getYearMonthStart(y,12) + 6*24*3600;
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// dec 14
if ( num == HD_ENERGY_CONS ) {
a = getYearMonthStart(y,12) + 13*24*3600;
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// dec 24
if ( num == HD_CHRISTMAS_EVE ) {
a = getYearMonthStart(y,12) + 23*24*3600;
b = a + 24*3600;
// usually starts at like 8pm!
a += 8 * 3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// dec 25
if ( num == HD_CHRISTMAS_DAY || num == HD_HOLIDAYS ) {
a = getYearMonthStart(y,12) + 24*24*3600;
b = a + 24*3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// dec 31
if ( num == HD_NEW_YEARS_EVE ) {
a = getYearMonthStart(y,12) + 30*24*3600;
b = a + 24*3600;
// usually starts at like 8pm!
a += 8 * 3600;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// daily
if ( num == HD_EVERY_DAY ) {
// span the whole year
a = getYearMonthStart(y,1) ;
b = getYearMonthStart(y+1,1);
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
// weekends
if ( num == HD_WEEKENDS ) {
// just use this
// sunday = 1, saturday = 7
int32_t mask = 0;
mask |= 1<<1;
mask |= 1<<7;
addIntervalsForDOW ( mask,retInt,&ni3,depth,y );
}
if ( num == HD_WEEKDAYS ) {
int32_t mask = 0;
mask |= 1<<2;
mask |= 1<<3;
mask |= 1<<4;
mask |= 1<<5;
mask |= 1<<6;
addIntervalsForDOW ( mask,retInt,&ni3,depth,y );
}
//
// northern hemisphere definitions
//
if ( num == HD_SUMMER ) {
// june july and august
a = getYearMonthStart(y,6) ;
b = getYearMonthStart(y,9) ;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
if ( num == HD_FALL ) {
a = getYearMonthStart(y,9) ;
b = getYearMonthStart(y,12) ;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
if ( num == HD_WINTER ) {
// two pieces here
a = getYearMonthStart(y,1) ;
b = getYearMonthStart(y,3) ;
if ( ! addInterval (a,b,retInt,&ni3,depth) ) return -1;
a = getYearMonthStart(y,12) ;
b = getYearMonthStart(y+1,1) ;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
if ( num == HD_SPRING ) {
a = getYearMonthStart(y,3) ;
b = getYearMonthStart(y,6) ;
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
if ( num == HD_SCHOOL_YEAR ) {
// two pieces here
a = getYearMonthStart(y,1) ;
b = getYearMonthStart(y,5) ;
// middle may it ends
b += 15 * 86400;
if ( ! addInterval (a,b,retInt,&ni3,depth) ) return -1;
// starts sep 1
a = getYearMonthStart(y,9) ;
b = getYearMonthStart(y+1,1) ; // < jan 1 year+1
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
if ( num == HD_MORNING ) {
int32_t ym0 = getYearMonthStart ( y , 1 );
int32_t ym1 = getYearMonthStart ( y + 1 , 1 );
// loop over every day (assume leap year, 366 days)
for ( int32_t d = ym0 ; d < ym1 ; d += 24*3600 ) {
// get morning
int32_t a = d;
// up until noon
int32_t b = d + 12*3600;
// in seconds since epoch
if ( !addInterval(a,b,retInt,&ni3,depth,false))
return -1;
}
}
if ( num == HD_AFTERNOON ) {
int32_t ym0 = getYearMonthStart ( y , 1 );
int32_t ym1 = getYearMonthStart ( y + 1 , 1 );
// loop over every day (assume leap year, 366 days)
for ( int32_t d = ym0 ; d < ym1 ; d += 24*3600 ) {
// from noon
int32_t a = d + 12*3600;
// up til 6pm
int32_t b = d + 18*3600;
// in seconds since epoch
if ( !addInterval(a,b,retInt,&ni3,depth,false))
return -1;
}
}
if ( num == HD_NIGHT ) {
int32_t ym0 = getYearMonthStart ( y , 1 );
int32_t ym1 = getYearMonthStart ( y + 1 , 1 );
// loop over every day (assume leap year, 366 days)
for ( int32_t d = ym0 ; d < ym1 ; d += 24*3600 ) {
// . from 6pm
// . but that messes up juvaejazz.com
// which has evening and 5pm! so make 5pm...
int32_t a = d + 17*3600;
// up til midnight -- no! 3am
// make b be 3am now so "saturday night" and
// "until 12:30am" is not empty set
// fixes www.bentfestival.org
int32_t b = d + 27*3600;
//int32_t b = d + 24*3600;
// . in seconds since epoch
// . do not do day shifting on this!
if ( !addInterval(a,b,retInt,&ni3,depth,false))
return -1;
}
}
// "last day of the month"
if ( num == HD_MONTH_LAST_DAY ) {
// loop over every month of the year (1 through 12)
for ( int32_t m = 1 ; m <= 12 ; m++ ) {
// get first day of following month
int32_t ym0 = getYearMonthStart ( y , m+1 );
// subtract one day to get last day of month
int32_t a = ym0 - 24*3600;
// up til midnight
int32_t b = a + 24*3600;
// in seconds since epoch
if ( ! addInterval(a,b,retInt,&ni3,depth))
return -1;
}
}
// "first day of the month"
if ( num == HD_MONTH_FIRST_DAY ) {
// loop over every month of the year (1 through 12)
for ( int32_t m = 1 ; m <= 12 ; m++ ) {
// get first day of this month
int32_t ym0 = getYearMonthStart ( y , m );
// that is it
int32_t a = ym0 ;
// up til midnight
int32_t b = a + 24*3600;
// in seconds since epoch
if ( ! addInterval(a,b,retInt,&ni3,depth))
return -1;
}
}
// every month
if ( num == HD_EVERY_MONTH ) {
// span the whole year
a = getYearMonthStart(y,1) ;
b = getYearMonthStart(y+1,1);
if ( ! addInterval(a,b,retInt,&ni3,depth)) return -1;
}
}
return ni3;
}
bool Dates::addIntervalsForDOW ( int32_t mask ,
Interval *retInt ,
int32_t *ni3 ,
int32_t depth ,
int32_t year ) {
// get start of year in seconds since epoch (month=jan=1)
int32_t yoff1 = getYearMonthStart ( year , 1 );
int32_t yoff2 = getYearMonthStart ( year + 1 , 1 );
// . get dow (day of week) at that time (first day of year)
// . just do a mod of 24*3600
// . dow goes from 1 to 7
int32_t dow = getDOW ( yoff1 + 1 );
// back up for first inc in the loop
dow--;
// step through the next two years, one day at a time
for ( int32_t d = yoff1 ; d < yoff2 ; d += 24*3600 ) {
// inc dow
if ( ++dow >= 8 ) dow = 1;
// skip if no match
if ( ! ( mask & (1<= 13 ) { y++; m -= 12; }
// convert to timestamp in seconds since the epoch
tm ts;
// reset
ts.tm_sec = 0;
ts.tm_min = 0;
ts.tm_hour = 0;
ts.tm_wday = 0;
ts.tm_yday = 0;
ts.tm_isdst = 0; // daylight savings time?
// set
ts.tm_mon = m - 1; // mktime() expects range of 0-11 for this
ts.tm_mday = 1;
ts.tm_year = y - 1900;
// . TODO: cache this!!!!!
// . this was returning time local to the server, so fix to UTC
time_t ttt = mktime(&ts);
// so jan 1, 2010 returns a ttt that when printed using printTime()
// prints out "jan 1 7am 2010" so subtract our server timezone
ttt -= timezone;
// and now when printed, "ttt" is jan 1, 00:00, the start of the year
// in GMT/UTC time
return ttt;
}
// . returns the start, in seconds since the epoch, of the "count"-th
//   occurrence of weekday "dowArg" in month "m" of year "y"
// . "dowArg" is 1-7 where sunday=1 and saturday=7
// . "count" is 1-5 where 1 means the first such weekday of the month
// . cores on out-of-range arguments, or if the month starts before the
//   first sunday after the epoch
// . NOTE: count=5 can land in the following month; callers compare the
//   result against the start of the next month to detect that
time_t getDOWStart ( int32_t y , int32_t m, int32_t dowArg, int32_t count ) {
	// validate the occurrence number and the weekday
	if ( count  < 1 || count  > 5 ) { char *xx=NULL;*xx=0; }
	if ( dowArg < 1 || dowArg > 7 ) { char *xx=NULL;*xx=0; }
	// first second of this year/month, in seconds since the epoch
	int32_t when = getYearMonthStart ( y , m );
	// . the epoch (time_t of 0) is jan 1, 1970, which was a thursday,
	//   so the first sunday begins 3 days after it
	// . measure our offset from that sunday
	int32_t sinceSunday = when - 3*24*3600;
	// sanity check
	if ( sinceSunday < 0 ) { char *xx=NULL;*xx=0; }
	// weekday of the 1st of the month, as 1-7 to match dowArg
	int32_t firstDow = (sinceSunday / (24*3600)) % 7 + 1;
	// # of days to walk forward to hit the first "dowArg" (0 to 6)
	int32_t daysAhead = (dowArg - firstDow + 7) % 7;
	when += daysAhead * 24*3600;
	// then skip a whole week for every count over 1
	when += (count-1) * 7*24*3600;
	return when;
}
// . returns the day of week of timestamp "t", interpreted as UTC
// . result is 1-7 where sunday=1 and saturday=7
int32_t getDOW ( time_t t ) {
	// break it down in UTC; tm_wday counts days since sunday (0-6)
	const struct tm *utc = gmtime ( &t );
	int32_t daysSinceSunday = utc->tm_wday;
	// shift into our 1-7 convention
	return daysSinceSunday + 1;
}
// . returns the four-digit calendar year of timestamp "t", interpreted
//   as UTC
int32_t getYear ( time_t t ) {
	// break it down in UTC; tm_year counts years since 1900
	const struct tm *utc = gmtime ( &t );
	int32_t yearsSince1900 = utc->tm_year;
	return yearsSince1900 + 1900;
}
// add the interval to m_int1/m_ni1/m_map1
bool Dates::addInterval ( int32_t a , int32_t b , Interval *int3 , int32_t *ni3 ,
int32_t depth , bool useDayShift ) {
// limit it
if ( *ni3 >= MAX_INTERVALS ) { char *xx=NULL;*xx=0; }
// fix "mondays 10pm - 2am"
if ( useDayShift ) {
a += m_shiftDay;
b += m_shiftDay;
}
// . no, they can overlap if two holidays fall on the same day
// and maybe they do not always do that!
if ( *ni3 > 0 &&
int3[(*ni3)-1].m_a == a &&
int3[(*ni3)-1].m_b == b )
return true;
// point to it
Interval *ii = &int3[*ni3];
// add it to our array of intervals
ii->m_a = a;
ii->m_b = b;
// sanity check
if ( a > b ) { char *xx=NULL;*xx=0; }
// maintain order, and can not overlap
if ( *ni3 > 0 && int3[*ni3-1].m_b > a ) { char *xx=NULL;*xx=0; }
*ni3 = *ni3 + 1;
#ifdef _DLOG_
// log it for debug
// a depth indicator
char ds[40];
for ( int32_t k = 0 ; k < depth ; k++ ) ds[k]='-';
ds[depth]='\0';
logf(LOG_DEBUG,"dates: %s [%"INT32",%"INT32")",ds,a,b);
#endif
return true;
}
// . intersect the "ni1" intervals in "int1" with the "ni2" intervals in
//   "int2" (the accumulator) and store the result into "int3"
// . thin wrapper around intersect3() with subtraction and union both
//   turned off, i.e. a plain merge-based intersection
// . intersect2() is the older hash-based implementation; it used to be
//   run here as well to cross-check the merge-based result
// . what if both sides have overlapping tod intervals, like a movie
//   theatre? we should not be intersecting two different sets of TODs
//   then, they should be in a list or range or something
// . TODO: add dedup table to prevent same interval from being re-added
// . returns whatever intersect3() returns: the # of intervals stored
//   into int3
int32_t Dates::intersect ( Interval *int1 ,
			   Interval *int2 ,
			   Interval *int3 ,
			   int32_t   ni1  ,
			   int32_t   ni2  ,
			   int32_t   depth ) {
	// not subtracting int2, not doing a union
	const bool subtractint2 = false;
	const bool unionOp      = false;
	return intersect3 ( int1 , int2 , int3 , ni1 , ni2 , depth ,
			    subtractint2 , unionOp );
}
// . older, hash-based way of intersecting two interval lists
// . every interval in "int1" is bucketed by each day number
//   (seconds / 86400) it touches
// . then each interval in "int2" looks up the buckets for the days it
//   touches and is intersected with every int1 interval found there
// . each distinct resulting [A,B) is appended to "int3" via addInterval()
// . "depth" is passed to addInterval() and indents the _DLOG_ debug line
// . returns the # of intervals stored into int3, or -1 if a hash table
//   add or addInterval() fails
// . NOTE(review): addInterval() cores if intervals arrive out of order or
//   overlapping; nothing here sorts the output, so that presumably relies
//   on the shape of the inputs -- confirm before re-enabling this path
int32_t Dates::intersect2 ( Interval *int1 ,
Interval *int2 ,
Interval *int3 ,
int32_t ni1 ,
int32_t ni2 ,
int32_t depth ) {
// # of intervals stored into int3 so far
int32_t ni3 = 0;
#ifdef _DLOG_
// log it for debug
// a depth indicator
char ds[40];
for ( int32_t k = 0 ; k < depth ; k++ ) ds[k]='-';
ds[depth]='\0';
logf(LOG_DEBUG,"dates: %s INTERSECTING",ds);
#endif
// . 8-byte scratch buffer viewed three ways: "key" is the whole thing,
//   "A" is the first 4 bytes and "B" the last 4
// . so writing *A and *B below also builds the dedup key
char buf[8];
int64_t *key = (int64_t *)buf;
int32_t *A = (int32_t *)&buf[0];
int32_t *B = (int32_t *)&buf[4];
// dedup table of the [A,B) results already added, backed by a stack buf
char dbuf[10000];
HashTableX dt;
dt.set ( 8, 0, 1000 , dbuf,10000, false,m_niceness,"dedupint");
// make a map of int1 for faster intersecting
// . maps a day number to a ptr to an int1 interval touching that day
// . NOTE(review): the 2nd arg to set() looks like the value size and is
//   4, but an Interval ptr is stored and read back below -- confirm
//   this is right on 64-bit builds
char mapBuf1[10000];
HashTableX map1;
map1.set ( 4, 4, 1000 , mapBuf1, 10000, true,m_niceness,"intmap1");
for ( int32_t i = 0 ; i < ni1 ; i++ ) {
// shortcut
Interval *ii = &int1[i];
// get day range
// . m_b is exclusive, so back up one second to get the last day touched
int32_t d1 = ii->m_a / (24*3600);
int32_t d2 = (ii->m_b - 1 ) / (24*3600);
// register this int1 interval under every day it touches
for ( int32_t d = d1 ; d <= d2 ; d++ ) {
// breathe
QUICKPOLL ( m_niceness );
// . get who touches day #d
// . map data ptr pts to our ptr
if ( ! map1.addKey ( &d , &ii ) ) return -1;
// debug log for now
//logf(LOG_DEBUG,"map add d=%"INT32"",d);
}
}
// scan all intervals in our accumulator
for ( int32_t i = 0 ; i < ni2 ; i++ ) {
// breathe
QUICKPOLL ( m_niceness );
// get the interval endpoints
int32_t a = int2[i].m_a;
int32_t b = int2[i].m_b;
// convert into day units
int32_t d1 = a / (24*3600);
int32_t d2 = (b-1) / (24*3600);
// see what intervals in "int1" touch these days
for ( int32_t d = d1 ; d <= d2 ; d++ ) {
// get who touches day #d in the int1 intervals
int32_t slot = map1.getSlot ( &d );
// chain over those intervals in "int1"
for ( ; slot >= 0 ; slot = map1.getNextSlot(slot,&d) ) {
// breathe
QUICKPOLL ( m_niceness );
// put the interval into "ii" here
Interval *ii ;
ii = *(Interval **)map1.getValueFromSlot(slot);
// skip if us! (only possible if int1 and int2 share storage)
if ( ii == &int2[i] ) continue;
// skip if does not intersect [a,b)
if ( ii->m_b <= a ) continue;
if ( ii->m_a >= b ) continue;
// . the four cases below cover every way two overlapping
//   intervals can be arranged
// . each leaves the overlap in [*A,*B)
// if "ii" contains [a,b), add [a,b) to "int3"
if ( ii->m_a <= a && ii->m_b >= b ) {
*A = a;
*B = b;
}
// if we contain "ii"
else if ( a <= ii->m_a && b >= ii->m_b ) {
*A = ii->m_a;
*B = ii->m_b;
}
// if we are on the left
else if ( a <= ii->m_a ) {
*A = ii->m_a;
*B = b;
}
// if ii is on the left
else if ( ii->m_a <= a ) {
*A = a;
*B = ii->m_b;
}
// should never get here since one of the last two tests must hold
else { char *xx=NULL;*xx=0; }
// dedup: an interval spanning several days is found once per day
if ( dt.isInTable ( key ) ) continue;
// remember it for the dedup
if ( ! dt.addKey ( key ) ) return -1;
// add it to the output
if ( ! addInterval(*A,*B,int3,&ni3,depth) ) return -1;
// sanity check
//char *xx=NULL;*xx=0;
}
}
}
return ni3;
}
// . merge-based set operation over two lists of half-open intervals
// . "int1" holds "ni1" intervals and "int2" holds "ni2" intervals; both
//   are walked front to back in a single pass, so they are expected to
//   be sorted by start time
// . results are written to "int3", which must have room for
//   MAX_INTERVALS entries
// . with both flags false the result is the intersection of int1 and int2
// . if int2 is "negative" (DF_CLOSE_DATE) and int1 is "positive" pass
//   subtractint2=true and the merge performs a subtract rather than an
//   intersection. the sign of int3 will always be positive, unless both
//   int1 and int2 are negative.
// . if "unionOp" is true intervals from both lists are emitted in merge
//   order; overlapping inputs are copied as-is, not coalesced
// . "unionOp" and "subtractint2" are mutually exclusive (we core)
// . "depth" only indents the _DLOG_ debug line
// . returns the # of intervals stored into int3
// . returns 0 (and logs the url) if int3 would overflow
// . subtraction still "ignores donuts": when an int2 interval sits
//   strictly inside an int1 interval only the piece of the int1
//   interval before it is kept
int32_t Dates::intersect3 ( Interval *int1 ,
			    Interval *int2 ,
			    Interval *int3 ,
			    int32_t ni1 ,
			    int32_t ni2 ,
			    int32_t depth ,
			    bool subtractint2 ,
			    bool unionOp ) {
	// shortcut. the url is only used by the overflow log message
	char *u = ""; if ( m_url ) u = m_url->getUrl();
	// sanity check
	if ( ni1 > MAX_INTERVALS ) { char *xx=NULL;*xx=0; }
	if ( ni2 > MAX_INTERVALS ) { char *xx=NULL;*xx=0; }
	if ( unionOp && subtractint2 ) { char *xx=NULL;*xx=0; }
#ifdef _DLOG_
	// log it for debug
	// a depth indicator
	char ds[40];
	for ( int32_t k = 0 ; k < depth ; k++ ) ds[k]='-';
	ds[depth]='\0';
	logf(LOG_DEBUG,"dates: %s INTERSECTING",ds);
#endif
	// use ptrs: p1/p2 are the read cursors, p3 is the write cursor
	Interval *p1 = int1;
	Interval *p2 = int2;
	Interval *p3 = int3;
	Interval *p3max = int3 + MAX_INTERVALS;
	Interval *p1end = int1 + ni1;
	Interval *p2end = int2 + ni2;
	// the last int2 interval we subtracted from something, if any
	Interval *lastSubInt = NULL;
	goto loop;

 overflow:
	// sometimes we get a huge list of tods like for
	// http://www.ghtransit.com/schedule.html causing ni1 to be 29240
	// and ni2 is already 4386 or so we break the int3 buf's
	// MAX_INTERVALS. so put checks for that here:
	logf(LOG_DEBUG,"dates: potential overflow for "
	     "%s . returning 0.", u );
	//{ char *xx=NULL;*xx=0;}
	return 0;

	// the merge loop
 loop:
	// . stop on overflow
	// . every pass below writes at most one interval through p3, so
	//   making sure there is room for one here is enough
	if ( p3 + 1 > p3max ) {
		goto overflow;
	}
	// int1 exhausted?
	if ( p1 >= p1end ) {
		// only a union keeps what is left of int2
		if ( unionOp ) {
			if ( p3 + (p2end - p2) > p3max ) goto overflow;
			gbmemcpy ( p3 , p2 , (p2end - p2) * sizeof(Interval) );
			p3 += p2end - p2;
		}
		return p3-int3;
	}
	// int2 exhausted?
	if ( p2 >= p2end ) {
		// if we were doing a subtraction or a union and not an
		// intersection... then the rest of p1 survives
		if ( subtractint2 || unionOp ) {
			if ( p3 + (p1end - p1) > p3max ) goto overflow;
			gbmemcpy ( p3 , p1 , (p1end - p1) * sizeof(Interval) );
			p3 += p1end - p1;
		}
		return p3-int3;
	}
	// discard p1's interval if before p2's interval
	if ( p1->m_b <= p2->m_a ) {
		// keep it if subtracting or unioning though
		if ( subtractint2 || unionOp ) {
			p3->m_a = p1->m_a;
			p3->m_b = p1->m_b;
			p3++;
		}
		// otherwise, just discard it
		p1++;
		goto loop;
	}
	// likewise, for p2. only a union keeps it.
	if ( p2->m_b <= p1->m_a ) {
		if ( unionOp ) {
			p3->m_a = p2->m_a;
			p3->m_b = p2->m_b;
			p3++;
		}
		p2++;
		goto loop;
	}
	//
	// if we make it here then p1 and p2 overlap
	//
	// p1  (---------)
	// p2      (------------)
	//
	// p1  (---------)
	// p2  (----------------)
	//
	// p1  (-----------------------)
	// p2      (------)  (----)
	//
	// p1  (-----------------------)
	// p2  (------------)
	if ( p1->m_a <= p2->m_a ) {
		// use that a
		p3->m_a = p2->m_a;
		// if doing subtraction
		if ( subtractint2 ) {
			// pick p2's a as our b then, i.e. keep the piece
			// of p1 that lies before p2
			p3->m_a = p1->m_a;
			p3->m_b = p2->m_a;
			// if the positive interval is huge, do not forget
			// our past subtracted intervals!
			if ( lastSubInt && p3->m_a < lastSubInt->m_b )
				p3->m_a = lastSubInt->m_b;
			// ignore donuts for now
			p1++;
			p3++;
			goto loop;
		}
		else if ( unionOp ) {
			// emit p1 as-is
			p3->m_a = p1->m_a;
			p3->m_b = p1->m_b;
			p1++;
			p3++;
			goto loop;
		}
		// intersection: the overlap ends at the smaller b
		if ( p1->m_b <= p2->m_b )
			p3->m_b = p1->m_b;
		else
			p3->m_b = p2->m_b;
		p3++;
		// which do we inc? p1 or p2?
		Interval *next1 = p1+1;
		Interval *next2 = p2+1;
		// if no next1, we must advance p2 then
		if      ( next1 >= p1end ) p2 = next2;
		// if no next2, we must advance p1 then
		else if ( next2 >= p2end ) p1 = next1;
		// pick the one whose next guy's left endpoint is smallest
		else if ( next1->m_a < next2->m_a ) p1 = next1;
		// otherwise, default
		else p2 = next2;
	}
	// p1      (------------)
	// p2  (---------)
	//
	// p1      (----------------)
	// p2  (---------)
	//
	// p1      (------------)
	// p2  (-----------------------)
	//
	// p1            (------------)
	// p2  (-----------------------)
	else {
		// ok now p2->m_a < p1->m_a
		p3->m_a = p1->m_a;
		if ( subtractint2 ) {
			// keep the piece of p1 that sticks out past p2
			p3->m_a = p2->m_b;
			p3->m_b = p1->m_b;
			// if the positive interval is huge, do not forget
			// our past subtracted intervals!
			if ( lastSubInt && p3->m_a < lastSubInt->m_b )
				p3->m_a = lastSubInt->m_b;
			// if valid, inc p3. it is empty if p2 covers p1.
			if ( p3->m_a < p3->m_b ) p3++;
			// save this
			lastSubInt = p2;
			// selective inc'ing here!
			if ( p2->m_b <= p1->m_b ) p2++;
			// . if endpoints are equal, we inc p1 too!
			// . if the inc above just used up int2 then p2 is
			//   one past the last valid interval and must not
			//   be read. whatever survives of p1 was already
			//   stored above, so advance p1 too or the tail
			//   copy at the top of the loop would emit it again.
			if ( p2 >= p2end || p1->m_b <= p2->m_b ) p1++;
			goto loop;
		}
		else if ( unionOp ) {
			// emit p2 as-is
			p3->m_a = p2->m_a;
			p3->m_b = p2->m_b;
			p2++;
			p3++;
			goto loop;
		}
		// intersection: and pick the min b
		if ( p1->m_b <= p2->m_b ) p3->m_b = p1->m_b;
		else                      p3->m_b = p2->m_b;
		// inc int3 ptr
		p3++;
		// which do we inc? p1 or p2?
		Interval *next1 = p1+1;
		Interval *next2 = p2+1;
		// if no next1, we must advance p2 then
		if      ( next1 >= p1end ) p2 = next2;
		// if no next2, we must advance p1 then
		else if ( next2 >= p2end ) p1 = next1;
		// pick the one whose next guy's left endpoint is smallest
		else if ( next1->m_a < next2->m_a ) p1 = next1;
		// otherwise, default
		else p2 = next2;
	}
	goto loop;
}
/*
bool Dates::printNormalized1 ( SafeBuf *sb , Event *ev , int32_t niceness ) {
ev->m_date->printNormalized2 ( sb , niceness ,m_words);
if ( ev->m_numCloseDates <= 0 ) return true;
if ( ! sb->safePrintf(" closed ") ) return false;
for ( int32_t i = 0 ; i < ev->m_numCloseDates ; i++ ) {
// breathe
QUICKPOLL(niceness);
// int16_tcut
Date *cd = ev->m_closeDates[i];
// print it
if ( ! cd->printNormalized2(sb,niceness,m_words)) return false;
}
return true;
}
// just print the date itself
bool Date::printNormalized2 ( SafeBuf *sb , int32_t niceness , Words *words ) {
nodeid_t *tids = words->getTagIds();
char **wptrs = words->getWords();
int32_t *wlens = words->getWordLens ();
//if ( m_numPtrs == 0 && (m_flags & DF_CLOSE_DATE) )
//if ( (m_flags & DF_CLOSE_DATE) )
// sb->safePrintf(" closed ");
//if ( m_numPtrs > 0 && m_mostUniqueDatePtr == m_ptrs[0] )
// sb->safePrintf("");
// print out each word
for ( int32_t j = m_a ; j < m_b ; j++ ) {
// breathe
QUICKPOLL(niceness);
// skip if tag
if ( tids[j] ) continue;
// print it otherwise
sb->safeMemcpy(wptrs[j],wlens[j]);
}
//if ( m_numPtrs > 0 && m_mostUniqueDatePtr == m_ptrs[0] )
// sb->safePrintf(" ");
//if ( m_numPtrs == 0 && (m_flags & DF_CLOSE_DATE) )
//if ( (m_flags & DF_CLOSE_DATE) )
// sb->safePrintf(" ");
// telescope ptrs
for ( int32_t i = 1 ; m_type==DT_TELESCOPE && isafePrintf(", ");
//if ( (dp->m_flags & DF_CLOSE_DATE) )
// sb->safePrintf(" closed ");
//if ( dp == m_mostUniqueDatePtr )
// sb->safePrintf("");
// print out each word
for ( int32_t j = dp->m_a ; j < dp->m_b ; j++ ) {
// skip if tag
if ( tids[j] ) continue;
// print it otherwise
sb->safeMemcpy(wptrs[j],wlens[j]);
}
//if ( dp == m_mostUniqueDatePtr )
// sb->safePrintf(" ");
//if ( (dp->m_flags & DF_CLOSE_DATE) )
// sb->safePrintf("");
}
// end in assumed year
//if ( m_flags & DF_ASSUMED_YEAR ) {
// int32_t t1 = m_minPubDate;
// // use 90 days instead of 365 since usually people will
// // indicate the year if the date is so far out
// int32_t t2 = t1 + 90*24*3600;
// sb->safePrintf("");
// sb->safePrintf(" ** %s- ",ctime(&t1));
// sb->safePrintf("%s",ctime(&t2));
// sb->safePrintf(" ");
//}
return true;
}
*/
// . reset the s_mt, s_tzt and s_dvt tables, in that order
// . takes no arguments and returns nothing
void resetDateTables ( ) {
	s_mt .reset ( );
	s_tzt.reset ( );
	s_dvt.reset ( );
}
// . is "di" a sub-date of us?
// . he is if every date type he has we have as well, his month,
//   daynum, year and dow each equal ours wherever he has them, and
//   his tod window [m_minTod,m_maxTod] lies within ours
// . the tod test is a containment test rather than equality so that
//   some tod ranges, like "until 2am", are supported
bool Date::isSubDate ( Date *di ) {
	// if he has some types we do not have, forget it
	if ( (di->m_hasType & m_hasType) != di->m_hasType ) return false;
	// these fields must match exactly, but only if he has them
	const bool monthDiffers =
		(di->m_hasType & DT_MONTH ) && di->m_month  != m_month;
	const bool dayNumDiffers =
		(di->m_hasType & DT_DAYNUM) && di->m_dayNum != m_dayNum;
	const bool yearDiffers =
		(di->m_hasType & DT_YEAR  ) && di->m_year   != m_year;
	const bool dowDiffers =
		(di->m_hasType & DT_DOW   ) && di->m_dow    != m_dow;
	if ( monthDiffers || dayNumDiffers || yearDiffers || dowDiffers )
		return false;
	// no tod on his side? then nothing is left to check
	if ( ! ( di->m_hasType & DT_TOD ) ) return true;
	// his tod window may not stick out of ours on either end
	return ! ( di->m_minTod < m_minTod ) &&
	       ! ( di->m_maxTod > m_maxTod );
}
void Dates::setEventBrotherBits ( ) {
char *dom = m_url->getDomain();
int32_t dlen = m_url->getDomainLen();
if ( m_contentType != CT_XML ) dlen = 0;
bool isFacebook = false;
bool isEventBrite = false;
bool isStubHub = false;
if ( dlen == 12 && strncmp ( dom , "facebook.com" , 12 ) == 0 )
isFacebook = true;
if ( dlen == 11 && strncmp ( dom , "stubhub.com" , 11 ) == 0 )
isStubHub = true;
if ( dlen == 14 && strncmp ( dom , "eventbrite.com" , 14 ) == 0 )
isEventBrite = true;
// are implied sections valid? they should be because we need them
// for santafeplayhouse.org whose event dates span two sentences
// and are only together in a tight implied section.
if ( ! m_sections->m_addedImpliedSections ) { char *xx=NULL;*xx=0; }
////////////////////////
//
// set SEC_HASEVENTDOMDOW bit
//
// . used for setting SEC_EVENT_BROTHER bits
//
///////////////////////
for ( int32_t i = 0 ; i < m_numDatePtrs ; i++ ) {
// breathe
QUICKPOLL ( m_niceness );
// int16_tcut
Date *di = m_datePtrs[i];
// skip if was incorporated into a compound date, range or list
if ( ! di ) continue;
// do not venture into telescope section
//if ( di->m_type == DT_TELESCOPE ) break;
// skip if fuzzy
//if ( di->m_flags & DF_FUZZY ) continue;
// skip if pub date
if ( di->m_flags & DF_PUB_DATE ) continue;
// skip if registration date
if ( di->m_flags & DF_REGISTRATION ) continue;
if ( di->m_flags & DF_NONEVENT_DATE ) continue;
if ( di->m_flags5 & DF5_IGNORE ) continue;
// int16_tcut
datetype_t dt = (DT_MONTH|DT_DAYNUM);
bool match = false;
// need month or dow
if ( (di->m_hasType & dt) == dt ) match = true;
if ( di->m_hasType & DT_DOW ) match = true;
if ( ! match ) continue;
// do not venture into telescope section
//if ( di->m_type == DT_TELESCOPE ) break;
// if we are a telescope, get the first date in telescope
if ( di->m_type == DT_TELESCOPE ) {
// "24 [[]] November 2009 [[]] 8pm - 2am [[]]
// Monday - Saturday" for burtstikilounge.com
di = di->m_ptrs[0];
// . only allwo telescoped daynum i guess for
// burtstikilounge.com
// . otherwise telescoped tods for folkmads.org are
// getting SEC_HASEVENTDOMDOW set!
if ( di->m_hasType != DT_DAYNUM ) continue;
}
// telescopes are not fuzzy
else {
// skip if fuzzy
if ( di->m_flags & DF_FUZZY ) continue;
}
// . telescope it all up
// . use compound section now in case date spans multiple
// sentence sections
Section *sd = di->m_compoundSection;
// telescope all the way up
for ( ; sd ; sd = sd->m_parent ) {
// breathe
QUICKPOLL ( m_niceness );
// stop if already set
if ( sd->m_flags & SEC_HASEVENTDOMDOW ) break;
// mark it
sd->m_flags |= SEC_HASEVENTDOMDOW;
}
}
// sanity
if ( m_sections->m_lastSection->m_next ) { char *xx=NULL;*xx=0; }
if ( ! s_init42 ) { char *xx=NULL;*xx=0; } // h_details, h_more
//
// similar to above, we have issues with the last link like
// "(more)" is being combined into the previous sentence! so
// also get the hash of the last link in the section
//
for ( Section *si = m_sections->m_lastSection; si ; si = si->m_prev ) {
// breathe
QUICKPOLL(m_niceness);
// must be a href tag, a link!
if ( si->m_tagId != TAG_A ) continue;
// get the hash. it is not in the section so
// we have to scan
int32_t kmax = si->m_a + 20;
if ( kmax > m_nw ) kmax = m_nw;
if ( kmax > si->m_b ) kmax = si->m_b;
uint32_t tagh = 0;
bool gotOne = false;
for ( int32_t k = si->m_a ; k < kmax ; k++ ) {
// breathe
QUICKPOLL(m_niceness);
// skip if not alnum
if ( ! m_wids[k] ) continue;
// must be "details" or "more"
if ( m_wids[k] == h_details ) gotOne = true;
if ( m_wids[k] == h_more ) gotOne = true;
// hash it up
tagh ^= m_wids[k];
tagh <<= 1;
if ( tagh == 0 ) tagh = 1234567;
}
// need "more" or "details" in link
if ( ! gotOne ) continue;
// if no text, forget it... what about an image???
if ( ! tagh ) continue;
// set last sent content hash
Section *sp = si;
for ( ; sp ; sp = sp->m_parent ) {
// breathe
QUICKPOLL ( m_niceness );
// stop if already set, no need to telescope up more
if ( sp->m_lastLinkContentHash32 ) break;
// set it
sp->m_lastLinkContentHash32 = tagh;
}
}
// make hashtable of generic words for determining if the event
// brothers are actually just a list of dates
static char *s_gwords [] = {
"open",
"opens",
"closed",
"closes",
"on",
"for",
"hours",
"of",
"operation",
"store",
"business",
"office"
};
// store these words into table
static HashTableX s_gt;
static char s_gtbuf[2000];
static bool s_init3 = false;
if ( ! s_init3 ) {
s_init3 = true;
s_gt.set(8,0,128,s_gtbuf,2000,false,m_niceness,"gttab");
int32_t n = (int32_t)sizeof(s_gwords)/ sizeof(char *);
for ( int32_t i = 0 ; i < n ; i++ ) {
// set words
char *s = s_gwords[i];
int64_t h = hash64n(s);
s_gt.addKey ( &h );
}
}
// int16_tcut
wbit_t *bb = m_bits->m_bits;
// scan the sections
for ( Section *si = m_sections->m_rootSection ; si ; si = si->m_next) {
// breathe
QUICKPOLL(m_niceness);
// must be in a brother list
if ( ! si->m_nextBrother ) continue;
// and first in the list
if ( si->m_prevBrother ) continue;
// only look for "hard" brothers
// . fixes trumba so "re-opens saturday at 9am" can telescope
// to the timepoint date range a few sentences above it.
// . fixes publicbroadcasting so soul power's november
// date range can be telescoped to
// . fixes blackbirdbuvette.com so store hours, kitchen hours
// are not date brothers
// . fixes collectorsguide.com so "every sunday before 1pm"
// can telescope to the store hours in the same hard section
// . ...
if ( ! m_sections->isHardSection ( si ) ) continue;
// count bullet delim as soft for this to fix
// blackbirdbuvette.com whose store hours and kitchen hours
// are in a bullet delimeted list
if ( si->m_baseHash == BH_BULLET ) continue;
// . get first one we find
// . this algo hurts sybarite5.org because one of the sections
// at the top is an event and not a header, and it has
// a monthday range that ends up being a header for all
// the event sections below which is really bad! this was
// originally a fix for publicbroadcasting soul power but
// we fixed that by not doing this for soft sections!
// . NO! actually that is a header in sybarite5.org! wow...
// . and we need this to fix adobetheater.org too for which
// we otherwise lose a range header
Section *first = NULL;
// are the brothers really part of the same set of store hours?
// date brothers?
bool eventBrothers = false;
Section *bro = si;
Section *last = NULL;
bool diffTODs = false;
bool diffDays = false;
int32_t px1;
int32_t rx1;
int32_t ex1;
int32_t tx1;
int32_t dx1;
int32_t ax1;
int32_t lh1;
// scan the brother list
for ( ; bro ; bro = bro->m_nextBrother ) {
// breathe
QUICKPOLL(m_niceness);
// . if its a registration section, disregard it
// . fixes signmeup.com which has a differing phone
// # and dates in a registration section. that
// cause all brothers to be event brothers and the
// race tod start times could not telescope to the
// brother that had the day, "Thanksgiving"
if ( bro->m_flags & SEC_HAS_REGISTRATION ) continue;
// . TODO: include subfields
// . TODO: storehours detection
// . TODO: adjacent date/price/email detection
// skip if has nothing special
int32_t px2 = bro->m_phoneXor;
int32_t rx2 = bro->m_priceXor;
int32_t ex2 = bro->m_emailXor;
int32_t ax2 = bro->m_addrXor;
// should help fix pagraphs that repeat the same date
// like denver.org single's day soiree
int32_t tx2 = bro->m_todXor;
int32_t dx2 = bro->m_dayXor;
int32_t lh2 = bro->m_lastLinkContentHash32;
// cancel date xor if not in tod and dom/dow section
//if ( ! (bro->m_flags & SEC_HAS_TOD) )
// tx2 = 0;
//if ( ! (bro->m_flags & SEC_HAS_DOM) &&
// ! (bro->m_flags & SEC_HAS_DOW) )
// tx2 = 0;
if ( ! (bro->m_flags & SEC_HASEVENTDOMDOW) )
tx2 = 0;
// skip if nothing to compare to prev bro with
if ( ! px2 && ! rx2 && ! ex2 && ! tx2 && ! dx2 &&
! lh2 )
continue;
// if no last, skip then
if ( ! last ) {
update:
last = bro;
px1 = px2;
rx1 = rx2;
ex1 = ex2;
tx1 = tx2;
dx1 = dx2;
ax1 = ax2;
lh1 = lh2;
continue;
}
// are both brothers sentences?
//bool bothSentences =
// ( (bro ->m_flags & SEC_SENTENCE) &&
// (last->m_flags & SEC_SENTENCE) );
// . get our phone # xor
// . is last bro a different phone # than us?
// . this messes up cabq.gov libraries because
// in one section they'd have the primary phone
// and then a list of secondary phone #'s in the
// section after it
// . but without it many more urls mess up in a
// worse way, but getting description not theirs
if ( px1 && px2 && px1 != px2 )
eventBrothers = true;
// . need hasprice set. can't be single sentences.
// . they often list pricing info per sentence.
// . fixes collectorsguide.com which has
// "Admission ... adults $7, children/seniors $3"
// "Combo ... adults $12; children/seniors $5"
// as two separate sentences next to each other.
// . we seem to have multiple sections in one event
// that have ticket prices... denver.org
// melodytent, collectorsguide, so take this out
// for now
//if ( rx1 && rx2 && rx1 != rx2 && ! bothSentences )
// eventBrothers = true;
// differing emails?
if ( ex1 && ex2 && ex1 != ex2 )
eventBrothers = true;
// differing TOD dates?
if ( tx1 && tx2 && tx1 != tx2 ) {
diffTODs = true;
eventBrothers = true;
}
// . differing day dates? burtstikilounge.com calendars
// . TOD is telescoped to by the day, like if its
// store hours or something...
if ( dx1 && dx2 && dx1 != dx2 ) {
diffDays = true;
eventBrothers = true;
}
// . same tod, different address?
// . removes Tingley Beach description from the
// rio grande zoo because eventbroters was not
// getting set because they all had the same hours
// and price, etc.
if ( tx1 && tx2 && ax1 && ax2 && ax1 != ax2 )
eventBrothers = true;
// . subfields... like "cost:"
// . see isCompatible2() for this code
//if ( getNumSubfieldsInCommon ( last, bro ) > 0 )
// sameSubFields = true;
// . last link content hash the same? (more) (details)
// . sometimes it is merged into last sentence!
if ( lh1 && lh2 && lh1 == lh2 )
eventBrothers = true;
// do not do the event brothers algo for specific
// xml feeds because we ignore all but one date
// for these guys. we already know the event
// delimeters. and this often will find a phone or
// email in the and another one in a
// brother section even though the xml is well defined.
// because eventbrite has an and
// section with their own independent contact info.
if ( isEventBrite ||
isFacebook ||
isStubHub )
eventBrothers = false;
// if no eventbrothers yet, keep going
if ( ! eventBrothers ) goto update;
// remember the first one. we start setting the bit
// at the first one to be recognized as an event
// because it is often the case there are headers
// above the list of event sections.
if ( ! first ) first = last;
Section *s1 = last;
Section *s2 = bro ;
// point s1/s2 to first sentence in those sections, if
// not already
if ( ! ( s1->m_flags & SEC_SENTENCE ) )
s1 = s1->m_nextSent;
if ( ! ( s2->m_flags & SEC_SENTENCE ) )
s2 = s2->m_nextSent;
// or if not in sentences... wtf? this happens when
// indexing a script page or something
// fixes core for http://www.neaq.org/Scripts/feed/feed2js.php?src=http%3A%2F%2Fwww.eventkeeper.com%2Fekfeed%2FNEAQ_aq_homefeed2.xml&num=2&tz=-2&utf=y&html=y
if ( ! s1 || ! s2 ) break;
// must be adjacent. otherwise they are event brothers.
if ( s1->m_nextSent != s2 ) break;
// if they are two pure dates, then its a list of dates
// and they are not *event* brothers. fixes stuff
// like "Mon-Fri 9-5" and "Sat 9-3" so they are not
// event brothers.
int32_t a,b;
bool pure1 = true;
bool pure2 = true;
a = s1->m_a;
b = s1->m_b;
for ( int32_t i = a ; i < b ; i++ ) {
// breathe
QUICKPOLL(m_niceness);
// skip if not wid
if ( ! m_wids[i] ) continue;
// if part of date, skip it
if ( bb[i] & D_IS_IN_DATE ) continue;
// if generic word, skip it
if ( s_gt.isInTable(&m_wids[i]) ) continue;
// skip if number, like ticket price
if ( m_words->isNum(i) ) continue;
// crap, not pure
pure1 = false;
break;
}
// stop if not pure, no point in doing more
if ( ! pure1 ) break;
// otherwise, check the purity of the next sentence
a = s2->m_a;
b = s2->m_b;
// see if 2nd brother is all pure words too
for ( int32_t i = a ; i < b ; i++ ) {
// breathe
QUICKPOLL(m_niceness);
// skip if not wid
if ( ! m_wids[i] ) continue;
// if part of date, skip it
if ( bb[i] & D_IS_IN_DATE ) continue;
// if generic word, skip it
if ( s_gt.isInTable(&m_wids[i]) ) continue;
// crap, not pure
pure2 = false;
break;
}
// stop if not pure
if ( ! pure2 ) break;
// if both pure, not event brother
eventBrothers = false;
first = NULL;
goto update;
}
// bail if not EVENT brothers
if ( ! eventBrothers ) continue;
// . scan all event brothers
// . if we have a single store hours container and only
// that has the address, assume it is the store hours
// for all event brothers
Section *shc = NULL;
for ( Section *bro = first; bro ; bro = bro->m_nextBrother ) {
// breathe
QUICKPOLL(m_niceness);
// skip if not store hours
if (!(bro->m_flags & SEC_STOREHOURSCONTAINER))continue;
// if we already have one, that's no good
if ( shc ) { shc = NULL; break; }
// set it
shc = bro;
}
// must have address for all brothers
if ( shc && shc->m_addrXor == 0 ) shc = NULL;
// now if we had a single store hours container
for ( Section *bro=first; shc && bro; bro=bro->m_nextBrother ){
// breathe
QUICKPOLL(m_niceness);
// skip if not store hours
if ( bro->m_addrXor == 0 ) continue;
// must match
if ( bro->m_addrXor == shc->m_addrXor ) continue;
// not good otherwise
shc = NULL;
break;
}
// . crap for terrence wilson there are 3 sections:
// . date1
// . date2s
// . desc+addr
// . i was thinking that if the section contains the address
// of an event, the event can use the description, so
// maybe carve out this exception in the event desc algo
// in Events.cpp
// otherwise, set bit on all brothers
for ( Section *bro = first; bro ; bro = bro->m_nextBrother ) {
// breathe
QUICKPOLL(m_niceness);
// a legit store hours section is immune since it is
// not an event really. this way the dates in there
// can telescope out to join with the dates in the
// true event brothers. should fix burtstikilounge.com.
if ( bro == shc ) continue;
// this will prevent dates in "bro" from being
// headers outside this section as well as event
// description sharing between event brother sections.
bro->m_flags |= SEC_EVENT_BROTHER;
}
}
}
// . flag the dates in m_datePtrs[] that look like store hours or a weekly
//   schedule, and flag the sections that contain them
// . original algorithm notes:
// . find a dow based date (no single month daynum )
// . get date after it if dow based as well
// . if no such date after it, evaluate it by itself then
// . otherwise: get section containing both
// . evaluate all dates in that section, must all be legit schedule dates
// . the work is done in five passes over m_datePtrs[]:
//   1) keyword pass: look at the words preceding each element of a date
//      that has a tod and set DF_STORE_HOURS|DF_WEEKLY_SCHEDULE on a hit
//   2) set DF_SCHEDULECAND on dates with a dow/tod/special type that have
//      neither a daynum nor a month
//   3) for each schedule candidate, evaluate it together with the other
//      dates in its section (or by itself) and set DF_WEEKLY_SCHEDULE,
//      DF_STORE_HOURS and SEC_STOREHOURSCONTAINER
//   4) set DF_KITCHEN_HOURS (and clear DF_STORE_HOURS) on "sub hours"
//   5) set DF_SUBSTORE_HOURS on telescoped dates that contain a store
//      hours date
// . "telescopesOnly": when true, passes 1 through 4 skip every date whose
//   m_type is not DT_TELESCOPE. pass 5 only ever looks at telescopes.
void Dates::setStoreHours ( bool telescopesOnly ) {
// shortcut
//wbit_t *bb = m_bits->m_bits;
// date types that are treated like a day-of-week for schedule purposes
datetype_t specialTypes =
DT_HOLIDAY | // thanksgiving
DT_SUBDAY | // mornings
DT_SUBWEEK | // weekends
DT_SUBMONTH | // last day of month
DT_EVERY_DAY | // 7 days a week
DT_SEASON | // summers
DT_ALL_HOLIDAYS ; // "holidays"
// shortcut to the per-word bit flags
wbit_t *bb = m_bits->m_bits;
//
// PASS 1: keyword pass
//
// detect words before this date like "hours:" or "open:"
for ( int32_t i = 0 ; i < m_numDatePtrs ; i++ ) {
// breathe
QUICKPOLL ( m_niceness );
// shortcut
Date *di = m_datePtrs[i];
// skip if none
if ( ! di ) continue;
// skip if not telescope
if ( telescopesOnly && di->m_type != DT_TELESCOPE ) continue;
// need tod though. all event times have tods so this
// should just speed things up.
if ( ! (di->m_hasType & DT_TOD) ) continue;
// official dates are exempt (i.e. facebook start_date)
if ( di->m_flags & DF_OFFICIAL ) continue;
/*
// it can be closure not too, not just store hours...
// like for asthmaallies.org: "Holiday Office Closures:
// December 22 - 26, 2011"
if ( ! (di->m_hasType & DT_DOW) ) continue;
// 2+ dows required to fix "Fri, Nov 27, 8:00p"
int32_t numDow = getNumBitsOn8(di->m_dowBits);
if ( numDow <= 1 ) continue;
// . now month unless in a month range or monthdaynum range
// . if month, it needs like Nov x - y or Nov x - Dec y
if ( (di->m_hasType & DT_MONTH) &&
!(di->m_hasType & DT_RANGE_MONTHDAY) )
continue;
*/
// scan the words before each date element in "di"
// "ne" receives the number of elements returned in "de"
int32_t ne;
Date **de = getDateElements(di,&ne);
// starts with a dow like "Monday-Friday.."
// no. okstate.edu starts with "Aug 22 - Dec 16 2011"
// and then lists the dow hours. and at top it has
// "Fall Semester".
// ok, then, we don't want to hit "doors open: 8pm" on
// reverbnation.com so try this one:
//if ( de[0]->m_type == DT_TOD ) continue;
// fixes "Fall Hours * Aug 24 - Nov 26 * M-Th 7:30am-...
// for tarlton.law.utexas.edu
//if ( (de[0]->m_hasType & DT_SEASON) &&
// m_wids[de[0]->m_b-1] == h_hours )
// goto gotOne;
// so scan each date element then
int32_t x; for ( x = 0 ; x < ne ; x++ ) {
// breathe
QUICKPOLL(m_niceness);
// get it
Date *dx = de[x];
// scan for words before
// walk backwards from the element's first word, looking at
// no more than 30 word positions and never going below 0
int32_t a = dx->m_a;
int32_t min = a - 30;
if ( min < 0 ) min = 0;
int32_t alnumCount = 0;
int64_t lastWid = 0LL;
for ( ; a >= min ; a-- ) {
// breathe
QUICKPOLL(m_niceness);
// skip tags and punct
if ( ! m_wids[a] ) continue;
// check it. often "hours" is in the date
// like it is for utexas.edu "Fall Hours"
if ( m_wids[a] == h_hours )
break;
// do not count dates in list towards
// alnumcount
if ( bb[a] & D_IS_IN_DATE ) continue;
// limit alpha count
if ( ++alnumCount >= 7 )
break;
// "open is", "open be", "open are" when read backwards, i.e.
// the word after this one was "open"
if ( (m_wids[a] == h_is ||
m_wids[a] == h_be ||
m_wids[a] == h_are ) &&
lastWid == h_open )
break;
// if open is first word in sentence? kinda..
// NOTE(review): the next line looks truncated by text extraction
// (everything between a '<' and a later '>' appears to be gone).
// it seems to fuse the end of this "open" test with a later
// "alnumCount >= 7 || a < min" test that presumably followed the
// inner loop. in the visible text "lastWid" is never assigned and
// the inner loop's closing brace is missing - restore this from
// the original file before building.
if ( m_wids[a] == h_open && a-2= 7 || a < min ) continue;
// ok, got one
break;
}
// if we had no luck... try next date, "di"
// (x == ne means no element of "di" produced a keyword hit)
if ( x >= ne ) continue;
// otherwise, it was store hours...
// gotOne:
di->m_flags |= DF_STORE_HOURS;
di->m_flags |= DF_WEEKLY_SCHEDULE;
//
// set SEC_STOREHOURSCONTAINER
//
Section *sp = di->m_section;
// initial dates xor in "sd"
int32_t todXor = sp->m_todXor;
// can't be zero - we contain the store hours
// (this used to be a forced core dump; now the date is skipped)
if ( ! todXor ) continue;//{ char *xx=NULL;*xx=0; }
// keep setting up as long as datexor remains unchanged
for ( ; sp ; sp = sp->m_parent ) {
// breathe
QUICKPOLL(m_niceness);
// stop if a date was gained
if ( sp->m_todXor != todXor ) break;
// set otherwise
sp->m_flags |= SEC_STOREHOURSCONTAINER;
}
}
//
// PASS 2: set DF_SCHEDULECAND on the schedule candidates
//
// scan all dates we got
for ( int32_t i = 0 ; i < m_numDatePtrs ; i++ ) {
// breathe
QUICKPOLL ( m_niceness );
// shortcut
Date *di = m_datePtrs[i];
// skip if none
if ( ! di ) continue;
// skip if not in body (i.e. from url)
if ( ! (di->m_flags & DF_FROM_BODY) ) continue;
// ignore if fuzzy
if ( di->m_flags & DF_FUZZY ) continue;
// skip if registration date, not considered store hours
if ( di->m_flags & DF_REGISTRATION ) continue;
// skip if not telescope
if ( telescopesOnly && di->m_type != DT_TELESCOPE ) continue;
// turn it off in case we are a re-call on a telescope
//di->m_flags &= ~DF_SCHEDULECAND;
// must have a dow/tod type thing
if ( ! ( di->m_hasType & (specialTypes|DT_DOW|DT_TOD) ) )
continue;
// no month day
if ( (di->m_hasType & DT_DAYNUM) )
// no more exceptions!
// exception: Nov 23 - Dec 5
//!(di->m_hasType & DT_RANGE_MONTHDAY) &&
// exception: Nov 23-27
//!(di->m_hasType & DT_RANGE_DAYNUM) &&
// exception: Nov - Dec
//di->m_month != -1 )
continue;
// or month
if ( (di->m_hasType & DT_MONTH ) )
continue;
// set this bit
di->m_flags |= DF_SCHEDULECAND;
}
//
// PASS 3: evaluate each schedule candidate, together with the other
// dates in its section when possible, otherwise by itself
//
for ( int32_t i = 0 ; i < m_numDatePtrs ; i++ ) {
// breathe
QUICKPOLL ( m_niceness );
// shortcut
Date *di = m_datePtrs[i];
// skip if none
if ( ! di ) continue;
// must be set
if ( ! ( di->m_flags & DF_SCHEDULECAND ) ) continue;
// skip if not telescope
if ( telescopesOnly && di->m_type != DT_TELESCOPE ) continue;
// get date after us
Date *dj = NULL;
// scan for it. stops at the first non-NULL slot after "i"
for ( int32_t j = i + 1 ; j < m_numDatePtrs ; j++ ) {
// breathe
QUICKPOLL ( m_niceness );
// get it
dj = m_datePtrs[j];
// stop if got it
if ( dj ) break;
}
// if not schedule-y null it out
if ( dj && !(dj->m_flags & DF_SCHEDULECAND) ) dj = NULL;
/*
// or if too many words in between di and dj, they are not
// part of the same store hours then
int32_t acount = 0;
int32_t tcount = 0;
for ( int32_t r = di->m_b ; dj && r < dj->m_a ; r++ ) {
// do not scan too far
if ( ++tcount >= 20 ) break;
// count alnums
if ( m_wids[r] && ++acount >= 6 ) break;
}
// stop if too far apart, do not try to pair up
if ( tcount >= 20 ) dj = NULL;
if ( acount >= 6 ) dj = NULL;
*/
// if had a possible partner get section containing both
Section *sd = di->m_section;
// blow it up. when "dj" is NULL this loop does not run and "sd"
// stays the section of "di"
for ( ; dj && sd ; sd = sd->m_parent ) {
// breathe
QUICKPOLL ( m_niceness );
// test
if ( sd->contains2 ( dj->m_a ) ) break;
}
// . we jump back here from the section scan below, with "sd" reset to
//   the section of "di" and "dj" set to NULL, when two dates in the
//   section are too far apart
// . every local declared below is re-initialized on that second trip
redo:
// all dates in section must be legit now too
// NOTE(review): m_firstDate looks like a 1-based index into
// m_datePtrs[] (0 meaning "none") - confirm in Sections.h
int32_t fd = sd->m_firstDate - 1;
// craziness! if there are two sentences "...before. "
// "The spring..." the date can span those two sentences
// "before. The spring" and cause fd to be -1. like for
// thealcoholenthusiast.com...
// (the null write is a deliberate forced core dump)
if ( fd < 0 ) { char *xx=NULL;*xx=0; }
// assume good
bool good = true;
// is it an open-ended tod range like "before 1pm"?
bool openRange = true;
// union of the m_hasType bits of the dates scanned
datetype_t acc = 0;
// union of the m_dowBits of the dates scanned
char dowBits = 0;
// sum over tod ranges of (# of dows) * (range length in seconds)
int32_t numSecondsOpen = 0;
// # of dates that had a tod but no tod range
int32_t hadNoEndTime = 0;
bool isStoreHours = false;
// previous date accepted by the scan, for the word gap test
Date *last = NULL;
int32_t lastNumDow = 0;
int32_t kmax = m_numDatePtrs;
// di might be a telescope so we need this
if ( di->m_type == DT_TELESCOPE ) {
// not "good" means "di" is evaluated by itself further below,
// where "dj" is also set to NULL
good = false;
// do not do loop below now
kmax = 0;
}
// scan those
for ( int32_t k = fd ; k < kmax ; k++ ) {
// breathe
QUICKPOLL ( m_niceness );
// test
Date *dk = m_datePtrs[k];
if ( ! dk ) continue;
// stop on breach
if ( dk->m_a >= sd->m_b ) break;
// skip if not sure if a date
if ( dk->m_flags & DF_FUZZY ) continue;
// skip if registration date, not considered store hrs
if ( dk->m_flags & DF_REGISTRATION ) continue;
// fix "irishtimes.com's latest news service is updated
// constantly from 6.30 a.m. until 10 p.m. daily."
// from coring because its todxor was 0!
if ( dk->m_flags & DF_PUB_DATE ) continue;
// this causes the ! todXor core dump below because
// we do not set todxor for ticket/registration dates
// even if they are indeed store hours
if ( dk->m_flags & DF_NONEVENT_DATE ) continue;
if ( dk->m_flags5 & DF5_IGNORE ) continue;
// stop if date is not store hours-ish
if ( ! (dk->m_flags & DF_SCHEDULECAND) ) {
good = false;
break;
}
// . count words between this date and last one
// . if too many then use the smallest section
// around date "di"
// . should fix abqtango.com ...
if ( last && dj ) {
// reset counters
int32_t acount = 0;
int32_t tcount = 0;
int32_t r = last->m_b;
for ( ; r < dk->m_a ; r++ ) {
// do not scan too far
if ( ++tcount >= 20 ) break;
// count alnums
if ( m_wids[r] && ++acount >= 6) break;
}
// stop if too far apart, do not try to pair up
// (r stopped short of dk only if one of the limits was hit)
if ( r < dk->m_a ) {
// use smallest section around "di"
sd = di->m_section;
// and just use "di", no pairing
// up with another date
dj = NULL;
goto redo;
}
}
// assign this
last = dk;
// otherwise, accumulate bits
acc |= dk->m_hasType;
// and days of the week as well
dowBits |= dk->m_dowBits;
// how many days of the week are mentioned
int32_t numDow = getNumBitsOn8(dk->m_dowBits);
// fix "Wednesday - Saturday [[]] 10:00am - 6:00pm"
// for woodencow.com url
if ( numDow <= 0 ) numDow = lastNumDow;
// assume at least 1 to fix maret.org since we
// do not have telescopes yet
if ( numDow <= 0 ) numDow = 1;
// for next guy
lastNumDow = numDow;
// need tod date at this point
if ( ! ( dk->m_hasType & DT_TOD ) ) continue;
// if not range, no end time then
if ( ! (dk->m_hasType & DT_RANGE_TOD) ) {
// count it
hadNoEndTime++;
// and get next date
continue;
}
// and total hours open per week
int32_t min = dk->m_minTod;
int32_t max = dk->m_maxTod;
if ( min < 0 ) continue;
// sanity: forced core dump on an inverted range
if ( min > max ) { char *xx=NULL;*xx=0; }
// this will be zero if not a tod range
numSecondsOpen += numDow * (max - min);
// disallow "Every Sunday before 1pm" by
// itself from collectorsguide.com
if ( ! (dk->m_flags & DF_ONGOING ) ) openRange = false;
}
// if had some incompatible dates, then just try di by itself
if ( ! good ) dj = NULL;
// . if no good partner date, just try di by itself
// . we come here right away for telescopes only
if ( ! good ) {
// if has month/daynum in it, forget it
// (di already passed the DF_SCHEDULECAND test at the top of this
// loop body, so this looks redundant)
if ( ! (di->m_flags & DF_SCHEDULECAND) )
continue;
// reset crap
numSecondsOpen = 0;
openRange = true;
hadNoEndTime = 0;
// do not do section telescoping thing now
kmax = 0;
// otherwise init these to di's stuff
acc = di->m_hasType;
dowBits = di->m_dowBits;
if ( (di->m_hasType & DT_RANGE_TOD) &&
! (di->m_flags & DF_ONGOING ) )
openRange = false;
// how many days of the week are mentioned
int32_t numDow = getNumBitsOn8(dowBits);
// and total hours open per week
int32_t min = di->m_minTod;
int32_t max = di->m_maxTod;
// this will be zero if not a tod range
if ( min >= 0 && max > min )
numSecondsOpen += numDow * (max - min);
}
// "seven days a week" are like dows
if ( acc &
( DT_HOLIDAY | // thanksgiving
DT_SUBDAY | // mornings
DT_SUBWEEK | // weekends
DT_EVERY_DAY | // 7 days a week
DT_ALL_HOLIDAYS ))// "holidays"
// treat it like a dow for these purposes
acc |= DT_DOW;
// . must have dow and tod range to be store hours
// . this algo fixes burtstikilounge.com which has a
// "6pm-2am [[]] Monday - Saturday" type of date which is
// a telescope, and we have no telescopes here yet
if ( ! (acc & DT_RANGE_TOD) ) continue;
if ( ! (acc & DT_DOW ) ) continue;
// need at least one closed tod range
if ( openRange ) continue;
// get # dow it is on
int32_t numDow = getNumBitsOn8 ( dowBits );
// . must be open at least 4 days a week
// . no, let's make it 2+ for a weekly schedule so that
// date like "11:30 sat and sun only" on unm.edu are
// "event candidates" in Events.cpp. (recurring dow implied)
if ( numDow < 2 ) continue;
// its part of a weekly schedule now at least
dateflags_t df = DF_WEEKLY_SCHEDULE;
// . set weekly schedule section
// . dates in the same weekly schedule section do not
// cause the mult events penalty in Events.cpp
//di->m_storeHoursSection = lastsd;
// . and for at least 28 hours a week
// . but fix unm.edu which has things like "8am Mon - Fri"
// so that they are still store hours.
if ( numSecondsOpen >= 28*3600 ) isStoreHours = true;
// if we had like "mon-fri 9am"... assume store hours
if ( hadNoEndTime > 0 ) isStoreHours = true;
// add the store hours flag if it qualified
if ( isStoreHours ) df |= DF_STORE_HOURS;
// if we are a telescope or an individual date set the flag
if ( ! good )
di->m_flags |= df;
// mark all in our section now (only if not telescope)
// kmax is 0 when "di" was evaluated by itself, so this is skipped then
for ( int32_t k = fd ; k < kmax ; k++ ) {
// breathe
QUICKPOLL ( m_niceness );
// test
Date *dk = m_datePtrs[k];
if ( ! dk ) continue;
// stop on breach
if ( dk->m_a >= sd->m_b ) break;
// only mark the schedule candidates
if ( ! (dk->m_flags & DF_SCHEDULECAND ) ) continue;
// mark it
dk->m_flags |= df;
}
// bail if not store hours
if ( ! isStoreHours ) continue;
//
//
// set SEC_STOREHOURSCONTAINER
//
//
// this makes telescopes core because one piece of them
// might not have the tod or todXor set
if ( ! good ) continue;
Section *sp = sd;
// initial dates xor in "sd"
int32_t todXor = sd->m_todXor;
// can't be zero - we contain the store hours
// (forced core dump if it is)
if ( ! todXor ) { char *xx=NULL;*xx=0; }
// keep setting up as long as datexor remains unchanged
for ( ; sp ; sp = sp->m_parent ) {
// breathe
QUICKPOLL(m_niceness);
// stop if a date was gained
if ( sp->m_todXor != todXor ) break;
// set otherwise
sp->m_flags |= SEC_STOREHOURSCONTAINER;
}
}
/////////////////////////////
//
// PASS 4: set DF_KITCHEN_HOURS
//
// . this means it is not quite store hours
// . could be happy hour tod range, kitchen hours, etc.
// . this does not need a tod range like store hours since it can
// have something like "kitchen hours: until 10 pm" and needs to
// telescope to the store hours
//
/////////////////////////////
for ( int32_t i = 0 ; i < m_numDatePtrs ; i++ ) {
// breathe
QUICKPOLL ( m_niceness );
// shortcut
Date *di = m_datePtrs[i];
// skip if none
if ( ! di ) continue;
// skip if not in body (i.e. from url)
if ( ! (di->m_flags & DF_FROM_BODY) ) continue;
// skip if not telescope
if ( telescopesOnly && di->m_type != DT_TELESCOPE ) continue;
// must have a dow/tod type thing
//if ( ! ( di->m_hasType & DT_DOW ) &&
// ! ( di->m_hasType & DT_HOLIDAY ) )
// continue;
if ( ! ( di->m_hasType & (specialTypes|DT_DOW) ) )
continue;
// a tod or tod range, like "until 10pm"
if ( ! ( di->m_hasType & DT_TOD ) ) continue;
// no month day
if ( di->m_hasType & DT_DAYNUM )
continue;
// get section of the date
Section *sd = di->m_section;
// scan up until sentence section
for ( ; sd ; sd = sd->m_parent )
if ( sd->m_flags & SEC_SENTENCE ) break;
// if no sentence we must be in javascript
if ( m_contentType == CT_JS && ! sd ) continue;
// sanity check otherwise (forced core dump)
if ( ! sd ) { char *xx=NULL;*xx=0; }
// in a table? get the cell section then
Section *cell = di->m_tableCell;
// . detects "kitchen hours" etc. basically any "sub hours"
// . we just exclude these dates for setting store hours
// . checks the sentence first, then the table cell's column header
//   section, then its row header section
bool hasKitchen = false;
if ( hasKitchenHours ( sd ) )
hasKitchen = true;
else if ( cell && hasKitchenHours ( cell->m_headColSection ) )
hasKitchen = true;
else if ( cell && hasKitchenHours ( cell->m_headRowSection ) )
hasKitchen = true;
// if no kitchen hours, do not set any flags
if ( ! hasKitchen ) continue;
// otherwise...
di->m_flags |= DF_KITCHEN_HOURS;
// and remove this
di->m_flags &= ~DF_STORE_HOURS;
}
//
// PASS 5: set DF_SUBSTORE_HOURS
//
// . for dates that telescope to the store hours we set this
// unless we have a specific daynum or list of daynums. daynum
// ranges are ok.
//
for ( int32_t i = 0 ; i < m_numDatePtrs ; i++ ) {
// breathe
QUICKPOLL ( m_niceness );
// shortcut
Date *di = m_datePtrs[i];
// skip if none
if ( ! di ) continue;
// skip if not in body (i.e. from url)
if ( ! (di->m_flags & DF_FROM_BODY) ) continue;
// skip if not telescope. this algo only does telescopes!
if ( di->m_type != DT_TELESCOPE ) continue;
// skip if store hours already set
if ( di->m_flags & DF_STORE_HOURS ) continue;
// must have one store hours ptr in it
// (j == m_numPtrs after the loop means none of them had it)
int32_t j; for ( j = 0 ; j < di->m_numPtrs ; j++ )
if ( di->m_ptrs[j]->m_flags & DF_STORE_HOURS ) break;
if ( j >= di->m_numPtrs ) continue;
// ok, we got store hours, do we have a single
// daynum or list of daynums?
if ( (di->m_hasType & DT_DAYNUM) &&
!(di->m_hasType & DT_RANGE_DAYNUM) )
// do not inherit the store hours flag in that case
continue;
// otherwise we do!
di->m_flags |= DF_SUBSTORE_HOURS;
}
}
// set Date::m_maxYearGuess for each date that
// does not have a year. use the year of the nearest neighbor
// to determine it. allow for that year minus or plus one if
// we also have a DOW. and also allow for that year minus one if
// we are from a month # greater than our neighbor that supplied
// the year, assuming he did have a month. so if they have a list
// like Dec 13th and the neighbor following is Jan 2nd 2011, we
// allow the year 2010 for Dec 13th. and only consider non-fuzzy
// years. so neighbors must be non-fuzzy dates.
void Dates::setMaxYearGuesses ( ) {
int32_t minYear = 9999;
int32_t maxYear = 0000;
// for drivechicago.com the "May-Sep '10" date throws us off. but if
// we recognize that there are years from 2010-2011 on the page then
// we can allow the dow/month/daynum-based event dates to imply one of
// those years. we can set their Date::m_guessedYearFromDow and use
// that to set leftGuess/rightGuess for other dates.
for ( int32_t i = 0 ; i < m_numDatePtrs ; i++ ) {
// breathe
QUICKPOLL ( m_niceness );
// int16_tcut
Date *di = m_datePtrs[i];
// skip if none
if ( ! di ) continue;
// skip if we got no year
if ( ! ( di->m_hasType & DT_YEAR) ) continue;
// skip if we got no tod
if ( ! ( di->m_hasType & DT_TOD ) ) continue;
// skip if copyright
if ( di->m_flags & DF_COPYRIGHT ) continue;
if ( di->m_flags & DF_FUZZY ) continue;
if ( di->m_flags & DF_NONEVENT_DATE ) continue;
if ( di->m_flags5 & DF5_IGNORE ) continue;
// ok, record min/max of years for event dates
if ( di->m_minYear < minYear ) minYear = di->m_minYear;
if ( di->m_maxYear > maxYear ) maxYear = di->m_maxYear;
}
// now set m_dowBasedYear for each date that has a dow and a monthday
for ( int32_t i = 0 ; i < m_numDatePtrs ; i++ ) {
// breathe
QUICKPOLL ( m_niceness );
// stop?
if ( minYear == 9999 ) break;
// to be sure, must be within 4 years
if ( maxYear - minYear > 4 ) break;
// int16_tcut
Date *di = m_datePtrs[i];
// skip if none
if ( ! di ) continue;
// skip if we got a year
if ( di->m_hasType & DT_YEAR) continue;
// skip if we got no tod
if ( ! ( di->m_hasType & DT_TOD ) ) continue;
// skip if copyright
if ( di->m_flags & DF_COPYRIGHT ) continue;
if ( di->m_flags & DF_FUZZY ) continue;
if ( di->m_flags & DF_NONEVENT_DATE ) continue;
if ( di->m_flags5 & DF5_IGNORE ) continue;
// skip if no dow
if ( ! ( di->m_hasType & DT_DOW ) ) continue;
if ( ! ( di->m_hasType & DT_DAYNUM ) ) continue;
// how does this happen?
// it happened for "Sunday October 30, 5 - 8PM" for
// http://96.30.56.90/~nytmpl/
if ( di->m_dayNum <= 0 ) continue;
if ( di->m_minDayNum <= 0 ) continue;
if ( di->m_maxDayNum <= 0 ) continue;
if ( di->m_minDayNum != di->m_maxDayNum ) continue;
// just one dow
if ( getNumBitsOn8(di->m_dowBits) != 1 ) continue;
// . ok, calculate what year it must be on then!
// . return 0 on issue, if none in range
di->m_dowBasedYear =
calculateYearBasedOnDOW ( minYear,maxYear, di );
}
// scan all the dates
for ( int32_t i = 0 ; i < m_numDatePtrs ; i++ ) {
// breathe
QUICKPOLL ( m_niceness );
// int16_tcut
Date *di = m_datePtrs[i];
// skip if none
if ( ! di ) continue;
// skip if copyright
if ( di->m_flags & DF_COPYRIGHT ) continue;
if ( di->m_flags & DF_FUZZY ) continue;
// skip if we got a year
if ( di->m_hasType & DT_YEAR ) continue;
// skip if we have no daynum
if ( ! ( di->m_hasType & DT_DAYNUM ) ) continue;
// . ok, we need a year then...
// . returns -1 if could not find any good guess
di->m_maxYearGuess = guessMaxYear ( i );
}
}
// . guess the year for m_datePtrs[i], which has no year of its own
// . sometimes there are multiple years with the event dates
// . we don't always pick the biggest year, we pick the one closest to
//   our date topologically.
// . "i" must index a non-NULL entry of m_datePtrs[]
// . returns the date's own m_dowBasedYear if that is set
// . otherwise looks for the nearest usable neighbor on each side: a date
//   in the body that has a tod, is not fuzzy/copyright/non-event/ignored,
//   and has either a year or a dow-based year
// . returns 0 if neither side supplied a year
// . if the two sides disagree, grows the section around our date until it
//   contains one neighbor and returns that neighbor's year. on a tie, or
//   if our date has no section to start from, returns the larger year
int32_t Dates::guessMaxYear ( int32_t i ) {
	// get it
	Date *di = m_datePtrs[i];
	// if we are so fortunate
	if ( di->m_dowBasedYear ) return di->m_dowBasedYear;

	int32_t leftGuess  = 0;
	int32_t rightGuess = 0;

	// scan for date to the left. when leftGuess ends up non-zero, "dj"
	// is the date that supplied it.
	Date *dj = NULL;
	for ( int32_t j = i - 1 ; j >= 0 ; j-- ) {
		QUICKPOLL(m_niceness);
		dj = m_datePtrs[j];
		if ( ! dj ) continue;
		// must be in the body
		if ( dj->m_a < 0 ) continue;
		if ( dj->m_flags & DF_FUZZY ) continue;
		if ( dj->m_flags & DF_COPYRIGHT ) continue;
		if ( dj->m_flags & DF_NONEVENT_DATE ) continue;
		if ( dj->m_flags5 & DF5_IGNORE ) continue;
		// . must be an event with a tod.
		// . fixes kgoradio.com from using "in Summer of 2009"
		if ( ! (dj->m_hasType & DT_TOD) ) continue;
		// need a year
		if ( dj->m_hasType & DT_YEAR ) {
			leftGuess = dj->m_maxYear;
			break;
		}
		if ( dj->m_dowBasedYear ) {
			leftGuess = dj->m_dowBasedYear;
			break;
		}
		// if he has a max year, that is good!
		//if ( dj->m_maxYearGuess > 0 ) {
		//	leftGuess = dj->m_maxYearGuess;
		//	break;
		//}
	}

	// scan for date to the right. when rightGuess ends up non-zero, "dk"
	// is the date that supplied it.
	Date *dk = NULL;
	for ( int32_t j = i + 1 ; j < m_numDatePtrs ; j++ ) {
		QUICKPOLL(m_niceness);
		dk = m_datePtrs[j];
		if ( ! dk ) continue;
		// must be in the body
		if ( dk->m_a < 0 ) continue;
		if ( dk->m_flags & DF_FUZZY ) continue;
		if ( dk->m_flags & DF_COPYRIGHT ) continue;
		if ( dk->m_flags & DF_NONEVENT_DATE ) continue;
		if ( dk->m_flags5 & DF5_IGNORE ) continue;
		// . must be an event with a tod.
		// . fixes kgoradio.com from using "in Summer of 2009"
		if ( ! (dk->m_hasType & DT_TOD) ) continue;
		// need a year
		if ( dk->m_hasType & DT_YEAR ) {
			rightGuess = dk->m_maxYear;
			break;
		}
		if ( dk->m_dowBasedYear ) {
			rightGuess = dk->m_dowBasedYear;
			break;
		}
		// if he has a max year, that is good!
		//if ( dk->m_maxYearGuess > 0 ) {
		//	rightGuess = dk->m_maxYearGuess;
		//	break;
		//}
	}

	// if only had one, use that (0 if we had neither)
	if ( leftGuess  == 0 ) return rightGuess;
	if ( rightGuess == 0 ) return leftGuess;
	// if same, ok
	if ( leftGuess == rightGuess ) return leftGuess;

	// . which is closer?
	// . both guesses are non-zero here so "dj" and "dk" are the in-body
	//   dates that supplied them
	// . our own date might not be in the body (m_a < 0) in which case
	//   it has no entry in m_sectionPtrs[] and we can only do the
	//   tie-break below
	if ( di->m_a >= 0 ) {
		Section **sp = m_sections->m_sectionPtrs;
		Section *s1 = sp[dj->m_a];
		Section *s2 = sp[dk->m_a];
		Section *sx = sp[di->m_a];
		// grow our date until contains s1 or s2
		for ( ; sx ; sx = sx->m_parent ) {
			QUICKPOLL(m_niceness);
			if ( sx->contains ( s1 ) ) break;
			if ( sx->contains ( s2 ) ) break;
		}
		// sx is NULL if we ran out of parents without containing
		// either neighbor. treat that as a tie.
		if ( sx ) {
			bool hasLeft  = sx->contains ( s1 );
			bool hasRight = sx->contains ( s2 );
			if ( hasLeft  && ! hasRight ) return leftGuess;
			if ( hasRight && ! hasLeft  ) return rightGuess;
		}
	}

	// ok, its a tie... who cares then. return biggest then
	if ( leftGuess > rightGuess ) return leftGuess;
	return rightGuess;
}
// . guess the year for a date that has a single month, daynum and day of
//   week but no explicit year, e.g. "Wed Nov 13"
// . works out what day of week jan 1 would have to be for that
//   month/daynum to land on the given dow, then walks the years 2000..2030
//   tracking the real jan 1 dow and returns the first year in
//   [minYear,maxYear] that agrees
// . returns 0 if di->m_month < 0, if di->m_dowBits does not have exactly
//   one bit set, or if no scanned year in the range matches
// . cores if the month, daynum or dow bit is out of range
// . NOTE(review): assumes bit #0 of m_dowBits is sunday, matching the
//   "saturday = 6" numbering used below -- confirm against the dow parser
int32_t Dates::calculateYearBasedOnDOW ( int32_t minYear, int32_t maxYear, Date *di ) {
	// if month is -1 must be a range or list, skip it
	if ( di->m_month < 0 ) return 0;
	// must have just one dow
	int32_t numDow = getNumBitsOn8(di->m_dowBits);
	if ( numDow != 1 ) return 0;
	int32_t month = di->m_month;
	// sanity check for month, 1 to 12 are legit
	if ( month <= 0 || month >= 13 ) { char *xx=NULL;*xx=0; }
	int32_t day = di->m_minDayNum;
	// between 1 and 31 sanity check
	if ( day < 1 || day > 31 ) { char *xx=NULL;*xx=0; }
	// bit #0 to x
	int32_t dow = getHighestLitBit((unsigned char)(di->m_dowBits));
	// between 0 and 6
	if ( dow >= 7 ) { char *xx=NULL;*xx=0; }

	// . Jan 1, 2000 fell on a saturday (leap year)
	// . Jan 1, 2001 fell on a monday
	// . Jan 1, 2002 fell on a tuesday
	// . Jan 1, 2003 fell on a wednesday
	// . Jan 1, 2004 fell on a thursday (leap year)
	// . Jan 1, 2005 fell on a saturday
	// . Jan 1, 2006 fell on a sunday
	// . Jan 1, 2007 fell on a monday
	// . Jan 1, 2008 fell on a tuesday (leap year)
	// . Jan 1, 2009 fell on a thursday
	// . Jan 1, 2010 fell on a friday
	// . Jan 1, 2011 fell on a saturday
	// . Jan 1, 2012 fell on a sunday

	// how many days into the year are we (assume not leap year)?
	int32_t daysIn = 0;
	for ( int32_t i = 1 ; i < month ; i++ )
		daysIn += s_numDaysInMonth[i-1];
	// add in current daynum, subtract 1
	daysIn += day - 1;
	// . what is the dow of jan 1 then?
	// . from here on "dow" is the jan 1 dow this date requires, assuming
	//   a non-leap year
	dow -= (daysIn % 7);
	// wrap it up
	if ( dow < 0 ) dow += 7;
	// between 0 and 6
	if ( dow >= 7 ) { char *xx=NULL;*xx=0; }

	// jan 1 2008 was a tuesday = 2
	// jan 1 2000 was a saturday = 6
	int32_t jan1dow = 6;
	// . scan a fixed window of years, 2000 through 2030
	// . the simple (y % 4) leap year test below is only valid because
	//   this window contains no non-leap century year
	for ( int32_t y = 2000 ; y <= 2030 ; y++ ) {
		// breathe
		QUICKPOLL(m_niceness);
		// jan 1 dow of year "y", as we will compare it to "dow"
		int32_t saved = jan1dow;
		// . in a leap year a date past feb is one day later in the
		//   week than the non-leap "daysIn" above assumes
		// . wrap saturday+1 back to sunday, otherwise mar-dec dates
		//   in leap years that start on a saturday (2000, 2028)
		//   would be compared against 7 and could never match
		if ( (y % 4)==0 && month >= 3 )
			saved = (jan1dow + 1) % 7;
		// . advance jan1dow to next year BEFORE any skipping below
		//   so it stays in sync with "y"
		// . 365 % 7 = 1, so a normal year shifts jan 1 by one day
		jan1dow++;
		// wrap back to sunday
		if ( jan1dow == 7 ) jan1dow = 0;
		// leap year? then it shifts by one more day
		if ( (y % 4) == 0 ) jan1dow++;
		// wrap back to sunday
		if ( jan1dow == 7 ) jan1dow = 0;
		// skip if not in requested range
		if ( y < minYear ) continue;
		if ( y > maxYear ) break;
		// compare
		if ( saved != dow ) continue;
		// ok, got a match
		return y;
	}
	// no year in the range puts this month/daynum on that dow
	return 0;
}
/*
///////////////////////////////
//
//
// new date normalization code
//
//
///////////////////////////////
// what about Nov 15 - oct 14 2010 [[]] nov 2010 [[]] ...
// . try to print out date in this format:
// Every 2nd&3rd Mon,Thu,Sat-Sun from Aug2,2010-Oct10,2011 1pm-3pm & 5pm-7pm
// . compare to outright listing:
// Aug 2,4,8 2010 Oct 6,12,13 2011 1pm-3pm
// TODO: Every 2nd&3rd Mon,Thu,Sat-Sun from Aug2,2010 2pm - Oct10,2011 4pm
// * if 2nd day in range is the next day at like 2am or 4am we don't need it!
// . TODO: what about the 4th day of every month??? just list outright?
// . [dow/dowlist/dowrange]
// [monthdayyear,monthdayyearlist,monthdayyearrange,seasonyear,holidayyear]
// [tod/todrange/todlist/todlistofranges]
// . 1. for each dow and tod/todrange pair
// . 2. find the smallest monthdayyear interval that contains all dow points
// . 3. set the recurring dowbits. i.e. if the dow is monday and every 2nd
// monday is empty then zero out that dow bit. 1st/2nd/3rd/4th/5th/last
// . 4. find longest time_t interval that covers the necessary recurring dow
// days without exception. store them all in an array. record a
// min and max interval for each one. i.e. the min's endpoints are
// the necessary dows. the max's endpoints are past those usually up
// to the missing dows (or spider endpoints)
// . 5. do the same thing for the missing dows that we should have but
// are probably holidays or seasons they are closed or whatever
// . 6. repeat for each dow
// . 7. combine each dow representation
// . 8. positive interval maxes should be intersected, mins should be unioned
// . 9. negative interval maxes should be intersected, mins should be unioned
// . A. if any positive min intersects a negative min of another dow
// we can't really combine them because there is no mdy date we can
// use to express the restrictive mdy range...
// . B. weight each of these representation with just listing the
// dates outright in m-d-y format
// . C. represent intervals as months, seasons, holidays, etc. to get the
// most compact representation
// . just fix the way it prints out now
// . do not print a tod or tod range if not m_minTod or m_maxTod
// . do not print a dow or dow range if we have a dom
// . do not print a dom range if we have an exact dom
bool Date::printTextNorm2 ( Interval **intervals ,
int32_t numIntervals ,
SafeBuf *sb ,
int32_t thisYear ) {
char dbuf[256];
dedup.set ( 8,0,16,dbuf,256,false,niceness,"dadbuf");
// now combine CronDates in same month and same tod
// and use the day bits to hold that info
for ( int32_t i = 0 ; i < m_numIntervals ; i++ ) {
// breathe
QUICKPOLL(m_niceness);
// shortcut
Interval *ii = intervals[i];
// seconds relative to that day
uint64_t tod1 = ii->m_a % 86400;
uint64_t tod2 = tod1 + ii->m_b - ii->m-a;
// . if more than 24 hours long... wtf?
// . Aug 2, 2010 2pm - Oct 10,2011 4pm
if ( tod2 - tod1 > 24*3600 ) {
log("dates: interval > 24 hrs. special normalization "
"required.");
printSpecial2 ();
continue;
}
// grab a tod to process
int64_t key64 = (tod1<<32) | tod2;
// already did?
if ( dedup.isInTable ( &key64 ) ) continue;
// add it
if ( ! dedup.addKey ( &key64 ) ) return false;
// process that
if ( ! processTODRange ( tod1, tod2 ) ) return false;
}
return true;
}
bool Date::processTODRange ( uint32_t TOD1 ,
uint32_t TOD2 ,
Interval **intervals ,
int32_t numIntervals ,
SafeBuf *sb ,
int32_t thisYear ) {
// . map each day this year and next two years to a "rom"
// . "rom" = recurrence of month
// . i.e. is it the 1st monday or 2nd monday of the month, etc.?
// . basically this maps out 3 years worth of days
char romMap[365*3];
char dowMap[365*3];
char hasTod[365*3];
char month [365*3];
char dayNum[365*3];
int32_t nr = 0;
// . make a day map for ALL of spidered year, plus following year.
// . then just start combinating over all possible things like
// months, seasons, holidays, etc. to describe what is shown in
// in day map.
// . list each day its on like:
// . 2nd saturday in august 2011
// . then we can merge two such beasts multiple ways:
// . 2nd saturdays aug-oct 2011
// . saturdays in august
// . not in 2010
// . not in spring,winter,fall 2011
// . not in august, oct or dec 2011
// . not saturdays,sundays in august,oct,dec 2012
// . not 3rd saturdays in august,oct,dec 2012
// . not christmas, thanksgiving 2011
// . not aug 3, 2011 - feb 4 2012 (finally daymonth ranges in order)
//summer"... start with years, then seasons, then months.
// . then only tuesdays/wed/...
//
// loop over all dows
for ( int32_t dow = 0 ; dow < 7 ; dow++ ) {
// ptrs to every thursday etc.
Interval *dows[150];
char roms[150];
int32_t ndows = 0;
// get all the intervals that have a START time
// that falls on this DOW
for ( int32_t i = 0 ; i < m_numIntervals ; i++ ) {
// breathe
QUICKPOLL(m_niceness);
// shortcut
Interval *ii = intervals[i];
// seconds relative to that day
uint32_t tod1 = ii->m_a % 86400;
uint32_t tod2 = tod1 + ii->m_b - ii->m-a;
// a match?
if ( tod1 != TOD1 ) continue;
if ( tod1 != TOD2 ) continue;
// does it fall on the right dow? (day of week)
if ( getDow ( ii->m_a ) != dow ) continue;
// ok, store it
hasTod[ddd] = 1;
}
// now loop through 1 through 6, where 6 stands
// for the "last day {DOW} of the month" where
// {DOW} is "dow".
for ( int32_t r = 0 ; r <= 6 ; r++ ) {
// breathe
QUICKPOLL(m_niceness);
// . 0 means the 1st "monday" etc of the month
// . we use monday if dow is 0, 1 for tuesday, etc.
for ( k = spiderDay ; ; k++ )
if ( dowMap[k] == dow ) break;
// init
bool inPositive = false;
Span spans[1000];
int32_t numSpans = 0;
Span *cursor = &spans[numSpans++];
// . this is a time_t when page was spidered i guess
// but it is UTC and event is local time!!!
// TODO: fix!!!
// . should be when we started the Intervals
cursor->m_a = spiderStartTime;
cursor->m_b = 0;
// . now advance from there
// . check every monday, etc.
for ( ; k < lastDay ; k += 7 ) {
// skip if not right rom
if ( romMap[k] != r ) continue;
// extend current span
cursor->m_b = dayStart[k] + 86400;
// if we are not changing our cursor span...
if ( hasTod[k] && inPositive ) continue;
if ( ! hasTod[k] && ! inPositive ) continue;
// fix it so day #k is excluded
cursor->m_b = dayStart[k];
// then toggle inPositive
inPositive = ! inPositive;
// make a new span
cursor = &spans[++numSpans];
// init it to start of day here
cursor->m_a = dayStart[k];
cursor->m_b = 0;
cursor->m_inPositive = inPositive;
}
// . set last one
// . should be when we truncated the Intervals
cursor->m_b = lastDayTime;
// now we have a set of spans (intervals really)
// and we can represent them in standard form:
// "Every 2nd&3rd Mon,Thu,Sat-Sun from Aug2,2010-
// Oct10,2011 1pm-3pm ..."
// and the negative spans are times that are
// exceptions
// next get the min and max of each endpoint, a and
// b, for each negative span.
// do this for each "r" and each "dow"
}
// . combine all cursors for each "r" value
// . reduce the negative spans so they do not exclude
// positives from the other r values
// . maybe just
}
// hmm... maybe just mark up the daymap without looping over the
// r values then try to describe the days being excluded. like
// excludes weekends, and also excluded labor day...
-- mark all the days and carve out the biggest exceptions first.
like years, then seasons, month ranges, months, holidays...
// now
// then try to alter the endpoints within in the
// min and max values so that they align on something
// nice like a holiday, a month, a season or a year
// or the complement of a month or range of months...
// so we can say only in "Aug-Oct" or something...
...
// try to shift the boundaries of the negative spans
// so that they align with something simple.
}
}
return true;
}
// . create a CronDate class for every date Interval
// . has more attributes than a "struct tm" used by mktime()
// . has bit arrays so we can combine two together
class CronDate {
public:
// year range, like 2011-2012 or 2011-2011
int32_t m_year1;
int32_t m_year2;
// for month range (i.e. oct-dec)
int32_t m_month1;
int32_t m_month2;
// days of month relative to m_month1 and m_month2 to make
// a month range like "oct 11 - dec22"
int32_t m_mday1;
int32_t m_mday2;
// what months we represent, jan-dec
int32_t m_monthMask;
// if m_month1==m_month2, these bits represent up to 31 days
int32_t m_mdayMask;
// sunday thru saturday, 1 bit each
char m_dowEveryMask;
// the time-of-day time range (i.e. 1pm-3pm)
int32_t m_tod1;
int32_t m_tod2;
// everyweekend,everyweekday,every 1/2/3/4/5th mask
int32_t m_bits;
// how many syllables does it take to print out the current date
// as represented by this class in english?
int32_t m_numSylables;
class EnglishDate *m_exceptions1;
class EnglishDate *m_exceptions2;
};
bool Dates::printNormalizedTime ( Date *dx ,
Interval **intervals ,
int32_t numIntervals ,
SafeBuf *sb ,
int32_t thisYear ) {
// return now if no intervals
if ( numIntervals == 0 ) return true;
int32_t cdsize = sizeof(CronDate);
// make space for the CronDates
int32_t need = cdsize * numIntervals;
CronDate *cd1 = (struct tm *)mmalloc ( need );
if ( ! cd1 ) return false;
CronDate *cd2 = (struct tm *)mmalloc ( need );
if ( ! cd2 ) { mfree ( cd1 , need ); return false; }
// set for calling functions easier, like printTODRanges(j)
m_sb = sb;
m_cd = cd;
m_numCronDates = numIntervals;
// assign a month and daynum for each interval and dow for that year
for ( int32_t i = 0 ; i < numIntervals ; i++ ) {
// breathe
QUICKPOLL(m_niceness);
// shortcut
Interval *ii = intervals[i];
// decode it
struct tm *ts;
struct tm1;
struct tm2;
ts = gmtime ( &ii->m_a );
gbmemcpy ( &tm1 , ts , sizeof(tm) );
ts = gmtime ( &ii->m_b );
gbmemcpy ( &tm2 , ts , sizeof(tm) );
// just copy it
cd[i]->m_year1 = tm1->tm_year + 1900;
cd[i]->m_year2 = tm2->tm_year + 1900;
cd[i]->m_month1 = tm1->tm_mon;
cd[i]->m_month2 = tm2->tm_mon;
cd[i]->m_mday1 = tm1->tm_mday;
cd[i]->m_mday2 = tm2->tm_mday;
cd[i]->m_tod1 = ii->m_a % 24*3600;
cd[i]->m_tod2 = ii->m_b % 24*3600;
cd[i]->m_dowEveryMask = 0;
cd[i]->m_dowEveryFirstMask = 0;
cd[i]->m_dowEverySecondMask = 0;
cd[i]->m_dowEveryThirdMask = 0;
cd[i]->m_dowEveryFourthMask = 0;
cd[i]->m_dowEveryFifthMask = 0;
cd[i]->m_bits = 0;
cd[i]->m_numSylables = -1;
cd[i]->m_exceptions1 = NULL;
cd[i]->m_exceptions2 = NULL;
// scan months if multiple and or them in
cd[i]->m_monthMask = 0;
//cd[i]->m_monthMaskExceptions = 0;
for ( int32_t j = tm1->m_mon ; j <= tm2->m_mon ; j++ )
cd[i]->m_monthMask |= 1 << j;
// same for days, BUT IFF SAME MONTH!
cd[i]->m_mdayMask = 0;
//cd[i]->m_mdayMaskExceptions = 0;
if ( tm1->m_mon == tm2->m_mon )
cd[i]->m_mdayMask = 1 << (tm->tm_mday);
}
// now combine CronDates in same month and same tod
// and use the day bits to hold that info
for ( int32_t i = 0 ; i < m_numCronDates ; i++ ) {
// breathe
QUICKPOLL(m_niceness);
// grab a tod to process
int32_t tod = cd[i].m_tod1 % 24*3600;
// already did?
if ( dedup.isInTable ( &tod ) ) continue;
// add it
if ( ! dedup.addKey ( &tod ) ) return false;
// . compress based on that tod
// . combines all CronDates into a single CronDate
// and represents them with monthday bits
if ( ! compressCronDates ( tod ) ) return false;
}
// if like monday had multiple tods try to combine them...
// like monday @ 3pm and monday @ 6pm should be
// "monday: 3pm, 6pm"
return true;
}
// combine similar CronDates and set m_mdayBits to keep track of what
// days of the month we are representing.
bool Dates::compressCronDates ( int32_t todArg ) {
CronDate *prev = NULL;
// combine crondates with same tod and month and year range together
for ( int32_t i = 0 ; i < m_numCronDates ; i++ ) {
// breathe
QUICKPOLL(m_niceness);
// get it
CronDate *cd = &m_cd[i];
// skip if nuked
if ( cd->m_bits & CD_IGNORED ) continue;
// new last
CronDate *last = prev;
// update prev for next guy
prev = cd;
// grab a tod to process
int32_t tod = cd[i].m_tod1 % 24*3600;
// must match
if ( tod != todArg ) continue;
// get last?
if ( ! last ) { last = cd; continue; }
// if not same month/year, skip
if ( last->m_month1 != cd->m_month1 ) continue;
if ( last->m_month2 != cd->m_month1 ) continue;
if ( last->m_year1 != cd->m_year1 ) continue;
if ( last->m_year2 != cd->m_year2 ) continue;
// ok, set his month day bit and we can get rid of us
last->m_mdayBits |= cd->m_mdayBits;
// we are nuked!
cd->m_bits |= CD_IGNORED;
}
//
// . now compress the months together
// . like "daily aug-sep"
//
prev = NULL;
// combine crondates with same tod and month and year range together
for ( int32_t i = 0 ; i < m_numCronDates ; i++ ) {
// breathe
QUICKPOLL(m_niceness);
// get it
CronDate *cd = &m_cd[i];
// skip if nuked
if ( cd->m_bits & CD_IGNORED ) continue;
// new last
CronDate *last = prev;
// update prev for next guy
prev = cd;
// grab a tod to process
int32_t tod = cd[i].m_tod1 % 24*3600;
// must match
if ( tod != todArg ) continue;
// get last?
if ( ! last ) { last = cd; continue; }
// if not same year, skip
if ( last->m_year1 != cd->m_year1 ) continue;
if ( last->m_year2 != cd->m_year2 ) continue;
// if same every masks, combine
if ( last->m_dowEveryMask != cd->m_dowEveryMask )
continue;
if ( last->m_dowEveryFirstMask != cd->m_dowEveryFirstMask )
continue;
if ( last->m_dowEverySecondMask != cd->m_dowEverySecondMask )
continue;
if ( last->m_dowEveryThirdMask != cd->m_dowEveryThirdMask )
continue;
if ( last->m_dowEveryFourthMask != cd->m_dowEveryFourthMask )
continue;
if ( last->m_dowEveryFifthMask != cd->m_dowEveryFifthMask )
continue;
// can't be a month hole between us!
if ( last->m_month2 + 1 < cd->m_month1 ) continue;
// ok, combine
last->m_month2 = cd->m_month2;
// eliminate us
cd->m_bits |= CD_IGNORED;
// . we also get his exceptions!
// . a linked list of CronDates?
if ( last->m_exceptions )
last->m_exceptions->m_next = cd->m_exceptions;
else
last->m_exceptions = cd->m_exceptions;
}
// . set m_everyDowMask for every day of the week that has the event
// every time for that month.
// . then combine two adjacent CronDates that are different months
// but have the same m_everyDowMask
// . if more every dow days are used than excepted, then go for it
// . include the m_dowEvery*Masks as well
// . set those bits for a month if the # of matches outnumbers
// the number of exceptions
// . but count over the period of a whole year or season...
// . hmmm... maybe just consider using the ranges already given to us
// in the Dates existing format?
// . maybe just use that and do not print certain aspects of it...
// . keep a CronDate between different months to hold exceptions
// . if all months can be combined for a particular year and the
// # of words used to represent their m_everyDowMask and their
// exceptions to that is the smallest score... then do it
// . compare score to just
// . if two adjacent months have th
// . set m_dowEveryBits if occurs every S|M|T|W|R|F|S of the month
// . set m_dowEveryFriday bits etc.
// . ignore days BEFORE the spider time!!!! TODO!!!
// . setting these bits then allows us to combine two+ months
// . after trying to set those bits, make an array of the exceptions
// and try to compress that array and see how many words/syllables
// it is....
// . call these donut holes...
...
// . HOW TO deal with exceptions???
// . combine NOT event dates as well?
// . then we need like 365*2 cron dates, set CD_NOT_EVENT date?
// . or at least, we need that for every unique tod/todrange
// . between every two CronDates that have the same tod/todrange
// but are 1+ day apart, we should have an "exception" CronDate,
// that represents the down days between those two CronDates
// have a CronDate that is not!
// . then that is used to represent the whole between the two
// .
//
}
// scan our parent for us
if (
int32_t ptrNum = -1;
for ( int32_t k = 0 ; k < parent->m_numPtrs ; k++ ) {
QUICKPOLL(m_niceness);
if ( parent->m_ptrs[k] != dp ) continue;
ptrNum = k;
break;
}
// get date to left of us
Date
Date *left = NULL;
for ( ; ! left && ppp ; ppp = ppp->m_dateParent )
if ( ptrNum > 0 ) left = parent->m_ptrs[ptrNum-1];
*/
/* old code
void Dates::setMinMaxYearsOnPage ( ) {
int32_t minYear1 = 9999;
int32_t maxYear1 = 0;
// get current year... when the doc was spidered
int32_t currentYear = getYear ( m_nd->m_spideredTime );
// scan all the dates
for ( int32_t i = 0 ; i < m_numDatePtrs ; i++ ) {
// breathe
QUICKPOLL ( m_niceness );
// shortcut
Date *di = m_datePtrs[i];
// skip if none
if ( ! di ) continue;
// skip if copyright
if ( di->m_flags & DF_COPYRIGHT ) continue;
if ( di->m_flags & DF_FUZZY ) continue;
// skip if not in body (i.e. from url). no, allow url dates!
//if ( di->m_a < 0 ) continue;
// a year is easy?
if ( ! ( di->m_hasType & DT_YEAR ) ) continue;
// must have dow or month. otherwise we get crap like
// Summer of 2011. 2010/2011. etc.
if ( ! ( di->m_hasType & DT_DAYNUM ) &&
! ( di->m_hasType & DT_MONTH ) )
continue;
// skip if list or range of years. fixes kumeyay.com which
// has a list of month/year pairs in links for the archives.
if ( di->m_minYear != di->m_maxYear ) continue;
// throw away ridiculous years. talking about historical dates
// i would think is most likely. fixes "June 27, 1940" for
// santafe.org/perl/.....
if ( di->m_minYear < 2000 ) continue;
// ok, use it
if ( di->m_minYear < minYear1 ) minYear1 = di->m_minYear;
if ( di->m_maxYear > maxYear1 ) maxYear1 = di->m_maxYear;
}
int32_t minYear2 = 9999;
int32_t maxYear2 = 0;
// now repeat the loop but for implied years, i.e. "Wed Nov 13"
for ( int32_t i = 0 ; i < m_numDatePtrs ; i++ ) {
// breathe
QUICKPOLL ( m_niceness );
// shortcut
Date *di = m_datePtrs[i];
// skip if none
if ( ! di ) continue;
// skip if already has year
if ( di->m_hasType & DT_YEAR ) continue;
// one daynum and one dow is good
if ( ! (di->m_hasType & DT_DAYNUM) ) continue;
if ( ! (di->m_hasType & DT_MONTH ) ) continue;
if ( di->m_minDayNum != di->m_maxDayNum ) continue;
// if month is -1 must be a range or list, skip it
if ( di->m_month < 0 ) continue;
// must have just one dow
int32_t numDow = getNumBitsOn8(di->m_dowBits);
if ( numDow != 1 ) continue;
int32_t month = di->m_month;
// sanity check for month, 1 to 12 are legit
if ( month <= 0 || month >= 13 ) { char *xx=NULL;*xx=0; }
int32_t day = di->m_minDayNum;
// between 1 and 31 sanity check
if ( day < 1 || day > 31 ) { char *xx=NULL;*xx=0; }
// bit #0 to x
int32_t dow = getHighestLitBit((unsigned char)(di->m_dowBits));
// between 0 and 6
if ( dow >= 7 ) { char *xx=NULL;*xx=0; }
// . Jan 1, 2000 fell on a saturday (leap year)
// . Jan 1, 2001 fell on a monday
// . Jan 1, 2002 fell on a tuesday
// . Jan 1, 2003 fell on a wednesday
// . Jan 1, 2004 fell on a thursday (leap year)
// . Jan 1, 2005 fell on a saturday
// . Jan 1, 2006 fell on a sunday
// . Jan 1, 2007 fell on a monday
// . Jan 1, 2008 fell on a tuesday (leap year)
// . Jan 1, 2009 fell on a thursday
// . Jan 1, 2010 fell on a friday
// . Jan 1, 2011 fell on a saturday
// . Jan 1, 2012 fell on a sunday
// how many days into the year are we (assume not leap year)?
int32_t daysIn = 0;
for ( int32_t i = 1 ; i < month ; i++ )
daysIn += s_numDaysInMonth[i-1];
// add in current daynum, subtract 1
daysIn += day - 1;
// what the dow of jan 1 then?
dow -= (daysIn % 7);
// wrap it up
if ( dow < 0 ) dow += 7;
// between 0 and 6
if ( dow >= 7 ) { char *xx=NULL;*xx=0; }
// jan 1 2008 was a tuesday = 2
// jan 1 2000 was a saturday = 6
int32_t jan1dow = 2;
// scan the years. include up to 1 year from now (spideredtime)
for ( int32_t y = 2008 ; y <= currentYear+1 ; y++ ) {
// stop if b
QUICKPOLL(m_niceness);
// save it
int32_t saved = jan1dow;
// inc for compare now if in leap year and past feb
if ( (y % 4)==0 && month >= 3 )
saved = jan1dow + 1;
// inc it for next year
jan1dow++;
// leap year?
if ( (y % 4) == 0 ) jan1dow++;
// compare
if ( saved != dow ) continue;
// ok, got a match
if ( y < minYear2 ) minYear2 = y;
if ( y > maxYear2 ) maxYear2 = y;
}
}
// assume none defined
m_minYearOnPage = -1;
m_maxYearOnPage = -1;
// bail if none defined
if ( minYear1 == 9999 && minYear2 == 9999 )
return;
// only use minYear2/maxYear2 if minYear1/maxYear1 not defined
if ( minYear1 == 9999 && minYear2 != 9999 ) {
m_minYearOnPage = minYear2;
m_maxYearOnPage = maxYear2;
return;
}
// only use minYear1/maxYear1 if minYear2/maxYear2 not defined
if ( minYear1 != 9999 && minYear2 == 9999 ) {
m_minYearOnPage = minYear1;
m_maxYearOnPage = maxYear1;
return;
}
// . ignore minYear2/maxYear2 if minYear1/maxYear1 defined
// . yeah, like patpendergrass.com has Friday, December 17 but
// its talking about 2005 not 2010!
//if ( maxYear2 > maxYear1 ) m_maxYearOnPage = maxYear2;
//else m_maxYearOnPage = maxYear1;
m_minYearOnPage = minYear1;
m_maxYearOnPage = maxYear1;
// sometimes they just explicitly list next year, like zvents, but
// the other dates are actually the year before that. sea & the
// invalid mariner listed 3/24/2010 but the other dates were
// like Friday, Oct 16 which is in 2009.
if ( minYear2 + 1 == minYear1 )
m_minYearOnPage = minYear2;
if ( maxYear2 + 1 == minYear1 )
m_minYearOnPage = maxYear2;
return;
}
*/