... assuming caller has already
// printed a into "p"
// . return "p" after printing into it
char *Parms::printParms ( char *p , char *pend , long page , char *username,
//long user ,
void *THIS , char *coll , char *pwd , long nc ,
long pd ) {
s_count = 0;
// background color
char *bg1 = LIGHT_BLUE;
char *bg2 = DARK_BLUE;
// find in parms list
for ( long i = 0 ; i < m_numParms ; i++ ) {
// get it
Parm *m = &m_parms[i];
// make sure we got the right parms for what we want
if ( m->m_page != page ) continue;
// skip if offset is negative, that means none
if ( m->m_off < 0 &&
m->m_type != TYPE_CMD &&
m->m_type != TYPE_CONSTANT ) continue;
// might have an array, do not exceed the array size
long jend = m->m_max;
long size = jend ;
char *ss = ((char *)THIS + m->m_off - 4);
if ( m->m_max > 1 ) size = *(long *)ss;
if ( size < jend ) jend = size;
// background color
char *bg ;
// toggle background color on group boundaries...
if ( m->m_group == 1 ) {
if ( bg == bg1 ) bg = bg2;
else bg = bg1;
}
// . do we have an array? if so print title on next row
// UNLESS these are priority checkboxes, those can all
// cluster together onto one row
// . only add if not in a row of controls
if ( m->m_max > 1 && m->m_type != TYPE_PRIORITY_BOXES &&
m->m_rowid == -1 ) {
// make a separate table for array of parms
sprintf ( p ,
//"\n"
""
""
//""
"%s"
" "
//" "
" \n"
""
"%s \n",
DARK_BLUE,m->m_title,m->m_desc );
p += gbstrlen ( p );
}
// arrays always have blank line for adding stuff
if ( m->m_max > 1 ) size++;
// if m_rowid of consecutive parms are the same then they
// are all printed in the same row, otherwise the inner loop
// has no effect
long rowid = m_parms[i].m_rowid;
// if not part of a complex row, just print this array right up
if ( rowid == -1 ) {
for ( long j = 0 ; j < size ; j++ )
p=printParm ( p, pend, username, &m_parms[i],i,
j, jend, (char *)THIS,coll,NULL,
bg,nc,pd);
continue;
}
// if not first in a row, skip it, we printed it already
if ( i > 0 && m_parms[i-1].m_rowid == rowid ) continue;
// otherwise print everything in the row
for ( long j = 0 ; j < size ; j++ ) {
for ( long k = i ;
k < m_numParms &&
m_parms[k].m_rowid == rowid;
k++ )
p=printParm(p,pend,username,&m_parms[k],k,j,
jend,(char *)THIS,coll,NULL,bg,nc,
pd);
}
// end array table
//if ( m->m_max > 1 ) {
// sprintf ( p , "
\n");
// p += gbstrlen ( p );
//}
}
return p;
}
*/
// . Print every parm registered for "page" into "sb" as HTML controls.
// . "THIS" is the object the parm values live in: &g_conf for master
//   controls or a CollectionRec for collection-level pages (see getTHIS()).
// . "nc" is how many controls fit per display row; "pd" says whether to
//   print parm descriptions.
// . Returns true iff every printParm() call succeeded -- results are AND'd
//   into "status" and printing is NOT aborted on the first failure.
// . NOTE(review): the HTML inside the safePrintf() string literals in this
//   copy of the file looks stripped/garbled (tags missing) -- confirm
//   against the upstream source before editing any of the markup strings.
bool Parms::printParms ( SafeBuf* sb , long page , char *username,//long user,
void *THIS , char *coll , char *pwd , long nc ,
long pd ) {
bool status = true;
// running count of controls printed; printParm() uses it (modulo nc)
// to decide where display rows start and end
s_count = 0;
// background color
char *bg1 = LIGHT_BLUE;
char *bg2 = DARK_BLUE;
// background color
char *bg = NULL;
// find in parms list
for ( long i = 0 ; i < m_numParms ; i++ ) {
// get it
Parm *m = &m_parms[i];
// make sure we got the right parms for what we want
if ( m->m_page != page ) continue;
// skip if offset is negative, that means none
if ( m->m_off < 0 &&
m->m_type != TYPE_MONOD2 &&
m->m_type != TYPE_MONOM2 &&
m->m_type != TYPE_CMD ) continue;
// might have an array, do not exceed the array size
long jend = m->m_max;
long size = jend ;
// array parms store their current element count in the long
// that sits 4 bytes before the array data (m_off - 4)
char *ss = ((char *)THIS + m->m_off - 4);
if ( m->m_type == TYPE_MONOD2 ) ss = NULL;
if ( m->m_type == TYPE_MONOM2 ) ss = NULL;
if ( m->m_max > 1 && ss ) size = *(long *)ss;
if ( size < jend ) jend = size;
// toggle background color on group boundaries...
if ( m->m_group == 1 ) {
if ( bg == bg1 ) bg = bg2;
else bg = bg1;
}
// . do we have an array? if so print title on next row
// UNLESS these are priority checkboxes, those can all
// cluster together onto one row
// . only add if not in a row of controls
if ( m->m_max > 1 && m->m_type != TYPE_PRIORITY_BOXES &&
m->m_rowid == -1 ) {
// make a separate table for array of parms
// (markup below appears truncated by extraction -- see
//  NOTE(review) in the function header)
sb->safePrintf (
//"\n"
""
""
//""
"%s"
" "
//" "
" \n"
""
"%s \n",
DARK_BLUE,m->m_title,m->m_desc );
}
// arrays always have blank line for adding stuff
if ( m->m_max > 1 )
// not for PAGE_PRIORITIES!
//m->m_page != PAGE_PRIORITIES )
size++;
// if m_rowid of consecutive parms are the same then they
// are all printed in the same row, otherwise the inner loop
// has no effect
long rowid = m_parms[i].m_rowid;
// if not part of a complex row, just print this array right up
if ( rowid == -1 ) {
for ( long j = 0 ; j < size ; j++ )
status &=printParm ( sb, username,&m_parms[i],i,
j, jend, (char *)THIS,
coll,NULL,
bg,nc,pd,
false);
continue;
}
// if not first in a row, skip it, we printed it already
if ( i > 0 && m_parms[i-1].m_rowid == rowid ) continue;
// otherwise print everything in the row
for ( long j = 0 ; j < size ; j++ ) {
// flip j if in this page
long newj = j;
//if ( m->m_page == PAGE_PRIORITIES )
// newj = size - 1 - j;
// print every consecutive parm sharing this rowid; the
// last argument flags the final (blank) array row
for ( long k = i ;
k < m_numParms &&
m_parms[k].m_rowid == rowid;
k++ )
status &=printParm(sb,username,&m_parms[k],k,
newj,jend,(char *)THIS,coll,NULL,bg,
nc,pd, j==size-1);
}
// end array table
// NOTE(review): the two lines below look like extraction
// damage (a comment split mid-literal) -- confirm upstream
//if ( m->m_max > 1 ) {
// sprintf ( p , "
\n");
// p += gbstrlen ( p );
//}
}
return status;
}
/*
char *Parms::printParm ( char *p ,
char *pend ,
//long user ,
char *username,
Parm *m ,
long mm , // m = &m_parms[mm]
long j ,
long jend ,
char *THIS ,
char *coll ,
char *pwd ,
char *bg ,
long nc ,
long pd ) {
// do not print if no permissions
if ( m->m_perms != 0 && !g_users.hasPermission(username,m->m_perms) )
return p;
//if ( m->m_perms != 0 && (m->m_perms & user) == 0 ) return p;
// do not print some if #define _CLIENT_ is true
#ifdef _GLOBALSPEC_
if ( m->m_priv == 2 ) return p;
if ( m->m_priv == 3 ) return p;
#elif _CLIENT_
if ( m->m_priv ) return p;
#elif _METALINCS_
if ( m->m_priv == 2 ) return p;
if ( m->m_priv == 3 ) return p;
#endif
// priv of 4 means do not print at all
if ( m->m_priv == 4 ) return p;
// what type of parameter?
char t = m->m_type;
// point to the data in THIS
char *s = THIS + m->m_off + m->m_size * j ;
// . if an array, passed our end, this is the blank line at the end
// . USE THIS EMPTY/DEFAULT LINE TO ADD NEW DATA TO AN ARRAY
// . make at least as big as a long long
if ( j >= jend ) s = "\0\0\0\0\0\0\0\0";
// delimit each cgi var if we need to
if ( m->m_cgi && gbstrlen(m->m_cgi) > 45 ) {
log(LOG_LOGIC,"admin: Cgi variable is TOO big.");
char *xx = NULL; *xx = 0;
}
char cgi[64];
if ( m->m_cgi ) {
if ( j > 0 ) sprintf ( cgi , "%s%li" , m->m_cgi , j );
else sprintf ( cgi , "%s" , m->m_cgi );
}
// . display title and description of the control/parameter
// . the input cell of some parameters are colored
char *color = "";
if ( t == TYPE_CMD || t == TYPE_BOOL2 ) color = " bgcolor=#0000ff";
if ( t == TYPE_BOOL ) {
if ( *s ) color = " bgcolor=#00ff00";
else color = " bgcolor=#ff0000";
}
if ( t == TYPE_BOOL || t == TYPE_BOOL2 ) {
// disable controls not allowed in read only mode
if ( g_conf.m_readOnlyMode && m->m_rdonly )
color = " bgcolor=#ffff00";
}
bool firstInRow = false;
if ( (s_count % nc) == 0 ) firstInRow = true;
s_count++;
if ( mm > 0 && m->m_rowid >= 0 && m_parms[mm-1].m_rowid == m->m_rowid )
firstInRow = false;
long firstRow = 0;
if ( m->m_page == PAGE_PRIORITIES ) firstRow = MAX_PRIORITY_QUEUES - 1;
// . use a separate table for arrays
// . make title and description header of that table
// . do not print all headers if not m_hdrs, a special case for the
// default line in the url filters table
if ( j == firstRow && m->m_rowid >= 0 && firstInRow && m->m_hdrs ) {
// print description as big comment
if ( m->m_desc && pd == 1 ) {
sprintf ( p , "\n" );
p += gbstrlen ( p );
//p = htmlEncode ( p , pend , m->m_desc ,
// m->m_desc + gbstrlen ( m->m_desc ) );
sprintf ( p , "%s" , m->m_desc );
p += gbstrlen ( p );
sprintf ( p , " \n" );
p += gbstrlen ( p );
}
// # column
// do not show this for PAGE_PRIORITIES it is confusing
if ( m->m_max > 1 &&
m->m_page != PAGE_PRIORITIES ) {
sprintf ( p , "# \n" );
p += gbstrlen(p);
}
// print all headers
for ( long k = mm ;
km_rowid; k++ ) {
sprintf ( p , "%s \n" ,
m_parms[k].m_title );
p += gbstrlen(p);
}
sprintf ( p , " \n" ); // mdw added
p += gbstrlen ( p );
}
// print row start for single parm
if ( m->m_max <= 1 && ! m->m_hdrs ) {
if ( firstInRow ) {
sprintf ( p , "" , bg );
p += gbstrlen ( p );
}
p += sprintf ( p , " " , 100/nc/2 );
}
// print the title/description in current table for non-arrays
if ( m->m_max <= 1 && m->m_hdrs ) { // j == 0 && m->m_rowid < 0 ) {
if ( firstInRow )
p += sprintf ( p , " ",bg);
if ( t == TYPE_STRINGBOX ) {
sprintf ( p , ""
"%s ",m->m_title );
p += gbstrlen ( p );
if ( pd )
p = htmlEncode (p,pend,m->m_desc,
m->m_desc+gbstrlen(m->m_desc));
sprintf ( p , " \n" );
}
else {
sprintf ( p , "" //" "
"%s ",
3*100/nc/2/4,m->m_title );
p += gbstrlen ( p );
if ( pd )
p = htmlEncode (p,pend,m->m_desc,
m->m_desc+gbstrlen(m->m_desc));
// and default value if it exists
if ( m->m_def && m->m_def[0] && t != TYPE_CMD ) {
char *d = m->m_def;
if ( t == TYPE_BOOL ) {
if ( d[0]=='0' ) d = "NO";
else d = "YES";
sprintf ( p , " Default: %s.",d);
p += gbstrlen ( p );
}
else {
sprintf ( p , " Default: ");
p += gbstrlen ( p );
p = htmlEncode (p,pend,d,d+gbstrlen(d) );
}
}
sprintf ( p , " \n" ,
color , 100/nc/2/4 );
}
p += gbstrlen ( p );
}
// . print number in row if array, start at 1 for clarity's sake
// . used for url filters table, etc.
if ( m->m_max > 1 ) {
// but if it is in same row as previous, do not repeat it
// for this same row, silly
if ( firstInRow && m->m_page != PAGE_PRIORITIES )
sprintf ( p, " %li \n", j);//j+1 );
else if ( firstInRow )
sprintf ( p , " " );
else
sprintf ( p, " " );
p += gbstrlen ( p );
}
long cast = m->m_cast;
if ( g_proxy.isProxy() ) cast = 0;
// print the input box
if ( t == TYPE_BOOL ) {
char *tt, *v;
if ( *s ) { tt = "YES"; v = "0"; }
else { tt = "NO" ; v = "1"; }
if ( g_conf.m_readOnlyMode && m->m_rdonly )
sprintf ( p, "read-only mode " );
// if cast=1, command IS broadcast to all hosts
else
sprintf ( p, ""
"%s ",
g_pages.getPath(m->m_page),coll,
cgi,v,cast,tt);
}
else if ( t == TYPE_BOOL2 ) {
if ( g_conf.m_readOnlyMode && m->m_rdonly )
sprintf ( p, "read-only mode ");
// always use m_def as the value for TYPE_BOOL2
else
sprintf ( p, ""
"%s ",
g_pages.getPath(m->m_page),coll,
cgi,m->m_def, m->m_title);
}
else if ( t == TYPE_CHECKBOX ) {
char *ddd = "";
if ( *s ) ddd = " checked";
sprintf (p, " ",
cgi,ddd);
}
else if ( t == TYPE_CHAR )
sprintf (p," ",cgi,(long)(*s));
else if ( t == TYPE_PRIORITY )
printDropDown ( MAX_SPIDER_PRIORITIES , p , pend , cgi , *s ,
false , false );
else if ( t == TYPE_PRIORITY2 )
printDropDown ( MAX_SPIDER_PRIORITIES , p , pend , cgi , *s ,
true , true );
else if ( t == TYPE_RETRIES )
printDropDown ( 4 , p , pend , cgi , *s , false , false );
else if ( t == TYPE_PRIORITY_BOXES ) {
// print ALL the checkboxes when we get the first parm
if ( j != 0 ) return p;
printCheckBoxes ( MAX_SPIDER_PRIORITIES , p , pend , cgi , s );
}
else if ( t == TYPE_CMD )
// if cast=0 it will be executed, otherwise it will be
// broadcasted with cast=1 to all hosts and they will all
// execute it
sprintf ( p, ""
"%s ",
g_pages.getPath(m->m_page),coll,
cgi,cast,m->m_title);
else if ( t == TYPE_FLOAT )
sprintf (p," ",cgi,*(float *)s);
else if ( t == TYPE_IP ) {
if ( m->m_max > 0 && j == jend )
sprintf (p," ",cgi);
else
sprintf (p," ",cgi,iptoa(*(long *)s));
}
else if ( t == TYPE_LONG )
sprintf (p," ",cgi,*(long *)s);
else if ( t == TYPE_LONG_CONST )
sprintf (p,"%li",*(long *)s);
else if ( t == TYPE_LONG_LONG )
sprintf (p," ",cgi,*(long long *)s);
else if ( t == TYPE_STRING || t == TYPE_STRINGNONEMPTY ) {
long size = m->m_size;
if ( size > 25 ) size = 25;
sprintf (p," ");
}
else if ( t == TYPE_STRINGBOX ) {
sprintf(p,"\n");
}
else if ( t == TYPE_CONSTANT )
sprintf (p,"%s",m->m_title);
else if ( t == TYPE_MONOD2 )
sprintf ( p , "%li" , j / 2 );
else if ( t == TYPE_MONOM2 )
sprintf ( p , "%li" , j % 2 );
else if ( t == TYPE_RULESET ) ;
// subscript is already included in "cgi"
//p = g_pages.printRulesetDropDown ( p ,
// pend ,
// user ,
// cgi ,
// *(long *)s , // selected
// -1 ); // subscript
else if ( t == TYPE_TIME ) {
//time is stored as a string
//if time is not stored properly, just write 00:00
if ( s[2] != ':' )
strncpy ( s, "00:00", 5 );
char hr[3];
char min[3];
memcpy ( hr, s, 2 );
memcpy ( min, s + 3, 2 );
hr[2] = '\0';
min[2] = '\0';
// print the time in the input forms
sprintf(p,
" h "
" m " ,
cgi ,
hr ,
cgi ,
min );
}
else if ( t == TYPE_DATE || t == TYPE_DATE2 ) {
// time is stored as long
long ct = *(long *)s;
// get the time struct
struct tm *tp = gmtime ( (time_t *)&ct ) ;
// set the "selected" month for the drop down
char *ss[12];
for ( long i = 0 ; i < 12 ; i++ ) ss[i]="";
long month = tp->tm_mon;
if ( month < 0 || month > 11 ) month = 0; // Jan
ss[month] = " selected";
// print the date in the input forms
sprintf(p,
" "
""
"Jan"
" Feb"
" Mar"
" Apr"
" May"
" Jun"
" Jul"
" Aug"
" Sep"
" Oct"
" Nov"
" Dec"
" \n"
" "
" "
" h "
" m "
" s" ,
cgi ,
(long)tp->tm_mday ,
cgi ,
ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[8],
ss[9],ss[10],ss[11],
cgi ,
(long)tp->tm_year + 1900 ,
cgi ,
(long)tp->tm_hour ,
cgi ,
(long)tp->tm_min ,
cgi ,
(long)tp->tm_sec );
}
else if ( t == TYPE_SITERULE ) {
// print the siterec rules as a drop down
char *ss[5];
for ( long i = 0; i < 5; i++ ) ss[i] = "";
long v = *(long*)s;
if ( v < 0 || v > 4 ) v = 0;
ss[v] = " selected";
sprintf ( p, ""
"Hostname"
" Path Depth 1"
" Path Depth 2"
" Path Depth 3"
" \n",
cgi, ss[0], ss[1], ss[2], ss[3] );
}
p += gbstrlen ( p );
// end the input cell
sprintf ( p , " \n");
p += gbstrlen ( p );
// "insert above" link? used for arrays only, where order matters
if ( m->m_addin && j < jend ) {
sprintf ( p , "insert \n",coll,cgi );
p += gbstrlen ( p );
}
// does next guy start a new row?
bool lastInRow = true; // assume yes
if ( mm+1m_rowid>=0&&m_parms[mm+1].m_rowid==m->m_rowid)
lastInRow = false;
if ( ((s_count-1) % nc) != (nc-1) ) lastInRow = false;
// . display the remove link for arrays if we need to
// . but don't display if next guy does NOT start a new row
if ( m->m_max > 1 && lastInRow &&
m->m_page != PAGE_PRIORITIES ) {
if ( j < jend )
sprintf ( p , ""
"remove \n",coll,cgi );
else
sprintf ( p , " \n");
p += gbstrlen ( p );
}
if ( lastInRow ) sprintf ( p , " \n");
p += gbstrlen ( p );
return p;
}
*/
// . Print one parm "m" (= &m_parms[mm]) as an HTML form control into "sb".
// . "j" is the element index when the parm is an array; j >= jend means we
//   are printing the trailing blank entry used to append a new array row.
// . "THIS" points at the object holding the data (g_conf or CollectionRec).
// . "bg" is the row background color, "nc" the controls-per-row count,
//   "pd" whether to print descriptions, "lastRow" whether this is the last
//   array row (used by the url-filters checkbox POST workaround below).
// . Returns false if any htmlEncode() call failed, true otherwise.
// . NOTE(review): several string literals and two loop/if conditions in
//   this copy look stripped of their HTML/"<" content -- confirm against
//   the upstream source before editing; comments flag the worst spots.
bool Parms::printParm ( SafeBuf* sb,
//long user ,
char *username,
Parm *m ,
long mm , // m = &m_parms[mm]
long j ,
long jend ,
char *THIS ,
char *coll ,
char *pwd ,
long nc ,
long pd ,
bool lastRow ) {
// accumulates the boolean results of the htmlEncode() calls below
bool status = true;
// do not print if no permissions
if ( m->m_perms != 0 && !g_users.hasPermission(username,m->m_perms) )
return status;
//if ( m->m_perms != 0 && (m->m_perms & user) == 0 ) return status;
// do not print some if #define _CLIENT_ is true
#ifdef _GLOBALSPEC_
if ( m->m_priv == 2 ) return status;
if ( m->m_priv == 3 ) return status;
#elif _CLIENT_
if ( m->m_priv ) return status;
#elif _METALINCS_
if ( m->m_priv == 2 ) return status;
if ( m->m_priv == 3 ) return status;
#endif
// priv of 4 means do not print at all
if ( m->m_priv == 4 ) return status;
// what type of parameter?
char t = m->m_type;
// point to the data in THIS
char *s = THIS + m->m_off + m->m_size * j ;
// . if an array, passed our end, this is the blank line at the end
// . USE THIS EMPTY/DEFAULT LINE TO ADD NEW DATA TO AN ARRAY
// . make at least as big as a long long
if ( j >= jend ) s = "\0\0\0\0\0\0\0\0";
// delimit each cgi var if we need to
if ( m->m_cgi && gbstrlen(m->m_cgi) > 45 ) {
log(LOG_LOGIC,"admin: Cgi variable is TOO big.");
// deliberate crash: 45 chars + a subscript must fit in cgi[64]
char *xx = NULL; *xx = 0;
}
// build the cgi variable name; array elements past the first get
// the element index appended (e.g. "fsp" -> "fsp3")
char cgi[64];
if ( m->m_cgi ) {
if ( j > 0 ) sprintf ( cgi , "%s%li" , m->m_cgi , j );
else sprintf ( cgi , "%s" , m->m_cgi );
}
// . display title and description of the control/parameter
// . the input cell of some parameters are colored
char *color = "";
if ( t == TYPE_CMD || t == TYPE_BOOL2 )
color = " bgcolor=#6060ff";
if ( t == TYPE_BOOL ) {
if ( *s ) color = " bgcolor=#00ff00";
else color = " bgcolor=#ff0000";
}
if ( t == TYPE_BOOL || t == TYPE_BOOL2 ) {
// disable controls not allowed in read only mode
if ( g_conf.m_readOnlyMode && m->m_rdonly )
color = " bgcolor=#ffff00";
}
// a display row starts every "nc" controls (tracked via the static
// s_count), unless this parm continues the previous parm's rowid group
bool firstInRow = false;
if ( (s_count % nc) == 0 ) firstInRow = true;
s_count++;
if ( mm > 0 && m->m_rowid >= 0 && m_parms[mm-1].m_rowid == m->m_rowid )
firstInRow = false;
long firstRow = 0;
//if ( m->m_page==PAGE_PRIORITIES ) firstRow = MAX_PRIORITY_QUEUES - 1;
// . use a separate table for arrays
// . make title and description header of that table
// . do not print all headers if not m_hdrs, a special case for the
// default line in the url filters table
if ( j == firstRow && m->m_rowid >= 0 && firstInRow && m->m_hdrs ) {
// print description as big comment
if ( m->m_desc && pd == 1 ) {
sb->safePrintf ( "\n" );
//p = htmlEncode ( p , pend , m->m_desc ,
// m->m_desc + gbstrlen ( m->m_desc ) );
sb->safePrintf ( "%s" , m->m_desc );
sb->safePrintf ( " \n" );
}
// # column
// do not show this for PAGE_PRIORITIES it is confusing
if ( m->m_max > 1 ) {
//m->m_page != PAGE_PRIORITIES ) {
sb->safePrintf ( "# \n" );
}
// print all headers
// NOTE(review): the loop condition below looks garbled in this
// copy -- presumably "k < m_numParms && m_parms[k].m_rowid ==
// m->m_rowid"; confirm against upstream before editing
for ( long k = mm ;
km_rowid; k++ ) {
// parm shortcut
Parm *mk = &m_parms[k];
sb->safePrintf ( "" );
// if its of type checkbox in a table make it
// toggle them all on/off
if ( mk->m_type == TYPE_CHECKBOX &&
mk->m_page == PAGE_FILTERS ) {
sb->safePrintf("",
m_parms[k].m_cgi, m->m_max);
}
sb->safePrintf ( "%s ", m_parms[k].m_title );
if ( mk->m_type == TYPE_CHECKBOX &&
mk->m_page == PAGE_FILTERS )
sb->safePrintf(" ");
/*
if ( m->m_page == PAGE_PRIORITIES &&
m_parms[k].m_type == TYPE_CHECKBOX)
sb->safePrintf("(toggle) ",
m_parms[k].m_cgi, m->m_max);
*/
sb->safePrintf (" \n");
}
sb->safePrintf ( " \n" ); // mdw added
}
// print row start for single parm
if ( m->m_max <= 1 && ! m->m_hdrs ) {
if ( firstInRow ) {
sb->safePrintf ( "" , bg );
}
sb->safePrintf ( " " , 100/nc/2 );
}
// print the title/description in current table for non-arrays
if ( m->m_max <= 1 && m->m_hdrs ) { // j == 0 && m->m_rowid < 0 ) {
if ( firstInRow )
sb->safePrintf ( " ",bg);
if ( t == TYPE_STRINGBOX ) {
sb->safePrintf ( ""
"%s ",m->m_title );
if ( pd )
status &= sb->htmlEncode (m->m_desc,
gbstrlen(m->m_desc),
false);
sb->safePrintf ( " \n" );
}
else {
sb->safePrintf ( "" //" "
"%s ",
3*100/nc/2/4,m->m_title );
if ( pd )
status &= sb->htmlEncode (m->m_desc,
gbstrlen(m->m_desc),
false);
// and cgi parm if it exists
if ( m->m_def && m->m_scgi )
sb->safePrintf(" Cgi: %s.", m->m_scgi );
// and default value if it exists
if ( m->m_def && m->m_def[0] && t != TYPE_CMD ) {
char *d = m->m_def;
if ( t == TYPE_BOOL ) {
if ( d[0]=='0' ) d = "NO";
else d = "YES";
sb->safePrintf ( " Default: %s.",d);
}
else {
sb->safePrintf (" Default: ");
status &= sb->htmlEncode (d,
gbstrlen(d),
false);
}
}
sb->safePrintf ( " \n" ,
color , 100/nc/2/4 );
}
}
// . print number in row if array, start at 1 for clarity's sake
// . used for url filters table, etc.
if ( m->m_max > 1 ) {
// but if it is in same row as previous, do not repeat it
// for this same row, silly
if ( firstInRow ) // && m->m_page != PAGE_PRIORITIES )
sb->safePrintf ( " %li \n", j );//j+1
// NOTE(review): this branch is unreachable -- same condition
// as the branch above; likely lost a "&& page" clause
else if ( firstInRow )
sb->safePrintf ( " " );
else
sb->safePrintf ( " " );
}
// cast=1 means a submitted command/value is broadcast to all hosts;
// a proxy never broadcasts
long cast = m->m_cast;
if ( g_proxy.isProxy() ) cast = 0;
// print the input box
if ( t == TYPE_BOOL ) {
char *tt, *v;
// link toggles the value: show current state, submit the inverse
if ( *s ) { tt = "YES"; v = "0"; }
else { tt = "NO" ; v = "1"; }
if ( g_conf.m_readOnlyMode && m->m_rdonly )
sb->safePrintf ( "read-only mode " );
// if cast=1, command IS broadcast to all hosts
else
sb->safePrintf ( ""
"%s ",
g_pages.getPath(m->m_page),coll,
cgi,v,cast,tt);
}
else if ( t == TYPE_BOOL2 ) {
if ( g_conf.m_readOnlyMode && m->m_rdonly )
sb->safePrintf ( "read-only mode ");
// always use m_def as the value for TYPE_BOOL2
else
sb->safePrintf ( ""
"%s ",
g_pages.getPath(m->m_page),coll,
cgi,m->m_def, m->m_title);
}
else if ( t == TYPE_CHECKBOX ) {
char *ddd = "";
if ( *s ) ddd = " checked";
// this is part of the "HACK" fix below. you have to
// specify the cgi parm in the POST request, and unchecked
// checkboxes are not included in the POST request.
// NOTE(review): the three safePrintf lines below look garbled
// (markup stripped) in this copy -- confirm upstream
if ( lastRow && m->m_page == PAGE_FILTERS )
sb->safePrintf(" safePrintf(""
" m_page == PAGE_FILTERS)
sb->safePrintf("id=id_%s ",cgi);
sb->safePrintf("value=1 name=%s%s>"
" ",
cgi,ddd);
}
else if ( t == TYPE_CHAR )
sb->safePrintf (" ",cgi,(long)(*s));
/* else if ( t == TYPE_CHAR2 )
sprintf (p," ",cgi,*(char*)s);*/
else if ( t == TYPE_PRIORITY )
printDropDown ( MAX_SPIDER_PRIORITIES , sb , cgi , *s ,
false , false );
else if ( t == TYPE_PRIORITY2 )
printDropDown ( MAX_SPIDER_PRIORITIES , sb , cgi , *s ,
true , true );
else if ( t == TYPE_RETRIES )
printDropDown ( 4 , sb , cgi , *s , false , false );
else if ( t == TYPE_PRIORITY_BOXES ) {
// print ALL the checkboxes when we get the first parm
if ( j != 0 ) return status;
printCheckBoxes ( MAX_SPIDER_PRIORITIES , sb , cgi , s );
}
else if ( t == TYPE_CMD )
// if cast=0 it will be executed, otherwise it will be
// broadcasted with cast=1 to all hosts and they will all
// execute it
sb->safePrintf ( ""
"%s ",
g_pages.getPath(m->m_page),coll,
cgi,cast,m->m_title);
else if ( t == TYPE_FLOAT )
sb->safePrintf (" ",cgi,*(float *)s);
else if ( t == TYPE_IP ) {
// the blank append-row gets an empty box, not "0.0.0.0"
if ( m->m_max > 0 && j == jend )
sb->safePrintf (" ",cgi);
else
sb->safePrintf (" ",cgi,iptoa(*(long *)s));
}
else if ( t == TYPE_LONG )
sb->safePrintf (" ",cgi,*(long *)s);
else if ( t == TYPE_LONG_CONST )
sb->safePrintf ("%li",*(long *)s);
else if ( t == TYPE_LONG_LONG )
sb->safePrintf (" ",cgi,*(long long *)s);
else if ( t == TYPE_STRING || t == TYPE_STRINGNONEMPTY ) {
long size = m->m_size;
// give regular expression box on url filters page more room
if ( m->m_page == PAGE_FILTERS ) {
if ( size > REGEX_TXT_MAX ) size = REGEX_TXT_MAX;
}
else {
if ( size > 20 ) size = 20;
}
// NOTE(review): line below looks garbled -- presumably an
// <input ...> literal followed by sb->dequote(s, gbstrlen(s))
sb->safePrintf (" dequote ( s , gbstrlen(s) );
sb->safePrintf ("\">");
}
else if ( t == TYPE_SAFEBUF ) {
long size = m->m_size;
// give regular expression box on url filters page more room
if ( m->m_page == PAGE_FILTERS ) {
if ( size > REGEX_TXT_MAX ) size = REGEX_TXT_MAX;
}
else {
if ( size > 20 ) size = 20;
}
// NOTE(review): first line below looks garbled like the
// TYPE_STRING case above -- confirm upstream
sb->safePrintf (" dequote ( s , gbstrlen(s) );
SafeBuf *sx = (SafeBuf *)s;
sb->dequote ( sx->getBufStart() , sx->length() );
sb->safePrintf ("\">");
}
else if ( t == TYPE_STRINGBOX ) {
sb->safePrintf("\n");
}
else if ( t == TYPE_CONSTANT )
sb->safePrintf ("%s",m->m_title);
else if ( t == TYPE_MONOD2 )
sb->safePrintf ("%li",j / 2 );
else if ( t == TYPE_MONOM2 ) {
/*
if ( m->m_page == PAGE_PRIORITIES ) {
if ( j % 2 == 0 ) sb->safePrintf ("old");
else sb->safePrintf ("new");
}
else
*/
sb->safePrintf ("%li",j % 2 );
}
else if ( t == TYPE_RULESET ) ;
// subscript is already included in "cgi"
//g_pages.printRulesetDropDown ( sb ,
// user ,
// cgi ,
// *(long *)s , // selected
// -1 ); // subscript
else if ( t == TYPE_TIME ) {
//time is stored as a string
//if time is not stored properly, just write 00:00
if ( s[2] != ':' )
strncpy ( s, "00:00", 5 );
// split "HH:MM" into two NUL-terminated pieces for the form
char hr[3];
char min[3];
memcpy ( hr, s, 2 );
memcpy ( min, s + 3, 2 );
hr[2] = '\0';
min[2] = '\0';
// print the time in the input forms
sb->safePrintf(" h "
" m " ,
cgi ,
hr ,
cgi ,
min );
}
else if ( t == TYPE_DATE || t == TYPE_DATE2 ) {
// time is stored as long
long ct = *(long *)s;
// get the time struct
struct tm *tp = gmtime ( (time_t *)&ct ) ;
// set the "selected" month for the drop down
char *ss[12];
for ( long i = 0 ; i < 12 ; i++ ) ss[i]="";
long month = tp->tm_mon;
if ( month < 0 || month > 11 ) month = 0; // Jan
ss[month] = " selected";
// print the date in the input forms
sb->safePrintf(
" "
""
"Jan"
" Feb"
" Mar"
" Apr"
" May"
" Jun"
" Jul"
" Aug"
" Sep"
" Oct"
" Nov"
" Dec"
" \n"
" "
" "
" h "
" m "
" s" ,
cgi ,
(long)tp->tm_mday ,
cgi ,
ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[8],
ss[9],ss[10],ss[11],
cgi ,
(long)tp->tm_year + 1900 ,
cgi ,
(long)tp->tm_hour ,
cgi ,
(long)tp->tm_min ,
cgi ,
(long)tp->tm_sec );
/*
if ( t == TYPE_DATE2 ) {
p += gbstrlen ( p );
// a long after the long is used for this
long ct = *(long *)(THIS+m->m_off+4);
char *ss = "";
if ( ct ) ss = " checked";
sprintf ( p , " use current "
"time\n",cgi,ss);
}
*/
}
else if ( t == TYPE_SITERULE ) {
// print the siterec rules as a drop down
char *ss[5];
for ( long i = 0; i < 5; i++ ) ss[i] = "";
long v = *(long*)s;
if ( v < 0 || v > 4 ) v = 0;
ss[v] = " selected";
sb->safePrintf ( ""
"Hostname"
" Path Depth 1"
" Path Depth 2"
" Path Depth 3"
" \n",
cgi, ss[0], ss[1], ss[2], ss[3] );
}
// end the input cell
sb->safePrintf ( " \n");
// "insert above" link? used for arrays only, where order matters
if ( m->m_addin && j < jend ) {
sb->safePrintf ( "insert \n",coll,cgi );
}
// does next guy start a new row?
bool lastInRow = true; // assume yes
// NOTE(review): condition below looks garbled -- presumably
// "mm+1 < m_numParms && m_parms[mm+1].m_rowid >= 0 && ..."; confirm
if ( mm+1m_rowid>=0&&m_parms[mm+1].m_rowid==m->m_rowid)
lastInRow = false;
if ( ((s_count-1) % nc) != (nc-1) ) lastInRow = false;
// . display the remove link for arrays if we need to
// . but don't display if next guy does NOT start a new row
if ( m->m_max > 1 && lastInRow ) {
// m->m_page != PAGE_PRIORITIES ) {
// show remove link?
bool show = true;
if ( j >= jend ) show = false;
// get # of rows
long *nr = (long *)((char *)THIS + m->m_off - 4);
// are we the last row?
// (this local shadows the "lastRow" parameter on purpose?
//  NOTE(review): verify -- the parameter is unused past here)
bool lastRow = false;
// yes, if this is true
if ( j == *nr - 1 ) lastRow = true;
// do not allow removal of last default url filters rule
if ( lastRow && !strcmp(m->m_cgi,"fsp")) show = false;
if ( show )
sb->safePrintf (""
"remove \n",coll,cgi );
else
sb->safePrintf ( " \n");
}
if ( lastInRow ) sb->safePrintf (" \n");
return status;
}
// get the object of our desire
// . Return the object that parms for "page" read/write.
// . Pages below PAGE_OVERVIEW are master controls and use the global
//   g_conf; all other pages use the CollectionRec named by the "c" cgi
//   parm, falling back to the default collection for the requesting host.
// . Returns NULL if the collection does not exist (caller must check).
char *Parms::getTHIS ( HttpRequest *r , long page ) {
	// if not master controls, must be a collection rec
	if ( page < PAGE_OVERVIEW ) return (char *)&g_conf;
	char *coll = r->getString ( "c" );
	if ( ! coll || ! coll[0] )
		//coll = g_conf.m_defaultColl;
		coll = g_conf.getDefaultColl( r->getHost(), r->getHostLen() );
	CollectionRec *cr = g_collectiondb.getRec ( coll );
	// . log the collection name we actually looked up, not the raw
	//   "c" cgi parm: r->getString("c") is NULL when the name was
	//   defaulted above, and passing NULL for "%s" is undefined
	//   behavior in printf-style formatting
	if ( ! cr ) log("admin: Collection \"%s\" not found.",
			coll ? coll : "(null)" );
	return (char *)cr;
}
//because this can do commands which block, now we pass a callback
//with the request and socket in case they want to block until
//it completes.
bool Parms::setFromRequest ( HttpRequest *r ,
//long user ,
TcpSocket* s,
bool (*callback)(TcpSocket *s , HttpRequest *r)) {
bool retval = true;
// get the page from the path... like /sockets --> PAGE_SOCKETS
long page = g_pages.getDynamicPageNumber ( r );
// is it a collection?
char *THIS = getTHIS ( r , page );
// ensure valid
if ( ! THIS ) {
// it is null when no collection explicitly specified...
log(LOG_LOGIC,"admin: THIS is null for page %li.",page);
return retval;
}
// . clear all the checkbox parms for this page
// . if they are unchecked, no cgi parm is provided by the browser!!
char *action = r->getString ( "action" );
if ( action && strcmp(action,"submit" )==0 &&
(page == PAGE_SPIDER || page==PAGE_FILTERS) ) {
// || page == PAGE_PRIORITIES) ) {
for ( long i = 0 ; i < m_numParms ; i++ ) {
Parm *m = &m_parms[i];
if ( m->m_page != page ) continue;
if ( m->m_type != TYPE_PRIORITY_BOXES &&
m->m_type != TYPE_CHECKBOX ) continue;
// clear it
for ( long j = 0 ; j < m->m_max ; j++ ) // m_fixed
*(THIS + m->m_off + j) = 0;
}
}
// JAB - invalidate the regex if URL FILTER submit is pressed
//if ( action && strcmp(action,"submit" )==0 && page == PAGE_FILTERS) {
// if ( THIS != (char *)&g_conf )
// ((CollectionRec*) THIS)->invalidateRegEx ();
//}
// well, recompute if we have a doc:quality in any of the rules
// so Msg16 will know to get outlink qualities or not. saves time
// to not get them.
if ( action && strcmp(action,"submit" )==0 &&
page == PAGE_FILTERS &&
THIS != (char *)&g_conf ) {
// cast it
CollectionRec *cr = (CollectionRec *)THIS;
// get it
SpiderColl *sc = cr->m_spiderColl;
// right now we blindly force a reload of this spider cache
// since the url filters might have MATERIALLY changed,
// although later on we might want to check for that.
//if ( sc ) sc->m_needsReload = true;
if ( sc ) sc->m_lastUrlFiltersUpdate = getTimeGlobal();
// need to recompute this!
if ( sc ) sc->m_ufnMapValid = false;
// rebuild the waiting tree
if ( sc ) {
// reset this cache
clearUfnTable();
// activate a scan if not already activated
sc->m_waitingTreeNeedsRebuild = true;
// if a scan is ongoing, this will re-set it
sc->m_nextKey2.setMin();
}
}
// reset the sitedb filters table if submitted changes
//if ( action && strcmp(action,"submit" )==0 && page == PAGE_RULES ) {
// if ( THIS != (char *)&g_conf )
// ((CollectionRec*) THIS)->m_updateSiteRulesTable=1;
//}
// loop through cgi parms
for ( long i = 0 ; i < r->getNumFields() ; i++ ) {
// get cgi parm name
char *field = r->getField ( i );
long flen = r->getFieldLen ( i );
// get index into array, if it is an array, otherwise, an = -1
char *d = field + flen ;
while ( d > field && is_digit ( *(d-1) ) ) d--;
long an = 0;
if ( is_digit ( *d ) ) an = atol ( d );
// ensure we are valid
if ( an < 0 ) {
log("admin: Invalid removal of element"
"%li in array.", an);
continue;
}
char cc = *d;
*d = '\0';
bool insert = false;
if ( strncmp ( field , "ins_" , 4 ) == 0 ) {
insert = true;
field += 4;
}
bool remove = false;
// if it begins with "rm_" it is an array removal request
if ( strncmp ( field , "rm_" , 3 ) == 0 ) {
remove = true;
field +=3 ;
}
// find in parms list
long j;
Parm *m;
for ( j = 0 ; j < m_numParms ; j++ ) {
// get it
m = &m_parms[j];
// . skip if offset is negative, that means none
// . no, could be a command
//if ( m->m_off < 0 ) continue;
// skip if no cgi parm, may not be configurable now
if ( ! m->m_cgi ) continue;
if ( m->m_type == TYPE_TIME ){
char *cgi = m->m_cgi;
long len = gbstrlen(cgi);
if (strncmp(field,cgi,len)) continue;
// if not the hour skip it
if (flen != len + 2 ) continue;
if (field[flen-2] != 'h' ) continue;
if (field[flen-1] != 'r' ) continue;
// we got a match
}
// . compare up to first parm's cgi field name
// . date parms append letters to their base for
// the year,month,day,hour,minute
else if ( m->m_type==TYPE_DATE ||
m->m_type==TYPE_DATE2 ) {
char *cgi = m->m_cgi;
long len = gbstrlen(cgi);
if (strncmp(field,cgi,len)) continue;
// if not the year skip it
if (flen != len + 2 ) continue;
if (field[flen-2] != 'y' ) continue;
if (field[flen-1] != 'r' ) continue;
// we got a match
}
// otherwise, must match the cgi name exactly
else if ( strcmp ( field,m->m_cgi ) != 0 ) continue;
// make sure we got the right parms for what we want
if (THIS == (char *)&g_conf && m->m_obj != OBJ_CONF )
continue;
if (THIS != (char *)&g_conf && m->m_obj == OBJ_CONF )
continue;
// got it
break;
}
// restore the field name in the cgi part of the url
*d = cc;
// bail if the cgi field is not in the parms list
if ( j >= m_numParms ) continue;
// parm "m" must be from the same page as the page we are on
// UNLESS parm is PAGE_NONE. that way CommandPowerNotice()
// will work.
bool onPage = true;
if ( m->m_page != page && m->m_page != PAGE_NONE )
onPage = false;
// if showing the url filters page on the crawlbot/diffbot page
// then allow this to go through!
if ( ! onPage &&
m->m_page == PAGE_FILTERS &&
page == PAGE_CRAWLBOT )
onPage = true;
// if parm is not on the page we are viewing, skip it!
if ( ! onPage ) continue;
// insert row above it if we should (only applicable to
// non-fixed arrays)
if ( insert && m->m_max > 1 ) {
// get everyone in his row
long a = j;
long b = j;
long rowid = m_parms[j].m_rowid;
while ( rowid>=0 && a-1>=0 &&
m_parms[a-1].m_rowid==rowid ) a--;
while ( rowid>=0 && b+1m_needsSave = true;
continue;
}
// remove it if we should (only applicable to non-fixed arrays)
if ( remove && m->m_max > 1 ) {
// get everyone in his row
long a = j;
long b = j;
long rowid = m_parms[j].m_rowid;
while ( rowid>=0 && a-1>=0 &&
m_parms[a-1].m_rowid==rowid ) a--;
while ( rowid>=0 && b+1m_needsSave = true;
continue;
}
// value of cgi parm
char *v;
// used to build a proper date or time from various cgi vars
char ddd[64];
if ( m->m_type == TYPE_TIME ) {
char *cgi = m->m_cgi;
// set the value
char cgihr [10];
char cgimin [10];
sprintf ( cgihr , "%shr" , cgi );
sprintf ( cgimin , "%smin" , cgi );
long hr = r->getLong (cgihr , 0 ) ;
long min = r->getLong (cgimin , 0 ) ;
if ( hr < 0 || hr > 23 ) hr = 0;
if ( min < 0 || min > 59 ) min = 0;
sprintf ( ddd , "%02li:%02li", hr, min );
v = ddd;
}
// if we matched a date parm, set the value special
else if ( m->m_type == TYPE_DATE || m->m_type == TYPE_DATE2 ) {
char *cgi = m->m_cgi;
// set the value
char cgiyr [10];
char cgimon [10];
char cgiday [10];
char cgihr [10];
char cgimin [10];
char cgisec [10];
sprintf ( cgiyr , "%syr" , cgi );
sprintf ( cgimon , "%smon" , cgi );
sprintf ( cgiday , "%sday" , cgi );
sprintf ( cgihr , "%shr" , cgi );
sprintf ( cgimin , "%smin" , cgi );
sprintf ( cgisec , "%ssec" , cgi );
static char *mnames[] = {
"Jan","Feb","Mar","Apr","May","Jun",
"Jul","Aug","Sep","Oct","Nov","Dec"};
long mm = r->getLong(cgimon , 0);
if ( mm < 0 || mm > 11 ) mm = 0;
sprintf ( ddd , "%li %s %li %li:%li:%li",
r->getLong (cgiday , 0 ) ,
mnames[mm],
r->getLong (cgiyr , 2004 ) ,
r->getLong (cgihr , 0 ) ,
r->getLong (cgimin , 0 ) ,
r->getLong (cgisec , 0 ) );
v = ddd;
}
// get the new value (null terminated)
else v = r->getValue ( i );
// . skip if no value was provided
// . unless it was a string! so we can make them empty.
if ( v[0] == '\0' &&
m->m_type != TYPE_STRING &&
m->m_type != TYPE_STRINGBOX ) continue;
// if a command, do it
if ( m->m_type == TYPE_CMD ) {
if(! m->m_func (s, r, callback) ) {
//sanity check
if(!retval) {
//this means that we are trying to
//do two commands which block, who
//calls the callback?
log(LOG_LOGIC,"admin: two blocking"
"commands issued at the same"
"time.");
char *xx = NULL; *xx = 0;
}
retval = false;
}
continue;
}
// skip if offset is negative, that means none
if ( m->m_off < 0 ) continue;
// skip if no permission
//if ( (m->m_perms & user) == 0 ) continue;
//if (m->m_type == TYPE_PRIORITY_BOXES)
// log("PRIORITY BOX");
// set it
setParm ( (char *)THIS , m, j, an, v, false/*not html enc.*/,
true );
// need to save it
if ( THIS != (char *)&g_conf )
((CollectionRec *)THIS)->m_needsSave = true;
// . ensure our array element count is at least that
// . do we have an array? skip if not.
//if ( m->m_max <= 0 ) continue;
// . is this element we're adding bumping up the count?
// . array count is 4 bytes before the array
//char *pos = (char *)THIS + m->m_off - 4 ;
// set the count to it if it is bigger than current count
//if ( an + 1 > *(long *)pos ) *(long *)pos = an + 1; mdw
}
// so g_spiderCache can reload if sameDomainWait, etc. have changed
g_collectiondb.updateTime();
return retval;
}
void Parms::insertParm ( long i , long an , char *THIS ) {
Parm *m = &m_parms[i];
// . shift everyone above down
// . first long at offset is always the count
// for arrays
char *pos = (char *)THIS + m->m_off ;
long num = *(long *)(pos - 4);
// ensure we are valid
if ( an >= num || an < 0 ) {
log("admin: Invalid insertion of element "
"%li in array of size %li for \"%s\".",
an,num,m->m_title);
return;
}
// also ensure that we have space to put the parm in, because in
// case of URl filters, it is bounded by MAX_FILTERS
if ( num >= MAX_FILTERS ){
log("admin: Invalid insert of element %li, array is full "
"in size %li for \"%s\".",an, num, m->m_title);
return;
}
// point to the place where the element is to be inserted
char *src = pos + m->m_size * an;
//point to where it is to be moved
char *dst = pos + m->m_size * ( an + 1 );
// how much to move
long size = ( num - an ) * m->m_size ;
// move them
memmove ( dst , src , size );
// inc the count
*(long *)(pos-4) = (*(long *)(pos-4)) + 1;
// put the defaults in the inserted line
setParm ( (char *)THIS , m , i , an , m->m_def , false ,false );
}
// . remove element "an" from parm #i's array in object "THIS"
// . shifts the trailing elements down one slot and decrements the
//   count long that sits 4 bytes before the array storage
void Parms::removeParm ( long i , long an , char *THIS ) {
	Parm *m = &m_parms[i];
	// . shift everyone above down
	// . first long at offset is always the count
	//   for arrays
	char *pos = (char *)THIS + m->m_off ;
	long num = *(long *)(pos - 4);
	// ensure we are valid
	if ( an >= num || an < 0 ) {
		log("admin: Invalid removal of element "
		    "%li in array of size %li for \"%s\".",
		    an,num,m->m_title);
		return;
	}
	// point to the element being removed
	char *dst = pos + m->m_size * an;
	// then point to the good stuff that follows it
	char *src = pos + m->m_size * (an+1);
	// how many bytes trail the removed element
	long size = (num - an - 1 ) * m->m_size ;
	// . bury it by shifting the trailing elements down
	// . src and dst OVERLAP whenever more than one element trails,
	//   so memmove() must be used here -- memcpy() on overlapping
	//   regions is undefined behavior
	memmove ( dst , src , size );
	// dec the count
	*(long *)(pos-4) = (*(long *)(pos-4)) - 1;
}
// . store value "s" (a NUL-terminated ascii string) into parm #mm
//   ("m", which must equal &m_parms[mm]) of object "THIS" at array
//   index "j"
// . "isHtmlEncoded" true means "s" still carries html entities and is
//   decoded on the way into storage
// . "fromRequest" true means the value came from a live admin http
//   request: unchanged values return early, and real changes are
//   logged, recorded in statsdb and may trigger an email alert
void Parms::setParm ( char *THIS , Parm *m , long mm , long j , char *s ,
		      bool isHtmlEncoded , bool fromRequest ) {
	// . this is just for setting CollectionRecs, so skip if offset < 0
	// . some parms are just for SearchInput (search parms)
	if ( m->m_off < 0 ) return;
	// old/new values (widened to float) for the statsdb record below
	float oldVal = 0;
	float newVal = 0;
	// a NULL value is coerced to "0" so the type branches below can
	// still parse something
	if ( ! s ) {
		s = "0";
		char *tit = m->m_title;
		if ( ! tit || ! tit[0] ) tit = m->m_xml;
		log(LOG_LOGIC,"admin: Parm \"%s\" had NULL default value.",
		    tit);
		//char *xx = NULL; *xx = 0;
	}
	// sanity check
	if ( &m_parms[mm] != m ) {
		log(LOG_LOGIC,"admin: Not sane parameters.");
		char *xx = NULL; *xx = 0;
	}
	// if attempting to add beyond array max, bail out
	if ( j >= m->m_max && j >= m->m_fixed ) {
		log ( "admin: Attempted to set parm beyond limit. Aborting." );
		return;
	}
	// if we are setting a guy in an array AND he is NOT the first
	// in his row, ensure the guy before has a count of j+1 or more.
	//
	// crap, on the url filters page if you do not check "spidering
	// enabled" checkbox when adding a new rule at the bottom of the
	// table, , then the spidering enabled parameter does not transmit so
	// the "respider frequency" ends up checking the "spider enabled"
	// array whose "count" was not incremented like it should have been.
	// HACK: make new line at bottom always have spidering enabled
	// checkbox set and make it impossible to unset.
	if ( m->m_max > 1 && m->m_rowid >= 0 && mm > 0 &&
	     m_parms[mm-1].m_rowid == m->m_rowid ) {
		char *pos = (char *)THIS + m_parms[mm-1].m_off - 4 ;
		long maxcount = *(long *)pos;
		if ( j >= maxcount ) {
			log("admin: parm before \"m\" is limiting us");
			//log("admin: try nuking the url filters or whatever "
			//    "and re-adding");
			return;
		}
	}
	// ensure array count at least j+1
	if ( m->m_max > 1 ) {
		// . is this element we're adding bumping up the count?
		// . array count is 4 bytes before the array
		char *pos = (char *)THIS + m->m_off - 4 ;
		// set the count to it if it is bigger than current count
		if ( j + 1 > *(long *)pos ) *(long *)pos = j + 1;
	}
	// dispatch on type; each branch either returns early (value
	// unchanged on a request) or jumps to "changed:" below
	char t = m->m_type;
	// all the one-byte types parse as a number and store one char
	if ( t == TYPE_CHAR ||
	     t == TYPE_CHAR2 ||
	     t == TYPE_CHECKBOX ||
	     t == TYPE_BOOL ||
	     t == TYPE_BOOL2 ||
	     t == TYPE_PRIORITY ||
	     t == TYPE_PRIORITY2 ||
	     t == TYPE_PRIORITY_BOXES ||
	     t == TYPE_RETRIES ||
	     t == TYPE_FILTER ) {
		if ( fromRequest && *(char *)(THIS + m->m_off + j) == atol(s))
			return;
		if ( fromRequest)oldVal = (float)*(char *)(THIS + m->m_off +j);
		*(char *)(THIS + m->m_off + j) = atol ( s );
		newVal = (float)*(char *)(THIS + m->m_off + j);
		goto changed; }
	else if ( t == TYPE_CMD ) {
		log(LOG_LOGIC, "conf: Parms: TYPE_CMD is not a cgi var.");
		return; }
	else if ( t == TYPE_DATE2 || t == TYPE_DATE ) {
		long v = (long)atotime ( s );
		if ( fromRequest && *(long *)(THIS + m->m_off + 4*j) == v )
			return;
		*(long *)(THIS + m->m_off + 4*j) = v;
		if ( v < 0 ) log("conf: Date for <%s> of \""
				 "%s\" is not in proper format like: "
				 "01 Jan 1980 22:45",m->m_xml,s);
		goto changed; }
	else if ( t == TYPE_FLOAT ) {
		if( fromRequest &&
		    *(float *)(THIS + m->m_off + 4*j) == (float)atof ( s ) )
			return;
		// if changed within .00001 that is ok too, do not count
		// as changed, the atof() has roundoff errors
		//float curVal = *(float *)(THIS + m->m_off + 4*j);
		//float newVal = atof(s);
		//if ( newVal < curVal && newVal + .000001 >= curVal ) return;
		//if ( newVal > curVal && newVal - .000001 <= curVal ) return;
		if ( fromRequest ) oldVal = *(float *)(THIS + m->m_off + 4*j);
		*(float *)(THIS + m->m_off + 4*j) = (float)atof ( s );
		newVal = *(float *)(THIS + m->m_off + 4*j);
		goto changed; }
	else if ( t == TYPE_IP ) {
		// NOTE(review): this branch never sets oldVal/newVal, so
		// the statsdb record below logs 0/0 for ip changes --
		// confirm that is intended
		if ( fromRequest && *(long *)(THIS + m->m_off + 4*j) ==
		     (long)atoip (s,gbstrlen(s) ) )
			return;
		*(long *)(THIS + m->m_off + 4*j) = (long)atoip (s,gbstrlen(s) );
		goto changed; }
	else if ( t == TYPE_LONG || t == TYPE_LONG_CONST || t == TYPE_RULESET||
		  t == TYPE_SITERULE ) {
		long v = atol ( s );
		// min is considered valid if >= 0
		if ( m->m_min >= 0 && v < m->m_min ) v = m->m_min;
		if ( fromRequest && *(long *)(THIS + m->m_off + 4*j) == v )
			return;
		if ( fromRequest)oldVal=(float)*(long *)(THIS + m->m_off +4*j);
		*(long *)(THIS + m->m_off + 4*j) = v;
		newVal = (float)*(long *)(THIS + m->m_off + 4*j);
		goto changed; }
	else if ( t == TYPE_LONG_LONG ) {
		if ( fromRequest &&
		     *(unsigned long long *)(THIS + m->m_off+8*j)==
		     strtoull(s,NULL,10))
			return;
		*(long long *)(THIS + m->m_off + 8*j) = strtoull(s,NULL,10);
		goto changed; }
	// like TYPE_STRING but dynamically allocates
	else if ( t == TYPE_SAFEBUF ) {
		long len = gbstrlen(s);
		// no need to truncate since safebuf is dynamic
		//if ( len >= m->m_size ) len = m->m_size - 1; // truncate!!
		//char *dst = THIS + m->m_off + m->m_size*j ;
		// point to the safebuf, in the case of an array of
		// SafeBufs "j" is the # in the array, starting at 0
		SafeBuf *sb = (SafeBuf *)(THIS+m->m_off+(j*sizeof(SafeBuf)) );
		long oldLen = sb->length();
		// why was this commented out??? we need it now that we
		// send email alerts when parms change!
		if ( fromRequest &&
		     ! isHtmlEncoded && oldLen == len &&
		     memcmp ( sb->getBufStart() , s , len ) == 0 )
			return;
		// nuke it
		sb->purge();
		// this means that we can not use string POINTERS as parms!!
		if ( ! isHtmlEncoded ) sb->safeMemcpy ( s , len );
		else len = sb->htmlDecode (s,len,false,0);
		// ensure null terminated
		sb->nullTerm();
		// null term it all
		//dst[len] = '\0';
		//sb->reserve ( 1 );
		// null terminate but do not include as m_length so the
		// memcmp() above still works right
		//sb->m_buf[sb->m_length] = '\0';
		// . might have to set length
		// . used for CollectionRec::m_htmlHeadLen and m_htmlTailLen
		//if ( m->m_plen >= 0 )
		//	*(long *)(THIS + m->m_plen) = len ;
		goto changed;
	}
	else if ( t == TYPE_STRING ||
		  t == TYPE_STRINGBOX ||
		  t == TYPE_STRINGNONEMPTY ||
		  t == TYPE_TIME ) {
		long len = gbstrlen(s);
		if ( len >= m->m_size ) len = m->m_size - 1; // truncate!!
		char *dst = THIS + m->m_off + m->m_size*j ;
		// why was this commented out??? we need it now that we
		// send email alerts when parms change!
		if ( fromRequest &&
		     ! isHtmlEncoded && (long)gbstrlen(dst) == len &&
		     memcmp ( dst , s , len ) == 0 )
			return;
		// this means that we can not use string POINTERS as parms!!
		if ( ! isHtmlEncoded ) memcpy ( dst , s , len );
		else len = htmlDecode (dst , s,len,false,0);
		dst[len] = '\0';
		// . might have to set length
		// . used for CollectionRec::m_htmlHeadLen and m_htmlTailLen
		if ( m->m_plen >= 0 )
			*(long *)(THIS + m->m_plen) = len ;
		goto changed; }
	// NOTE(review): a type matching none of the branches above falls
	// through to here as if it changed -- confirm every storable type
	// is covered by the chain
 changed:
	// tell gigablast the value is EXPLICITLY given -- no longer based
	// on default.conf
	if ( m->m_obj == OBJ_COLL ) ((CollectionRec *)THIS)->m_orig[mm] = 2;
	// we do not recognize timezones correctly when this is serialized
	// into coll.conf, it says UTC, which is ignored in HttpMime.cpp's
	// atotime() function. and when we submit it i think we use the
	// local time zone, so the values end up changing every time we
	// submit!!! i think it might read it in as UTC then write it out
	// as local time, or vice versa.
	if ( t == TYPE_DATE || t == TYPE_DATE2 ) return;
	// do not send if setting from startup
	if ( ! fromRequest ) return;
	// note it in the log
	log("admin: parm \"%s\" changed value",m->m_title);
	long long nowms = gettimeofdayInMillisecondsLocal();
	// . note it in statsdb
	// . record what parm change and from/to what value
	g_statsdb.addStat ( 0, // niceness ,
			    "parm_change" ,
			    nowms,
			    nowms,
			    0 , // value
			    m->m_hash , // parmHash
			    oldVal,
			    newVal);
	// only send email alerts if we are host 0 since everyone syncs up
	// with host #0 anyway
	if ( g_hostdb.m_hostId != 0 ) return;
	// send an email alert notifying the admins that this parm was changed
	// BUT ALWAYS send it if email alerts were just TURNED OFF
	// ("sea" = Send Email Alerts)
	if ( ! g_conf.m_sendEmailAlerts && strcmp(m->m_cgi,"sea") != 0 )
		return;
	// if spiders we turned on, do not send an email alert, cuz we
	// turn them on when we restart the cluster
	if ( strcmp(m->m_cgi,"se")==0 && g_conf.m_spideringEnabled )
		return;
	char tmp[1024];
	Host *h0 = g_hostdb.getHost ( 0 );
	long ip0 = 0;
	if ( h0 ) ip0 = h0->m_ip;
	sprintf(tmp,"%s: parm \"%s\" changed value",iptoa(ip0),m->m_title);
	g_pingServer.sendEmail ( NULL , // Host ptr
				 tmp , // msg
				 true , // sendToAdmin
				 false , // oom?
				 false , // kernel error?
				 true , // parm change?
				 true );// force it? even if disabled?
	// now the spider collection can just check the collection rec
	//long long nowms = gettimeofdayInMilliseconds();
	//((CollectionRec *)THIS)->m_lastUpdateTime = nowms;
	return;
}
// return the Parm whose precomputed hash equals "parmHash", or NULL
// if no registered parm has that hash
Parm *Parms::getParmFromParmHash ( long parmHash ) {
	for ( long k = 0 ; k < m_numParms ; k++ )
		if ( m_parms[k].m_hash == parmHash )
			return &m_parms[k];
	// not found
	return NULL;
}
// . reset every parm of object "THIS" (g_conf or a CollectionRec) to
//   its hard-coded default value
// . non-fixed arrays are left empty (count set to 0); fixed-size
//   arrays get every slot defaulted
void Parms::setToDefault ( char *THIS ) {
	// init if we should
	init();
	for ( long i = 0 ; i < m_numParms ; i++ ) {
		Parm *m = &m_parms[i];
		// these types carry no stored value to default
		if ( m->m_type == TYPE_COMMENT ) continue;
		if ( m->m_type == TYPE_MONOD2 ) continue;
		if ( m->m_type == TYPE_MONOM2 ) continue;
		if ( m->m_type == TYPE_CMD ) continue;
		// only touch parms belonging to this object type
		if (THIS == (char *)&g_conf && m->m_obj != OBJ_CONF ) continue;
		if (THIS != (char *)&g_conf && m->m_obj == OBJ_CONF ) continue;
		// sanity check, make sure it does not overflow
		if ( m->m_obj != OBJ_CONF &&
		     m->m_off > (long)sizeof(CollectionRec)){
			log(LOG_LOGIC,"admin: Parm in Parms.cpp should use "
			    "OBJ_COLL not OBJ_CONF");
			char *xx = NULL; *xx = 0;
		}
		//if ( m->m_page == PAGE_PRIORITIES )
		//	log("hey");
		// or
		if ( m->m_page > PAGE_OVERVIEW &&
		     m->m_page != PAGE_NONE &&
		     m->m_obj == OBJ_CONF ) {
			log(LOG_LOGIC,"admin: Page can not reference "
			    "g_conf and be declared AFTER PAGE_OVERVIEW in "
			    "Pages.h. Title=%s",m->m_title);
			char *xx = NULL; *xx = 0;
		}
		// leave arrays empty, set everything else to default
		if ( m->m_max <= 1 ) {
			//if ( i == 282 ) // "query" parm
			//	log("hey");
			setParm ( THIS , m, i, 0, m->m_def, false/*not enc.*/,
				  false );
			// NOTE(review): this cast runs even when THIS is
			// &g_conf (OBJ_CONF parms reach here) -- assumes
			// writing m_orig[i] through a CollectionRec* is safe
			// in that case; confirm
			((CollectionRec *)THIS)->m_orig[i] = 1;
			//m->m_orig = 0; // set in setToDefaults()
		}
		// these are special, fixed size arrays
		if ( m->m_fixed > 0 ) {
			for ( long k = 0 ; k < m->m_fixed ; k++ ) {
				setParm(THIS,m,i,k,m->m_def,false/*not enc.*/,
					false);
				//m->m_orig = 0; // set in setToDefaults()
				((CollectionRec *)THIS)->m_orig[i] = 1;
			}
			continue;
		}
		// make array sizes 0 (scalars were fully handled above)
		if ( m->m_max <= 1 ) continue;
		// otherwise, array is not fixed size
		char *s = THIS + m->m_off ;
		// set count to 1 if a default is present
		//if ( m->m_def[0] ) *(long *)(s-4) = 1;
		//else               *(long *)(s-4) = 0;
		*(long *)(s-4) = 0;
	}
}
// . returns false and sets g_errno on error
// . you should set your "THIS" to its defaults before calling this
// . reads parm values for "THIS" (g_conf or a CollectionRec) from the
//   xml conf file "filename"; any tag missing there falls back to the
//   shared default file "filenameDef" (collections/default.conf)
bool Parms::setFromFile ( void *THIS ,
			  char *filename ,
			  char *filenameDef ) {
	// make sure we're init'd
	init();
	// let em know
	if ( THIS == &g_conf ) log (LOG_INIT,"conf: Reading %s." , filename );
	// . let the log know what we are doing
	// . filename is NULL if a call from CollectionRec::setToDefaults()
	Xml xml;
	char buf [ MAX_XML_CONF ];
	if ( filename && ! setXmlFromFile (&xml,filename,buf,MAX_XML_CONF) )
		return false;
	// . all the collectionRecs have the same default file in
	//   the workingDir/collections/default.conf
	// . so use our built in buffer for that, loaded only once
	if ( THIS != &g_conf && ! m_isDefaultLoaded ) {
		m_isDefaultLoaded = true;
		File f;
		f.set ( filenameDef );
		if ( ! f.doesExist() ) {
			log(LOG_INIT,
			    "admin: Default collection configuration file "
			    "%s was not found. Newly created collections "
			    "will use hard coded defaults.",f.getFilename());
			goto skip;
		}
		if ( ! setXmlFromFile ( &m_xml2 ,
					filenameDef ,
					m_buf ,
					MAX_XML_CONF ) ) return false;
	}
 skip:
	long vlen;
	char *v ;
	//char c ;
	long numNodes = xml.getNumNodes();
	long numNodes2 = m_xml2.getNumNodes();
	// now set THIS based on the parameters in the xml file
	for ( long i = 0 ; i < m_numParms ; i++ ) {
		// get it
		Parm *m = &m_parms[i];
		//log(LOG_DEBUG, "Parms: %s: parm: %s", filename, m->m_xml);
		// . there are 2 object types, coll recs and g_conf, aka
		//   OBJ_COLL and OBJ_CONF.
		// . make sure we got the right parms for what we want
		if ( THIS == &g_conf && m->m_obj != OBJ_CONF ) continue;
		if ( THIS != &g_conf && m->m_obj == OBJ_CONF ) continue;
		// skip comments and command
		if ( m->m_type == TYPE_COMMENT ) continue;
		if ( m->m_type == TYPE_MONOD2 ) continue;
		if ( m->m_type == TYPE_MONOM2 ) continue;
		if ( m->m_type == TYPE_CMD ) continue;
		if ( m->m_type == TYPE_CONSTANT ) continue;
		// these are special commands really
		if ( m->m_type == TYPE_BOOL2 ) continue;
		//if ( strcmp ( m->m_xml , "users" ) == 0 )
		//	log("got it");
		// we did not get one from first xml file yet
		bool first = true;
		// array count
		long j = 0;
		// node number
		long nn = 0;
		// a tmp thingy
		char tt[1];
		long nb;
		long newnn;
	loop:
		// get xml node number of m->m_xml in the "xml" file
		newnn = xml.getNodeNum(nn,1000000,m->m_xml,gbstrlen(m->m_xml));
#ifdef _GLOBALSPEC_
		if ( m->m_priv == 2 ) continue;
		if ( m->m_priv == 3 ) continue;
#elif _CLIENT_
		// always use default value if client not allowed control of
		if ( m->m_priv ) continue;
#elif _METALINCS_
		if ( m->m_priv == 2 ) continue;
		if ( m->m_priv == 3 ) continue;
#endif
		// debug
		//log("%s --> %li",m->m_xml,nn);
		// try default xml file if none, but only if first try
		if ( newnn < 0 && first ) goto try2;
		// it is valid, use it
		nn = newnn;
		// set the flag, we've committed the array to the first file
		first = false;
		// otherwise, we had some in this file, but now we're out
		if ( nn < 0 ) continue;
		// . next node is the value of this tag
		// . skip if none there
		if ( nn + 1 >= numNodes ) continue;
		// point to it
		v = xml.getNode ( nn + 1 );
		vlen = xml.getNodeLen ( nn + 1 );
		// if a back tag... set the value to the empty string
		if ( v[0] == '<' && v[1] == '/' ) vlen = 0;
		// now, extricate from the tag if we need to
		if ( m->m_type == TYPE_STRING ||
		     m->m_type == TYPE_STRINGBOX ||
		     m->m_type == TYPE_SAFEBUF ||
		     m->m_type == TYPE_STRINGNONEMPTY ) {
			char *oldv = v;
			long oldvlen = vlen;
			// if next guy is NOT a tag node, try the next one
			if ( v[0] != '<' && nn + 2 < numNodes ) {
				v = xml.getNode ( nn + 2 );
				vlen = xml.getNodeLen ( nn + 2 );
			}
			// should be a
			// NOTE(review): the next statement appears corrupted
			// in this copy of the file (the CDATA marker and the
			// start of the log() call were lost) -- restore it
			// from version control before building
			if ( vlen<12 || strncasecmp(v," tag found "
			    "for \"<%s>\" tag. Trying without CDATA.",
			    m->m_xml);
			v = oldv;
			vlen = oldvlen;
			}
			// point to the nugget
			else {
				v += 9;
				vlen -= 12;
			}
		}
		// get the value
		//v = xml.getString ( nn , nn+2 , m->m_xml , &vlen );
		// this only happens when tag is there, but without a value
		if ( ! v || vlen == 0 ) { vlen = 0; v = tt; }
		//c = v[vlen];
		v[vlen]='\0';
		if ( vlen == 0 ){
			// . this is generally ok
			// . this is spamming the log so i am commenting out! (MDW)
			//log(LOG_INFO, "parms: %s: Empty value.", m->m_xml);
			// Allow an empty string
			//continue;
		}
		// now decode it into itself
		nb = htmlDecode ( v , v , vlen , false ,0);
		v[nb] = '\0';
		// set our parm
		setParm ( (char *)THIS, m, i, j, v, false/*is html encoded?*/,
			  false );
		// we were set from the explicit file
		((CollectionRec *)THIS)->m_orig[i] = 2;
		// go back
		//v[vlen] = c;
		// do not repeat same node
		nn++;
		// try to get the next node if we're an array
		if ( ++j < m->m_max || j < m->m_fixed ) { goto loop; }
		// otherwise, if not an array, go to next parm
		continue;
	try2:
		// get xml node number of m->m_xml in the "m_xml" file
		nn = m_xml2.getNodeNum(nn,1000000,m->m_xml,gbstrlen(m->m_xml));
		// otherwise, we had one in file, but now we're out
		if ( nn < 0 ) {
			// if it was ONLY a search input parm, with no
			// default value that can be changed in the
			// CollectionRec then skip it
			if ( m->m_soff != -1 &&
			     m->m_off == -1 &&
			     m->m_smaxc == -1 )
				continue;
			// . if it is a string, like and default is
			//   NULL then don't worry about reporting it
			// . no, just make the default "" then
			//if ( m->m_type==TYPE_STRING && ! m->m_def) continue;
			// bitch that it was not found
			//if ( ! m->m_def[0] )
			//	log("conf: %s does not have <%s> tag. "
			//	    "Ommitting.",filename,m->m_xml);
			//else
			if ( ! m->m_def ) //m->m_def[0] )
				log("conf: %s does not have <%s> tag. Using "
				    "default value of \"%s\".", filename,
				    m->m_xml,m->m_def);
			continue;
		}
		// . next node is the value of this tag
		// . skip if none there
		if ( nn + 1 >= numNodes2 ) continue;
		// point to it
		v = m_xml2.getNode ( nn + 1 );
		vlen = m_xml2.getNodeLen ( nn + 1 );
		// if a back tag... set the value to the empty string
		if ( v[0] == '<' && v[1] == '/' ) vlen = 0;
		// now, extricate from the tag if we need to
		if ( m->m_type == TYPE_STRING ||
		     m->m_type == TYPE_STRINGBOX ||
		     m->m_type == TYPE_STRINGNONEMPTY ) {
			char *oldv = v;
			long oldvlen = vlen;
			// reset if not a tag node
			if ( v[0] != '<' && nn + 2 < numNodes2 ) {
				v = m_xml2.getNode ( nn + 2 );
				vlen = m_xml2.getNodeLen ( nn + 2 );
			}
			// should be a
			// NOTE(review): same corruption as above -- the
			// CDATA check/log text was lost here too
			if ( vlen<12 || strncasecmp(v," tag found "
			    "for \"<%s>\" tag. Trying without CDATA.",
			    m->m_xml);
			v = oldv;
			vlen = oldvlen;
			}
			// point to the nugget
			else {
				v += 9;
				vlen -= 12;
			}
		}
		// get the value
		//v = m_xml2.getString ( nn , nn+2 , m->m_xml , &vlen );
		// this only happens when tag is there, but without a value
		if ( ! v || vlen == 0 ) { vlen = 0; v = tt; }
		//c = v[vlen];
		v[vlen]='\0';
		// now decode it into itself
		nb = htmlDecode ( v , v , vlen , false,0);
		v[nb] = '\0';
		// set our parm
		setParm ( (char *)THIS, m, i, j, v, false/*is html encoded?*/,
			  false );
		// we were set from the backup default file
		((CollectionRec *)THIS)->m_orig[i] = 1;
		// go back
		//v[vlen] = c;
		// do not repeat same node
		nn++;
		// try to get the next node if we're an array
		if ( ++j < m->m_max || j < m->m_fixed ) { goto loop; }
		// otherwise, if not an array, go to next parm
		continue;
	}
	// always make sure we got some admin security
	if ( g_conf.m_numMasterIps <= 0 && g_conf.m_numMasterPwds <= 0 ) {
		log(LOG_INFO,
		    "conf: No master IP or password provided. Using default "
		    "password 'footbar23'." );
		//g_conf.m_masterIps[0] = atoip ( "64.139.94.202", 13 );
		//g_conf.m_numMasterIps = 1;
		strcpy ( g_conf.m_masterPwds[0] , "footbar23" );
		g_conf.m_numMasterPwds = 1;
	}
	return true;
}
// . returns false and sets g_errno on error
// . reads conf file "filename" into caller-supplied "buf" (capacity
//   "bufSize" bytes), strips '#' comment lines in place, and parses
//   the result into "xml"
// . "buf" must outlive "xml" since xml->set() does not own the data
bool Parms::setXmlFromFile(Xml *xml, char *filename, char *buf, long bufSize){
	File f;
	f.set ( filename );
	// . is it too big?
	// . we store a NUL at buf[fsize] below, so fsize must be
	//   STRICTLY less than bufSize -- the old "fsize > bufSize" test
	//   let fsize == bufSize through and overran the buffer by one
	long fsize = f.getFileSize();
	if ( fsize >= bufSize ) {
		log ("conf: File size of %s is %li, must be "
		     "less than %li.",f.getFilename(),fsize,bufSize );
		char *xx = NULL; *xx = 0;
	}
	// open it for reading
	f.set ( filename );
	if ( ! f.open ( O_RDONLY ) )
		return log("conf: Could not open %s: %s.",
			   filename,mstrerror(g_errno));
	// read in the file
	long numRead = f.read ( buf , bufSize , 0 /*offset*/ );
	f.close ( );
	if ( numRead != fsize )
		return log ("conf: Could not read %s : %s.",
			    filename,mstrerror(g_errno));
	// null terminate it
	buf [ fsize ] = '\0';
	// . remove all comments in case they contain tags
	// . if you have a # as part of your string, it must be html encoded,
	//   just like you encode < and >
	char *s = buf;
	char *d = buf;
	while ( *s ) {
		// . skip comments
		// . watch out for html encoded pound signs though: a '#'
		//   preceded by '&' and followed by a digit is an entity
		if ( *s == '#' ) {
			if (s>buf && *(s-1)=='&' && is_digit(*(s+1))) goto ok;
			while ( *s && *s != '\n' ) s++;
			continue;
		}
		// otherwise, transcribe over
	ok:
		*d++ = *s++;
	}
	*d = '\0';
	bufSize = d - buf;
	// . set to xml
	// . use version of 0
	return xml->set ( buf ,
			  bufSize ,
			  false , // ownData
			  0 , // allocSize
			  false , // pureXml?
			  0 );// version
}
#define MAX_CONF_SIZE 200000
// returns false and sets g_errno on error
// . serialize every savable parm of "THIS" (g_conf or a CollectionRec)
//   into xml text and write it to file "f"
// . each parm's description is emitted above its tag(s) as '#'
//   comment lines wrapped at ~77 columns
bool Parms::saveToXml ( char *THIS , char *f ) {
	if ( g_conf.m_readOnlyMode ) return true;
	// print into buffer
	char buf[MAX_CONF_SIZE];
	char *p = buf;
	char *pend = buf + MAX_CONF_SIZE;
	long len ;
	long n ;
	File ff ;
	long j ;
	long count;
	char *s;
	// now set THIS based on the parameters in the xml file
	for ( long i = 0 ; i < m_numParms ; i++ ) {
		// get it
		Parm *m = &m_parms[i];
		// . there are 2 object types, coll recs and g_conf, aka
		//   OBJ_COLL and OBJ_CONF.
		// . make sure we got the right parms for what we want
		if ( THIS == (char *)&g_conf && m->m_obj != OBJ_CONF) continue;
		if ( THIS != (char *)&g_conf && m->m_obj == OBJ_CONF) continue;
		if ( m->m_type == TYPE_MONOD2 ) continue;
		if ( m->m_type == TYPE_MONOM2 ) continue;
		// skip if we should not save to xml
		if ( ! m->m_save ) continue;
		// allow comments though (skips value setup below)
		if ( m->m_type == TYPE_COMMENT ) goto skip2;
		// skip if this was compiled for a client and they should not
		// see this control
#ifdef _GLOBALSPEC_
		if ( m->m_priv == 2 ) continue;
		if ( m->m_priv == 3 ) continue;
#elif _CLIENT_
		if ( m->m_priv ) continue;
#elif _METALINCS_
		if ( m->m_priv == 2 ) continue;
		if ( m->m_priv == 3 ) continue;
#endif
		// skip if offset is negative, that means none
		if ( m->m_off < 0 ) continue;
		s = (char *)THIS + m->m_off ;
		// if array, count can be 0 or more than 1
		count = 1;
		if ( m->m_max > 1 ) count = *(long *)(s-4);
		if ( m->m_fixed > 0 ) count = m->m_fixed;
		// sanity check
		if ( count > 100000 ) {
			log(LOG_LOGIC,"admin: Outrageous array size in for "
			    "parameter %s. Does the array max size long "
			    "preceed it in the conf class?",m->m_title);
			exit(-1);
		}
	skip2:
		// description, do not wrap words around lines
		char *d = m->m_desc;
		// if empty array mod description to include the tag name
		char tmp [10*1024];
		if ( m->m_max > 1 && count == 0 && gbstrlen(d) < 9000 &&
		     m->m_xml && m->m_xml[0] ) {
			char *cc = "";
			if ( d && d[0] ) cc = "\n";
			sprintf ( tmp , "%s%sUse <%s> tag.",d,cc,m->m_xml);
			d = tmp;
		}
		char *END = d + gbstrlen(d);
		char *dend;
		char *last;
		char *start;
		// just print tag if it has no description
		if ( ! *d ) goto skip;
		if ( p + gbstrlen(d)+5 >= pend ) goto hadError;
		if ( p > buf ) *p++='\n';
	loop:
		// wrap the description at ~77 columns, breaking on the
		// last space (or explicit newline) before the limit
		dend = d + 77;
		if ( dend > END ) dend = END;
		last = d;
		start = d;
		while ( *d && d < dend ) {
			if ( *d == ' ' ) last = d;
			if ( *d == '\n' ) { last = d; break; }
			d++;
		}
		if ( ! *d ) last = d;
		memcpy ( p , "# " , 2 );
		p += 2;
		memcpy ( p , start , last - start );
		p += last - start;
		*p++='\n';
		d = last + 1;
		if ( d < END && *d ) goto loop;
		// bail if comment
		if ( m->m_type == TYPE_COMMENT ) {
			//sprintf ( p , "\n" );
			//p += gbstrlen ( p );
			continue;
		}
		if ( m->m_type == TYPE_MONOD2 ) continue;
		if ( m->m_type == TYPE_MONOM2 ) continue;
	skip:
		/* . note: this code commented out because it was specific to
		     an old client
		// if value is from default collection file, do not
		// explicitly list it
		if ( m->m_obj == OBJ_COLL &&
		     ((CollectionRec *)THIS)->m_orig[i] == 1 ) {
			sprintf ( p ,"# Value for <%s> tag taken from "
				  "default.conf.\n",m->m_xml );
			p += gbstrlen ( p );
			continue;
		}
		*/
		// debug point
		//if ( m->m_type == TYPE_SAFEBUF )
		//	log("hey");
		// loop over all in this potential array
		for ( j = 0 ; j < count ; j++ ) {
			// the xml
			if ( p + gbstrlen(m->m_xml) >= pend ) goto hadError;
			sprintf ( p , "<%s>" , m->m_xml );
			p += gbstrlen ( p );
			// print CDATA if string
			if ( m->m_type == TYPE_STRING ||
			     m->m_type == TYPE_STRINGBOX ||
			     m->m_type == TYPE_SAFEBUF ||
			     m->m_type == TYPE_STRINGNONEMPTY ) {
				// NOTE(review): the next lines appear
				// corrupted in this copy of the file (the
				// CDATA-open sprintf and the block close
				// were lost) -- restore from version control
				sprintf ( p , "m_xml , "filterRulesetDefault")==0)
			//	log("got it");
			// . represent it in ascii form
			// . this escapes out <'s and >'s
			// . this ALSO encodes #'s (xml comment indicators)
			p = getParmHtmlEncoded(p,pend,m,s);
			// print CDATA if string
			if ( m->m_type == TYPE_STRING ||
			     m->m_type == TYPE_STRINGBOX ||
			     m->m_type == TYPE_SAFEBUF ||
			     m->m_type == TYPE_STRINGNONEMPTY ) {
				sprintf ( p , "]]>" );
				p += gbstrlen ( p );
			}
			// this is NULL if it ran out of room
			if ( ! p ) goto hadError;
			// advance to next element in array, if it is one
			s = s + m->m_size;
			// close the xml tag
			if ( p + 4 >= pend ) goto hadError;
			// NOTE(review): the closing-tag name appears lost
			// here in this copy (should emit the back tag) --
			// verify against version control
			sprintf ( p , ">\n" );
			p += gbstrlen ( p );
		}
	}
	*p = '\0';
	ff.set ( f );
	if ( ! ff.open ( O_RDWR | O_CREAT | O_TRUNC ) )
		return log("db: Could not open %s : %s",
			   ff.getFilename(),mstrerror(g_errno));
	// save the parm to the file
	len = gbstrlen(buf);
	// use -1 for offset so we do not use pwrite() so it will not leave
	// garbage at end of file
	n = ff.write ( buf , len , -1 );
	ff.close();
	if ( n == len ) return true;
	return log("admin: Could not write to file %s.",ff.getFilename());
 hadError:
	return log("admin: File bigger than %li bytes."
		   " Please increase #define in Parms.cpp.",
		   (long)MAX_CONF_SIZE);
}
// look up a parm by its cgi name; returns NULL if no parm uses "cgi"
Parm *Parms::getParm ( char *cgi ) {
	for ( long k = 0 ; k < m_numParms ; k++ ) {
		char *pc = m_parms[k].m_cgi;
		if ( ! pc ) continue;
		// cheap first-two-byte prefilter before the full compare
		if ( pc[0] != cgi[0] || pc[1] != cgi[1] ) continue;
		if ( strcmp ( pc , cgi ) == 0 ) return &m_parms[k];
	}
	return NULL;
}
/*
#define PHTABLE_SIZE (MAX_PARMS*2)
Parm *Parms::getParm ( char *cgi ) {
// make the hash table for the first call
static long s_phtable [ PHTABLE_SIZE ];
static Parm *s_phparm [ PHTABLE_SIZE ];
static bool s_init = false;
// do not re-make the table if we already did
if ( s_init ) goto skipMakeTable;
// ok, now make the table
s_init = true;
memset ( s_phparm , 0 , PHTABLE_SIZE );
for ( long i = 0 ; i < m_numParms ; i++ ) {
if ( ! m_parms[i].m_cgi ) continue ;
long h = hash32 ( m_parms[i].m_cgi );
long n = h % PHTABLE_SIZE;
while ( s_phparm[n] ) {
// . sanity check
// . we don't have that many parms, they should never
// collide!!... but it is possible i guess.
if ( s_phtable[n] == h ) {
log(LOG_LOGIC,"Parms: collisions forbidden in "
"getParm(). Duplicate cgi name?");
char *xx = NULL; *xx = 0;
}
if (++n >= PHTABLE_SIZE) n = 0;
}
s_phtable[n] = h; // fill the bucket
s_phparm [n] = m; // the parm
}
skipMakeTable:
// look up in table
long h = hash32 ( cgi );
long n = h % PHTABLE_SIZE;
// while bucket is occupied and does not equal our hash... chain
while ( s_phparm[n] && s_phtable[n] != h )
if (++n >= PHTABLE_SIZE) n = 0;
// if empty, no match
return s_phparm[n];
}
*/
// . print the current value of parm "m" (whose storage is at "s")
//   into buffer "p" (bounded by "pend") in ascii form and return the
//   advanced write pointer
// . string-ish types are html-encoded (including '#') so the value is
//   safe to embed in a conf/xml file
char *Parms::getParmHtmlEncoded ( char *p , char *pend , Parm *m , char *s ) {
	// do not breech the buffer
	if ( p + 100 >= pend ) return p;
	// print it out
	char t = m->m_type;
	// all the one-byte types print as a decimal number
	// (TYPE_RETRIES was listed twice here; duplicate removed)
	if ( t == TYPE_CHAR || t == TYPE_BOOL ||
	     t == TYPE_CHECKBOX ||
	     t == TYPE_PRIORITY || t == TYPE_PRIORITY2 ||
	     t == TYPE_PRIORITY_BOXES || t == TYPE_RETRIES ||
	     t == TYPE_FILTER ||
	     t == TYPE_BOOL2 || t == TYPE_CHAR2 )
		sprintf (p,"%li",(long)*s);
	else if ( t == TYPE_FLOAT )
		sprintf (p,"%.03f",*(float *)s);
	else if ( t == TYPE_IP )
		sprintf (p,"%s",iptoa(*(long *)s));
	else if ( t == TYPE_LONG || t == TYPE_LONG_CONST || t == TYPE_RULESET||
		  t == TYPE_SITERULE )
		sprintf (p,"%li",*(long *)s);
	else if ( t == TYPE_LONG_LONG )
		sprintf (p,"%lli",*(long long *)s);
	else if ( t == TYPE_SAFEBUF ) {
		SafeBuf *sb = (SafeBuf *)s;
		p = htmlEncode ( p ,
				 pend ,
				 sb->getBufStart(),
				 sb->getBufStart() + sb->length(),
				 true ); // #?*
	}
	else if ( t == TYPE_STRING ||
		  t == TYPE_STRINGBOX ||
		  t == TYPE_STRINGNONEMPTY ||
		  t == TYPE_TIME) {
		long slen = gbstrlen ( s );
		// this returns the length of what was written, it may
		// not have converted everything if pend-p was too small...
		//p += saftenTags2 ( p , pend - p , s , len );
		p = htmlEncode ( p , pend , s , s + slen , true /*#?*/);
	}
	else if ( t == TYPE_DATE || t == TYPE_DATE2 ) {
		// time is stored as long
		long ct = *(long *)s;
		// get the time struct
		struct tm *tp = gmtime ( (time_t *)&ct ) ;
		// set the "selected" month for the drop down
		strftime ( p , 100 , "%d %b %Y %H:%M UTC" , tp );
	}
	// advance past what was printed; for the htmlEncode branches p was
	// already advanced, so this assumes the output is NUL terminated
	// -- TODO(review): confirm htmlEncode NUL terminates
	p += gbstrlen ( p );
	return p;
}
// returns the number of bytes required to hold the serialized parms
long Parms::getStoredSize() {
	// serialize() in size-only mode (NULL dest buffer) just tallies
	// the byte count without writing anything
	long need = 0;
	serialize ( NULL , &need );
	return need;
}
// . serialize parms to buffer
// . accepts addr of buffer ptr and addr of buffer size
// . on entry buf can be NULL to determine required size
// . if buf is not NULL, *bufSize must specify the size of buf
// . on exit *buf is filled with serialized parms
// . on exit *bufSize is set to the actual len of *buf
bool Parms::serialize( char *buf, long *bufSize ) {
g_errno = 0;
if ( ! bufSize ) {
g_errno = EBADENGINEER;
log( "admin: serialize: bad engineer: no bufSize ptr" );
*bufSize = 0;
return false;
}
bool sizeChk = false;
char *end = NULL;
if ( ! buf ) sizeChk = true; // just calc size
else end = buf + *bufSize; // for overrun checking
// serialize OBJ_CONF and OBJ_COLL parms
*bufSize = 0;
char *p = buf;
// now the parms
struct SerParm *sp = NULL;
for ( long i = 0 ; i < m_numParms ; i++ ) {
Parm *m = &m_parms[i];
// ignore these:
if ( m->m_obj == OBJ_SI ) continue;
if ( m->m_off < 0 ) continue;
if ( m->m_type == TYPE_COMMENT ) continue;
if ( m->m_type == TYPE_MONOD2 ) continue;
if ( m->m_type == TYPE_MONOM2 ) continue;
if ( m->m_type == TYPE_CMD ) continue;
if ( m->m_type == TYPE_LONG_CONST ) continue;
if ( ! m->m_sync ) continue; // parm is not to be synced
// determine the size of the parm value
long size = 0;
if ( m->m_type == TYPE_CHAR ) size = 1;
if ( m->m_type == TYPE_CHAR2 ) size = 1;
if ( m->m_type == TYPE_CHECKBOX ) size = 1;
if ( m->m_type == TYPE_BOOL ) size = 1;
if ( m->m_type == TYPE_BOOL2 ) size = 1;
if ( m->m_type == TYPE_PRIORITY ) size = 1;
if ( m->m_type == TYPE_PRIORITY2 ) size = 1;
if ( m->m_type == TYPE_PRIORITY_BOXES ) size = 1;
if ( m->m_type == TYPE_RETRIES ) size = 1;
if ( m->m_type == TYPE_TIME ) size = 6;
if ( m->m_type == TYPE_DATE2 ) size = 4;
if ( m->m_type == TYPE_DATE ) size = 4;
if ( m->m_type == TYPE_FLOAT ) size = 4;
if ( m->m_type == TYPE_IP ) size = 4;
if ( m->m_type == TYPE_RULESET ) size = 4;
if ( m->m_type == TYPE_LONG ) size = 4;
if ( m->m_type == TYPE_LONG_LONG ) size = 8;
if ( m->m_type == TYPE_STRING ) size = m->m_size;
if ( m->m_type == TYPE_STRINGBOX ) size = m->m_size;
if ( m->m_type == TYPE_STRINGNONEMPTY ) size = m->m_size;
if ( m->m_type == TYPE_SAFEBUF ) size = m->m_size;
if ( m->m_type == TYPE_SITERULE ) size = 4;
// . set size to the total size of array
// . set cnt to the number of itmes
long cnt = 1;
if (m->m_fixed > 0) {
size *= m->m_fixed;
cnt = m->m_fixed;
}
else {
size *= m->m_max;
cnt = m->m_max;
}
if ( m->m_obj == OBJ_CONF ) {
bool overflew = serializeConfParm( m, i, &p, end,
size, cnt,
sizeChk, bufSize );
if ( overflew ) goto overflow;
}
else if ( m->m_obj == OBJ_COLL ) {
collnum_t j = g_collectiondb.getFirstCollnum ();
while ( j >= 0 ) {
CollectionRec *cr = g_collectiondb.getRec( j );
bool overflew = serializeCollParm( cr,
m, i, &p,
end,
size, cnt,
sizeChk,
bufSize );
if ( overflew ) goto overflow;
j = g_collectiondb.getNextCollnum ( j );
}
}
}
if ( ! sizeChk ) {
// set the final marker to 0s to indicate the end
sp = (struct SerParm *)p;
sp->i = 0;
sp->obj = 0;
sp->size = 0;
sp->cnt = 0;
}
*bufSize += sizeof( struct SerParm );
return true;
overflow:
g_errno = EBADENGINEER;
log(LOG_WARN, "admin: serialize: bad engineer: overflow" );
*bufSize = 0;
return false;
}
// . serialize one OBJ_CONF parm into *p (or just tally its size)
// . strings are stored compacted back-to-back (NUL-terminated) so the
//   record's size field is 0 and the payload length is implicit
// . returns true on buffer overflow, false on success
// . *bufSz is incremented by the bytes this record requires
bool Parms::serializeConfParm( Parm *m, long i, char **p, char *end,
			       long size, long cnt,
			       bool sizeChk, long *bufSz ) {
	SerParm *sp = NULL;
	if (m->m_type == TYPE_STRING ||
	    m->m_type == TYPE_STRINGBOX ||
	    m->m_type == TYPE_STRINGNONEMPTY ) {
		char *sVal = NULL;
		if ( ! sizeChk ) {
			sp = (SerParm *)*p;
			sp->i = i; // index of parm
			sp->obj = OBJ_CONF;
			sp->size = 0L; // 0 for strings
			sp->cnt = cnt; // # of strings
			// if an array, get num of member
			if ( cnt > 1 ) {
				sp->off = m->m_off - sizeof(long);
				sp->num = *(long *)((char *)&g_conf
						    + sp->off);
			}
			else {
				sp->off = 0;
				sp->num = 0;
			}
			sVal = sp->val;
		}
		char *sConf = (char *)&g_conf + m->m_off;
		long totLen = 0;
		long tcnt = cnt;
		while ( tcnt ) {
			long len = gbstrlen( sConf );
			if ( ! sizeChk ) {
				// copy the parm value
				// BUGFIX: strcpy() writes len+1 bytes, the
				// terminating NUL landing at sVal+len. the
				// old check (sVal+len > end) allowed the NUL
				// to be written one byte past the buffer.
				if ( sVal + len >= end )
					return true; // overflow
				strcpy( sVal, sConf );
			}
			totLen += len + 1; // incl the NULL
			// inc conf ptr by size of strings
			sConf += m->m_size;
			// inc ser value by len of str + NULL
			sVal += len + 1;
			tcnt--;
		}
		if ( ! sizeChk ) {
			// inc by tot len of compacted strings
			*p += sizeof( *sp ) + totLen;
		}
		*bufSz += sizeof( SerParm ) + totLen;
	}
	else {
		if ( ! sizeChk ) {
			sp = (SerParm *)*p;
			sp->i = i;
			sp->obj = OBJ_CONF;
			sp->size = size; // tot size if array
			sp->cnt = cnt; // num of items
			// if array, get num of member
			if ( cnt > 1 ) {
				sp->off = m->m_off - sizeof(long);
				sp->num = *(long *)((char *)&g_conf
						    + sp->off);
			}
			else {
				sp->off = 0;
				sp->num = 0;
			}
			// copy the parm's whole value
			if ( sp->val + size > end )
				return true; // overflow
			memcpy( sp->val,
				(char *)&g_conf + m->m_off, size );
			// inc by tot size if array
			*p += sizeof( *sp ) + size;
		}
		*bufSz += sizeof( SerParm ) + size;
	}
	return false;
}
// TODO: add TYPE_SAFEBUF support
// . serialize one OBJ_COLL parm for collection rec "cr" into *p
//   (or just tally its size when sizeChk is true)
// . mirrors serializeConfParm() but reads values from cr, not g_conf
// . returns true on buffer overflow, false on success
bool Parms::serializeCollParm( CollectionRec *cr,
			       Parm *m, long i, char **p, char *end,
			       long size, long cnt,
			       bool sizeChk, long *bufSize) {
	SerParm *sp = NULL;
	if (m->m_type == TYPE_STRING ||
	    m->m_type == TYPE_STRINGBOX ||
	    m->m_type == TYPE_STRINGNONEMPTY ) {
		char *sVal = NULL;
		if ( ! sizeChk ) {
			sp = (SerParm *)*p;
			sp->i = i; // index of parm
			sp->obj = OBJ_COLL;
			sp->size = 0L; // 0 for strings
			sp->cnt = cnt; // # of strings
			// get num of member
			if ( cnt > 1 ) {
				sp->off = m->m_off - sizeof(long);
				sp->num = *(long *)((char *)cr + sp->off);
			}
			else {
				sp->off = 0;
				sp->num = 0;
			}
			sVal = sp->val;
		}
		char *sColl = (char *)cr + m->m_off;
		long totLen = 0;
		long tcnt = cnt;
		while ( tcnt ) {
			long len = gbstrlen( sColl );
			if ( ! sizeChk ) {
				// copy the parm value
				// BUGFIX: strcpy() also writes the NUL at
				// sVal+len, so sVal+len must be < end. the
				// old check (sVal+len > end) allowed a write
				// one byte past the buffer.
				if ( sVal + len >= end )
					return true;
				strcpy( sVal, sColl );
			}
			totLen += len + 1; // incl NULL
			// inc cr ptr by size of strs
			sColl += m->m_size;
			// . inc ser value by len of str + NULL
			sVal += len + 1;
			tcnt--;
		}
		if ( ! sizeChk ) {
			// inc by tot len of cmpctd str
			*p += sizeof( *sp ) + totLen;
		}
		*bufSize += sizeof( SerParm ) + totLen;
	}
	else {
		if ( ! sizeChk ) {
			sp = (SerParm *)*p;
			sp->i = i;
			sp->obj = OBJ_COLL;
			sp->size = size; // tot size
			sp->cnt = cnt; // num of items
			// get num of member
			if ( cnt > 1 ) {
				sp->off = m->m_off - sizeof(long);
				sp->num = *(long *)((char *)cr + sp->off);
			}
			else {
				sp->off = 0;
				sp->num = 0;
			}
			// copy whole value
			if ( sp->val + size > end )
				return true;
			memcpy( sp->val,
				(char *)cr + m->m_off,
				size );
			// inc by whole size of value
			*p += sizeof( *sp ) + size;
		}
		*bufSize += sizeof( SerParm ) + size;
	}
	return false;
}
// deserialize parms from buffer and set our values to the new values
// . buf must be a buffer produced by Parms::serialize()
// . saves g_conf and any changed CollectionRecs as a side effect
void Parms::deserialize( char *buf ) {
	g_errno = 0;
	char *p = buf;
	bool confChgd = false;
	SerParm *sp = (SerParm *)p;
	long numLooped = 0;
	// sanity cap so a corrupt buffer cannot loop forever
	const long MAX_LOOP = (long)(MAX_PARMS*1.5);
	// . the end marker record has obj/size/cnt all zeroed
	// . BUGFIX: string parms are serialized with sp->size == 0 (see
	//   serializeConfParm), so requiring sp->size > 0 here made us
	//   stop at the first string record and drop the rest of the
	//   buffer. only require sp->size >= 0 for validity.
	while ( (sp->obj || sp->size || sp->cnt) &&
		sp->obj > 0 && sp->size >= 0 && sp->cnt > 0 &&
		numLooped < MAX_LOOP ) {
		// grab the parm we're working on
		if ( sp->i < 0 || sp->i >= m_numParms ) {
			log( "admin: invalid parm # in Parms::deserialize" );
			char *xx = NULL; *xx = 0;
		}
		Parm *m = &m_parms[ sp->i ];
		if ( sp->obj == OBJ_CONF ) {
			deserializeConfParm( m, sp, &p, &confChgd );
		}
		else if ( sp->obj == OBJ_COLL ) {
			// serialize() wrote one record per collection for
			// this parm, consecutively; consume one per rec
			collnum_t j = g_collectiondb.getFirstCollnum ();
			while ( j >= 0 ) {
				CollectionRec *cr = g_collectiondb.getRec( j );
				deserializeCollParm( cr,
						     m, sp, &p );
				sp = (SerParm *)p;
				j = g_collectiondb.getNextCollnum ( j );
			}
		}
		// setup the next rec
		sp = (SerParm *)p;
		numLooped++;
	}
	if (numLooped >= MAX_LOOP) {
		log( "admin: infinite loop in Parms::deserialize(). halting!");
		char *xx = NULL; *xx = 0;
	}
	// if we changed the conf, we need to save it
	if ( confChgd ) {
		g_conf.save ();
	}
	// if we changed a CollectionRec, we need to save it
	long j = g_collectiondb.getFirstCollnum ();
	while ( j >= 0 ) {
		CollectionRec *cr = g_collectiondb.getRec( j );
		if ( cr->m_needsSave ) {
			cr->save ();
			// so g_spiderCache can reload if sameDomainWait, etc.
			// have changed
			g_collectiondb.updateTime();
		}
		j = g_collectiondb.getNextCollnum ( j );
	}
}
// . apply one serialized OBJ_CONF parm record (*sp) to g_conf
// . advances *p past the consumed record (header plus value payload)
// . sets *confChgd to true if any value actually changed
void Parms::deserializeConfParm( Parm *m, SerParm *sp, char **p,
				 bool *confChgd ) {
	// sanity: the value must land inside g_conf
	if ( m->m_off + sp->size > (long)sizeof(g_conf) ||
	     m->m_off + sp->size < 0 ){
		// BUGFIX: this checks against g_conf, not a collection
		// rec -- the old message was copy-pasted from
		// deserializeCollParm() and blamed the wrong structure
		log(LOG_WARN, "admin: deserializing parm would overflow "
		    "the conf!");
		char *xx =0; *xx = 0;
	}
	if ( sp->size == 0 ) { // string
		// first pass: compare the compacted strings against the
		// current conf values to see if anything changed
		char *sVal = sp->val;
		char *sConf = (char *)&g_conf + m->m_off;
		long totLen = 0;
		bool goodParm = true;
		long tcnt = sp->cnt;
		while ( tcnt ) {
			goodParm = (goodParm && 0 == strcmp( sVal, sConf ));
			long len = gbstrlen( sVal );
			totLen += len + 1;
			// inc ser value by len of str + NULL
			sVal += len + 1;
			// inc conf ptr by size of strings
			sConf += m->m_size;
			tcnt--;
		}
		if ( goodParm ) {
			// . inc by sizeof rec and tot len of compacted array
			*p += sizeof( *sp ) + totLen;
			return;
		}
		// parms don't match -- second pass copies them in
		sVal = sp->val;
		sConf = (char *)&g_conf + m->m_off;
		totLen = 0;
		tcnt = sp->cnt;
		while ( tcnt ) {
			// copy an array value to this parm
			strcpy( sConf, sVal );
			long len = gbstrlen( sVal );
			totLen += len + 1; // incl the NULL
			// inc conf ptr by size of strings
			sConf += m->m_size;
			// inc ser value by len of str + NULL
			sVal += len + 1;
			tcnt--;
		}
		// set num of member (array element count lives just
		// before the array in g_conf, at sp->off)
		if ( sp->off ) {
			long *tmp = (long *)((char *)&g_conf + sp->off);
			*tmp = sp->num;
		}
		// log the changed parm
		log( LOG_INFO, "admin: Parm "
		     "#%li \"%s\" (\"%s\") in conf "
		     "changed on sync.",
		     sp->i, m->m_cgi, m->m_title );
		*confChgd = true;
		// inc by sizeof rec and tot len of compacted array
		*p += sizeof( *sp ) + totLen;
	}
	else {
		// fixed-size value: memcmp/memcpy the whole blob
		bool goodParm = ( 0 == memcmp( sp->val,
					       (char *)&g_conf + m->m_off,
					       sp->size ) );
		if ( ! goodParm ) {
			// copy the new parm to m's loc
			memcpy( (char *)&g_conf + m->m_off, sp->val,
				sp->size );
			// set num of member
			if ( sp->off ) {
				long *tmp = (long *)((char *)&g_conf
						     + sp->off);
				*tmp = sp->num;
			}
			// log the changed parm
			log( LOG_INFO, "admin: Parm "
			     "#%li \"%s\" (\"%s\") in conf "
			     "changed on sync.",
			     sp->i, m->m_cgi, m->m_title );
			*confChgd = true;
		}
		// increase by rec size and size of parm
		*p += sizeof( *sp ) + sp->size;
	}
}
// . apply one serialized OBJ_COLL parm record (*sp) to collection rec "cr"
// . advances *p past the consumed record (header plus value payload)
// . sets cr->m_needsSave when a value actually changed so the caller
//   (Parms::deserialize) knows to save the rec
void Parms::deserializeCollParm( CollectionRec *cr,
				 Parm *m, SerParm *sp, char **p ) {
	// sanity: the value must land inside the CollectionRec
	// NOTE(review): for strings sp->size is 0, so this check does not
	// bound the compacted string payload -- confirm upstream trust
	if ( m->m_off + sp->size > (long)sizeof(CollectionRec) ||
	     m->m_off + sp->size < 0 ) {
		log(LOG_WARN, "admin: deserializing parm would overflow "
		    "the collection rec!");
		char *xx =0; *xx = 0;
	}
	if ( sp->size == 0 ) { // strings
		// first pass: compare the compacted strings in the record
		// against the rec's current values to detect a change
		char *sVal = sp->val;
		char *sColl = (char *)cr + m->m_off;
		long totLen = 0;
		long tcnt = sp->cnt;
		bool goodParm = true;
		while ( tcnt ) {
			goodParm= (goodParm && 0 == strcmp(sVal, sColl));
			long len = gbstrlen( sVal );
			totLen += len + 1; //incl NULL
			sVal += len + 1; //incl NULL
			// inc by size of strs (rec side is a fixed-stride
			// array of m_size-byte slots)
			sColl += m->m_size;
			tcnt--;
		}
		if ( goodParm ) {
			// nothing changed; just consume the record
			// . inc by sizeof rec and
			//   tot len of compacted array
			*p += sizeof( *sp ) + totLen;
			return;
		}
		// parms don't match: second pass copies the new values in
		sVal = sp->val;
		sColl = (char *)cr + m->m_off;
		totLen = 0;
		tcnt = sp->cnt;
		while ( tcnt ) {
			// . copy an array value to this parm
			strcpy( sColl, sVal );
			long len = gbstrlen( sVal );
			totLen += len + 1; // +the NULL
			// . inc conf ptr by size
			//   of strings
			sColl += m->m_size;
			// . inc ser value by len of str + NULL
			sVal += len + 1;
			tcnt--;
		}
		// we changed the record
		cr->m_needsSave = true;
		// set num of member (element count stored just before the
		// array in the rec, at offset sp->off)
		if ( sp->off ) {
			long *tmp = (long *)((char *)cr + sp->off);
			*tmp = sp->num;
		}
		// log the changed parm
		log( LOG_INFO, "admin: Parm "
		     "#%li \"%s\" (\"%s\") in "
		     "collection \"%s\" "
		     "changed on sync.",
		     sp->i, m->m_cgi, m->m_title,
		     cr->m_coll );
		// . inc by sizeof rec and
		//   tot len of compacted array
		*p += sizeof( *sp ) + totLen;
	}
	else {
		// fixed-size value: only touch the rec when the bytes differ
		if ( 0 != memcmp( sp->val, (char *)cr + m->m_off, sp->size) ) {
			// copy the new value
			memcpy( (char *)cr + m->m_off,
				sp->val,
				sp->size );
			// set num of member
			if ( sp->off ) {
				long *tmp = (long *)((char *)cr + sp->off);
				*tmp = sp->num;
			}
			// log the changed parm
			log( LOG_INFO, "admin: Parm "
			     "#%li \"%s\" (\"%s\") "
			     "in collection \"%s\" "
			     "changed on sync.",
			     sp->i, m->m_cgi,
			     m->m_title,
			     cr->m_coll );
			// we changed the record
			cr->m_needsSave = true;
		}
		// inc by rec size and tot len of array
		*p += sizeof( *sp ) + sp->size;
	}
}
void Parms::init ( ) {
// initialize the Parms class if we need to, only do it once
static bool s_init = false ;
if ( s_init ) return;
s_init = true ;
// default all
for ( long i = 0 ; i < MAX_PARMS ; i++ ) {
m_parms[i].m_hash = 0 ;
m_parms[i].m_title = "" ; // for detecting if not set
m_parms[i].m_desc = "" ; // for detecting if not set
m_parms[i].m_cgi = NULL ; // for detecting if not set
m_parms[i].m_off = -1 ; // for detecting if not set
m_parms[i].m_def = NULL ; // for detecting if not set
m_parms[i].m_type = TYPE_NONE ; // for detecting if not set
m_parms[i].m_page = -1 ; // for detecting if not set
m_parms[i].m_obj = -1 ; // for detecting if not set
m_parms[i].m_max = 1 ; // max elements in array
m_parms[i].m_fixed = 0 ; // size of fixed size array
m_parms[i].m_size = 0 ; // max string size
m_parms[i].m_cast = 1 ; // send to all hosts?
m_parms[i].m_rowid = -1 ; // rowid of -1 means not in row
m_parms[i].m_addin = 0 ; // add insert row command?
m_parms[i].m_rdonly = 0 ; // is command off in read-only mode?
m_parms[i].m_hdrs = 1 ; // assume to always print headers
m_parms[i].m_perms = 0 ; // same as containing WebPages perms
m_parms[i].m_plen = -1 ; // offset for strings length
m_parms[i].m_group = 1 ; // start of a new group of controls?
m_parms[i].m_priv = 0 ; // is it private?
m_parms[i].m_save = 1 ; // save to xml file?
m_parms[i].m_min = -1 ; // min value (for long parms)
// search fields
m_parms[i].m_sparm = 0;
m_parms[i].m_scmd = "/search";
m_parms[i].m_scgi = NULL;// defaults to m_cgi
m_parms[i].m_flags = 0;
m_parms[i].m_icon = NULL;
m_parms[i].m_class = NULL;
m_parms[i].m_qterm = NULL;
m_parms[i].m_subMenu= 0;
m_parms[i].m_spriv = 0;
// m_sdefo = -1; // just use m_off for this!
m_parms[i].m_sminc = -1; // min in collection rec
m_parms[i].m_smaxc = -1; // max in collection rec
m_parms[i].m_smin = 0x80000000; // 0xffffffff;
m_parms[i].m_smax = 0x7fffffff;
m_parms[i].m_soff = -1; // offset into SearchInput
m_parms[i].m_sprpg = 1; // propagate to other pages via GET
m_parms[i].m_sprpp = 1; // propagate to other pages via POST
m_parms[i].m_sync = true;
}
// inherit perms from page
//for ( long i = 1 ; i < MAX_PARMS ; i++ )
// if ( m_parms[i].m_page )
// m_parms[i].m_perms = m_parms[i-1].m_perms;
Parm *m = &m_parms [ 0 ];
CollectionRec cr;
SearchInput si;
///////////////////////////////////////////
// CAN ONLY BE CHANGED IN CONF AT STARTUP (no cgi field)
///////////////////////////////////////////
char *g = (char *)&g_conf;
char *x = (char *)&cr;
char *y = (char *)&si;
// just a comment in the conf file
m->m_desc =
"All <, >, \" and # characters that are values for a field "
"contained herein must be represented as "
"<, >, " and # respectively.";
m->m_type = TYPE_COMMENT;
m->m_page = PAGE_NONE;
m->m_obj = OBJ_CONF;
m++;
// if the next guy has no description (m_desc) he is assumed to
// share the description of the previous parm with one.
/*
m->m_title = "main external ip";
m->m_desc = "This is the IP and port that a user connects to in "
"order to search this Gigablast network. This should be the "
"same for all gb processes.";
m->m_off = (char *)&g_conf.m_mainExternalIp - g;
m->m_def = "127.0.0.1"; // if no default, it is required!
m->m_type = TYPE_IP;
m++;
m->m_title = "main external port";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_mainExternalPort - g;
m->m_def = "80";
m->m_type = TYPE_LONG;
m++;
*/
m->m_title = "max mem";
m->m_desc = "Mem available to this process. May be exceeded due "
"to fragmentation.";
m->m_off = (char *)&g_conf.m_maxMem - g;
m->m_def = "4000000000";
m->m_type = TYPE_LONG_LONG;
m++;
/*
m->m_title = "indexdb split";
m->m_desc = "Number of times to split indexdb across groups. "
"Must be a power of 2.";
m->m_off = (char *)&g_hostdb.m_indexSplits - g;
// -1 means to do a full split just based on docid, just like titledb
m->m_def = "-1"; // "1";
m->m_type = TYPE_LONG;
m++;
m->m_title = "full indexdb split";
m->m_desc = "Set to 1 (true) if indexdb is fully split. Performance "
"is much better for fully split indexes.";
m->m_off = (char *)&g_conf.m_fullSplit - g;
m->m_def = "0";
m->m_type = TYPE_BOOL;
m++;
m->m_title = "legacy indexdb split";
m->m_desc = "Set to 1 (true) if using legacy indexdb splitting. For "
"data generated with farmington release.";
m->m_off = (char *)&g_conf.m_legacyIndexdbSplit - g;
m->m_def = "0";
m->m_type = TYPE_BOOL;
m++;
m->m_title = "tfndb extension bits";
m->m_desc = "Number of extension bits to use in Tfndb. Increased for "
"large indexes.";
m->m_off = (char *)&g_conf.m_tfndbExtBits - g;
m->m_def = "7";
m->m_type = TYPE_LONG;
m++;
*/
/*
m->m_title = "checksumdb key size";
m->m_desc = "This determines the key size for checksums. "
"Must be set for every host.";
//m->m_cgi = "";
m->m_off = (char *)&g_conf.m_checksumdbKeySize - g;
m->m_type = TYPE_LONG;
m->m_def = "12";
m++;
*/
// just a comment in the conf file
m->m_desc =
"Below the various Gigablast databases are configured.\n"
"<*dbMaxTreeMem> - mem used for holding new recs\n"
"<*dbMaxDiskPageCacheMem> - disk page cache mem for this db\n"
"<*dbMaxCacheMem> - cache mem for holding single recs\n"
//"<*dbMinFilesToMerge> - required # files to trigger merge\n"
"<*dbSaveCache> - save the rec cache on exit?\n"
"<*dbMaxCacheAge> - max age (seconds) for recs in rec cache\n"
"See that Stats page for record counts and stats.\n";
m->m_type = TYPE_COMMENT;
m++;
m->m_title = "dns max cache mem";
m->m_desc = "How many bytes should be used for caching DNS replies?";
m->m_off = (char *)&g_conf.m_dnsMaxCacheMem - g;
m->m_def = "128000";
m->m_type = TYPE_LONG;
m++;
// g_dnsDistributed always saves now. main.cpp inits it that way.
//m->m_title = "dns save cache";
//m->m_desc = "Should the DNS reply cache be saved/loaded on "
// "exit/startup?";
//m->m_off = (char *)&g_conf.m_dnsSaveCache - g;
//m->m_def = "0";
//m->m_type = TYPE_BOOL;
//m++;
m->m_title = "tagdb max tree mem";
m->m_desc = "A tagdb record "
"assigns a url or site to a ruleset. Each tagdb record is "
"about 100 bytes or so.";
m->m_off = (char *)&g_conf.m_tagdbMaxTreeMem - g;
m->m_def = "1028000";
m->m_type = TYPE_LONG;
m++;
m->m_title = "tagdb max page cache mem";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_tagdbMaxDiskPageCacheMem - g;
m->m_def = "200000";
m->m_type = TYPE_LONG;
m++;
//m->m_title = "tagdb max cache mem";
//m->m_desc = "";
//m->m_off = (char *)&g_conf.m_tagdbMaxCacheMem - g;
//m->m_def = "128000";
//m->m_type = TYPE_LONG;
//m++;
//m->m_title = "tagdb min files to merge";
//m->m_desc = "";
//m->m_off = (char *)&g_conf.m_tagdbMinFilesToMerge - g;
//m->m_def = "2";
//m->m_type = TYPE_LONG;
//m->m_save = 0;
//m++;
m->m_title = "catdb max tree mem";
m->m_desc = "A catdb record "
"assigns a url or site to DMOZ categories. Each catdb record "
"is about 100 bytes.";
m->m_off = (char *)&g_conf.m_catdbMaxTreeMem - g;
m->m_def = "1000000";
m->m_type = TYPE_LONG;
m++;
m->m_title = "catdb max page cache mem";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_catdbMaxDiskPageCacheMem - g;
m->m_def = "25000000";
m->m_type = TYPE_LONG;
m++;
m->m_title = "catdb max cache mem";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_catdbMaxCacheMem - g;
m->m_def = "0";
m->m_type = TYPE_LONG;
m++;
m->m_title = "catdb min files to merge";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_catdbMinFilesToMerge - g;
m->m_def = "2";
m->m_type = TYPE_LONG;
m->m_save = 0;
m++;
/*
m->m_title = "revdb max tree mem";
m->m_desc = "Revdb holds the meta list we added for this doc.";
m->m_off = (char *)&g_conf.m_revdbMaxTreeMem - g;
m->m_def = "30000000";
m->m_type = TYPE_LONG;
m++;
*/
/*
m->m_title = "timedb max tree mem";
m->m_desc = "Timedb holds event time intervals";
m->m_off = (char *)&g_conf.m_timedbMaxTreeMem - g;
m->m_def = "30000000";
m->m_type = TYPE_LONG;
m++;
*/
/*
m->m_title = "titledb max tree mem";
m->m_desc = "Titledb holds the compressed documents that have been "
"indexed.";
m->m_off = (char *)&g_conf.m_titledbMaxTreeMem - g;
m->m_def = "10000000";
m->m_type = TYPE_LONG;
m++;
m->m_title = "titledb max cache mem";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_titledbMaxCacheMem - g;
m->m_def = "1000000";
m->m_type = TYPE_LONG;
m++;
m->m_title = "titledb max cache age";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_titledbMaxCacheAge - g;
m->m_def = "86400"; // 1 day
m->m_type = TYPE_LONG;
m++;
m->m_title = "titledb save cache";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_titledbSaveCache - g;
m->m_def = "0";
m->m_type = TYPE_BOOL;
m++;
*/
m->m_title = "clusterdb max tree mem";
m->m_desc = "Clusterdb caches small records for site clustering "
"and deduping.";
m->m_off = (char *)&g_conf.m_clusterdbMaxTreeMem - g;
m->m_def = "1000000";
m->m_type = TYPE_LONG;
m++;
/*
m->m_title = "clusterdb max cache mem";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_clusterdbMaxCacheMem - g;
m->m_def = "100000000";
m->m_type = TYPE_LONG;
m++;
m->m_title = "clusterdb max page cache mem";
m->m_desc = "";
m->m_off =(char *)&g_conf.m_clusterdbMaxDiskPageCacheMem - g;
m->m_def = "100000000";
m->m_type = TYPE_LONG;
m++;
*/
// this is overridden by collection
m->m_title = "clusterdb min files to merge";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_clusterdbMinFilesToMerge - g;
//m->m_def = "2";
m->m_def = "-1"; // -1 means to use collection rec
m->m_type = TYPE_LONG;
m->m_save = 0;
m++;
m->m_title = "clusterdb save cache";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_clusterdbSaveCache - g;
m->m_def = "0";
m->m_type = TYPE_BOOL;
m++;
m->m_title = "max vector cache mem";
m->m_desc = "Max memory for dup vector cache.";
m->m_off = (char *)&g_conf.m_maxVectorCacheMem - g;
m->m_def = "10000000";
m->m_type = TYPE_LONG;
m++;
/*
m->m_title = "checksumdb max tree mem";
m->m_desc = "Checksumdb is used for deduping same-site urls at "
"index time.";
m->m_off = (char *)&g_conf.m_checksumdbMaxTreeMem - g;
m->m_def = "1000000";
m->m_type = TYPE_LONG;
m++;
m->m_title = "checksumdb max cache mem";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_checksumdbMaxCacheMem - g;
m->m_def = "2000000";
m->m_type = TYPE_LONG;
m++;
m->m_title = "checksumdb max page cache mem";
m->m_desc = "";
m->m_off =(char *)&g_conf.m_checksumdbMaxDiskPageCacheMem-g;
m->m_def = "1000000";
m->m_type = TYPE_LONG;
m++;
// this is overridden by collection
m->m_title = "checksumdb min files to merge";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_checksumdbMinFilesToMerge- g;
//m->m_def = "2";
m->m_def = "-1"; // -1 means to use collection rec
m->m_type = TYPE_LONG;
m->m_save = 0;
m++;
*/
/*
m->m_title = "tfndb max tree mem";
m->m_desc = "Tfndb holds small records for each url in Spiderdb or "
"Titledb.";
m->m_off = (char *)&g_conf.m_tfndbMaxTreeMem - g;
m->m_def = "1000000";
m->m_type = TYPE_LONG;
m++;
m->m_title = "tfndb max page cache mem";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_tfndbMaxDiskPageCacheMem - g;
m->m_def = "5000000";
m->m_type = TYPE_LONG;
m++;
*/
/*
// this is overridden by collection
m->m_title = "tfndb min files to merge";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_tfndbMinFilesToMerge - g;
m->m_def = "2";
m->m_type = TYPE_LONG;
m->m_save = 0;
m++;
*/
/*
m->m_title = "spiderdb max tree mem";
m->m_desc = "Spiderdb holds urls to be spidered.";
m->m_off = (char *)&g_conf.m_spiderdbMaxTreeMem - g;
m->m_def = "1000000";
m->m_type = TYPE_LONG;
m++;
m->m_title = "spiderdb max cache mem";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_spiderdbMaxCacheMem - g;
m->m_def = "0";
m->m_type = TYPE_LONG;
m++;
m->m_title = "spiderdb max page cache mem";
m->m_desc = "";
m->m_off =(char *)&g_conf.m_spiderdbMaxDiskPageCacheMem-g;
m->m_def = "500000";
m->m_type = TYPE_LONG;
m++;
// this is overridden by collection
m->m_title = "spiderdb min files to merge";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_spiderdbMinFilesToMerge - g;
//m->m_def = "2";
m->m_def = "-1"; // -1 means to use collection rec
m->m_type = TYPE_LONG;
m->m_save = 0;
m++;
*/
m->m_title = "robotdb max cache mem";
m->m_desc = "Robotdb caches robot.txt files.";
m->m_off = (char *)&g_conf.m_robotdbMaxCacheMem - g;
m->m_def = "128000";
m->m_type = TYPE_LONG;
m++;
m->m_title = "robotdb save cache";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_robotdbSaveCache - g;
m->m_def = "0";
m->m_type = TYPE_BOOL;
m++;
/*
m->m_title = "indexdb max tree mem";
m->m_desc = "Indexdb holds the terms extracted from spidered "
"documents.";
m->m_off = (char *)&g_conf.m_indexdbMaxTreeMem - g;
m->m_def = "10000000";
m->m_type = TYPE_LONG;
m++;
m->m_title = "indexdb max cache mem";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_indexdbMaxCacheMem - g;
m->m_def = "5000000";
m->m_type = TYPE_LONG;
m++;
m->m_title = "indexdb max page cache mem";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_indexdbMaxDiskPageCacheMem - g;
m->m_def = "50000000";
m->m_type = TYPE_LONG;
m++;
*/
m->m_title = "linkdb max page cache mem";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_linkdbMaxDiskPageCacheMem - g;
m->m_def = "0";
m->m_type = TYPE_LONG;
m++;
/*
// this is overridden by collection
m->m_title = "indexdb min files to merge";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_indexdbMinFilesToMerge - g;
//m->m_def = "6";
m->m_def = "-1"; // -1 means to use collection rec
m->m_type = TYPE_LONG;
m->m_save = 0;
m++;
m->m_title = "indexdb max index list age";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_indexdbMaxIndexListAge - g;
m->m_def = "60";
m->m_type = TYPE_LONG;
m++;
//m->m_title = "indexdb truncation limit";
//m->m_desc = "";
//m->m_off = (char *)&g_conf.m_indexdbTruncationLimit - g;
//m->m_def = "50000000";
//m->m_type = TYPE_LONG;
//m++;
m->m_title = "indexdb save cache";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_indexdbSaveCache - g;
m->m_def = "0";
m->m_type = TYPE_BOOL;
m++;
*/
/*
m->m_title = "datedb max tree mem";
m->m_desc = "Datedb holds the terms extracted from spidered "
"documents.";
m->m_off = (char *)&g_conf.m_datedbMaxTreeMem - g;
m->m_def = "10000000";
m->m_type = TYPE_LONG;
m++;
m->m_title = "datedb max cache mem";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_datedbMaxCacheMem - g;
m->m_def = "1000000";
m->m_type = TYPE_LONG;
m++;
// this is overridden by collection
m->m_title = "datedb min files to merge";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_datedbMinFilesToMerge - g;
//m->m_def = "8";
m->m_def = "-1"; // -1 means to use collection rec
m->m_type = TYPE_LONG;
m->m_save = 0;
m++;
m->m_title = "datedb max index list age";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_datedbMaxIndexListAge - g;
m->m_def = "60";
m->m_type = TYPE_LONG;
m++;
m->m_title = "datedb save cache";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_datedbSaveCache - g;
m->m_def = "0";
m->m_type = TYPE_BOOL;
m++;
*/
/*
m->m_title = "linkdb max tree mem";
m->m_desc = "Linkdb stores linking information";
m->m_off = (char *)&g_conf.m_linkdbMaxTreeMem - g;
m->m_def = "20000000";
m->m_type = TYPE_LONG;
m++;
// this is overridden by collection
m->m_title = "linkdb min files to merge";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_linkdbMinFilesToMerge - g;
m->m_def = "-1"; // -1 means to use collection rec
m->m_type = TYPE_LONG;
//m->m_save = 0;
m++;
*/
/*
m->m_title = "quota table max mem";
m->m_desc = "For caching and keeping tabs on exact quotas per "
"domain without having to do a disk seek. If you are using "
"exact quotas and see a lot of disk seeks on Indexdb, try "
"increasing this.";
m->m_off = (char *)&g_conf.m_quotaTableMaxMem - g;
m->m_def = "1000000";
m->m_type = TYPE_LONG;
m++;
*/
m->m_title = "statsdb max tree mem";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_statsdbMaxTreeMem - g;
m->m_def = "5000000";
m->m_type = TYPE_LONG;
m++;
m->m_title = "statsdb max cache mem";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_statsdbMaxCacheMem - g;
m->m_def = "0";
m->m_type = TYPE_LONG;
m++;
m->m_title = "statsdb max disk page cache mem";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_statsdbMaxDiskPageCacheMem - g;
m->m_def = "1000000";
m->m_type = TYPE_LONG;
m++;
//m->m_title = "statsdb min files to merge";
//m->m_desc = "";
//m->m_off = (char *)&g_conf.m_statsdbMinFilesToMerge - g;
//m->m_def = "5";
//m->m_type = TYPE_LONG;
//m++;
/*
m->m_title = "use buckets for in memory recs";
m->m_desc = "Use buckets for in memory recs for indexdb, datedb, "
"and linkdb.";
m->m_off = (char *)&g_conf.m_useBuckets - g;
m->m_def = "1";
m->m_type = TYPE_BOOL;
m++;
*/
m->m_title = "http max send buf size";
m->m_desc = "Maximum bytes of a doc that can be sent before having "
"to read more from disk";
m->m_off = (char *)&g_conf.m_httpMaxSendBufSize - g;
m->m_def = "128000";
m->m_type = TYPE_LONG;
m++;
m->m_title = "search results max cache mem";
m->m_desc = "Bytes to use for caching search result pages.";
m->m_off = (char *)&g_conf.m_searchResultsMaxCacheMem - g;
m->m_def = "100000";
m->m_type = TYPE_LONG;
m++;
//m->m_title = "search results max cache age";
//m->m_desc = "Maximum age to cache search results page in seconds.";
//m->m_off = (char *)&g_conf.m_searchResultsMaxCacheAge - g;
//m->m_def = "86400";
//m->m_type = TYPE_LONG;
//m++;
//m->m_title = "search results save cache";
//m->m_desc = "Should the search results cache be saved to disk?";
//m->m_off = (char *)&g_conf.m_searchResultsSaveCache - g;
//m->m_def = "0";
//m->m_type = TYPE_BOOL;
//m++;
//m->m_title = "site link info max cache mem";
//m->m_desc = "Bytes to use for site link info data.";
//m->m_off = (char *)&g_conf.m_siteLinkInfoMaxCacheMem - g;
//m->m_def = "100000";
//m->m_type = TYPE_LONG;
//m++;
//m->m_title = "site link info max cache age";
//m->m_desc = "Maximum age to cache site link info data in seconds.";
//m->m_off = (char *)&g_conf.m_siteLinkInfoMaxCacheAge - g;
//m->m_def = "3600";
//m->m_type = TYPE_LONG;
//m++;
//m->m_title = "site link info save cache";
//m->m_desc = "Should the site link info cache be saved to disk?";
//m->m_off = (char *)&g_conf.m_siteLinkInfoSaveCache - g;
//m->m_def = "0";
//m->m_type = TYPE_BOOL;
//m++;
//m->m_title = "site quality max cache mem";
//m->m_desc = "Bytes to use for site or root page quality.";
//m->m_off = (char *)&g_conf.m_siteQualityMaxCacheMem - g;
//m->m_def = "2000000"; // 2MB
//m->m_type = TYPE_LONG;
//m++;
//m->m_title = "site quality save cache";
//m->m_desc = "Should the site link info cache be saved to disk?";
//m->m_off = (char *)&g_conf.m_siteQualitySaveCache - g;
//m->m_def = "0";
//m->m_type = TYPE_BOOL;
//m++;
//m->m_title = "max incoming links to sample";
//m->m_desc = "Max linkers to a doc that are sampled to determine "
// "quality and for gathering link text.";
//m->m_off = (char *)&g_conf.m_maxIncomingLinksToSample - g;
//m->m_def = "100";
//m->m_type = TYPE_LONG;
//m++;
//m->m_title = "allow async signals";
//m->m_desc = "Allow software interrupts?";
//m->m_off = (char *)&g_conf.m_allowAsyncSignals - g;
//m->m_def = "1";
//m->m_type = TYPE_BOOL;
//m++;
m->m_title = "read only mode";
m->m_desc = "Read only mode does not allow spidering.";
m->m_off = (char *)&g_conf.m_readOnlyMode - g;
m->m_def = "0";
m->m_type = TYPE_BOOL;
m++;
/*
Disable this until it works.
m->m_title = "use merge token";
m->m_desc = "Restrict merging to one host per token group? Hosts "
"that use the same disk and mirror hosts are generally in the "
"same token group so that only one host in the group can be "
"doing a merge at a time. This prevents query response time "
"from suffering too much.";
m->m_off = (char *)&g_conf.m_useMergeToken - g;
m->m_def = "1";
m->m_type = TYPE_BOOL;
m++;
*/
m->m_title = "do spell checking";
m->m_desc = "Spell check using the dictionary.";
m->m_off = (char *)&g_conf.m_doSpellChecking - g;
m->m_def = "1";
m->m_type = TYPE_BOOL;
m++;
m->m_title = "do narrow search";
m->m_desc = "give narrow search suggestions.";
m->m_off = (char *)&g_conf.m_doNarrowSearch - g;
m->m_def = "0";
m->m_type = TYPE_BOOL;
m++;
///////////////////////////////////////////
// MASTER CONTROLS
///////////////////////////////////////////
m->m_title = "local spidering enabled";
m->m_desc = "Overrides all spidering for all collections on just "
"this host.";
m->m_cgi = "se";
m->m_off = (char *)&g_conf.m_spideringEnabled - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_cast = 0;
m->m_page = PAGE_MASTER;
m++;
/*
m->m_title = "web spidering enabled";
m->m_desc = "Spiders events on web";
m->m_cgi = "wse";
m->m_off = (char *)&g_conf.m_webSpideringEnabled - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
*/
m->m_title = "local add url enabled";
m->m_desc = "Overrides all add urls for all collections on just this "
"host.";
m->m_cgi = "ae";
m->m_off = (char *)&g_conf.m_addUrlEnabled - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_cast = 0;
m++;
m->m_title = "use temporary cluster";
m->m_desc = "Used by proxy to point to a temporary cluster while the "
"original cluster is updated with a new binary. The "
"temporary cluster is the same as the original cluster but "
"the ports are all incremented by one from what is in "
"the hosts.conf. This should ONLY be used for the proxy.";
m->m_cgi = "aotp";
m->m_off = (char *)&g_conf.m_useTmpCluster - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
/*
m->m_title = "url injection enabled";
m->m_desc = "If enabled you can directly inject URLs into the index.";
m->m_cgi = "ie";
m->m_off = (char *)&g_conf.m_injectionEnabled - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
*/
m->m_title = "init parser test run";
m->m_desc = "If enabled gb injects the urls in the "
"./test-parser/urls.txt "
"file and outputs ./test-parser/qa.html";
m->m_cgi = "qaptei";
m->m_type = TYPE_CMD;
m->m_func = CommandParserTestInit;
m->m_def = "1";
m->m_cast = 1;
m++;
m->m_title = "init spider test run";
m->m_desc = "If enabled gb injects the urls in "
"./test-spider/spider.txt "
"and spiders links.";
m->m_cgi = "qasptei";
m->m_type = TYPE_CMD;
m->m_func = CommandSpiderTestInit;
m->m_def = "1";
m->m_cast = 1;
m->m_group = 0;
m++;
m->m_title = "continue spider test run";
m->m_desc = "Resumes the test.";
m->m_cgi = "qaspter";
m->m_type = TYPE_CMD;
m->m_func = CommandSpiderTestCont;
m->m_def = "1";
m->m_cast = 1;
m->m_group = 0;
m++;
/*
m->m_title = "do docid range splitting";
m->m_desc = "Split msg39 docids into ranges to save mem?";
m->m_cgi = "ddrs";
m->m_off = (char *)&g_conf.m_doDocIdRangeSplitting - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
*/
m->m_title = "qa search test enabled";
m->m_desc = "If enabled gb does the search queries in "
"./test-search/queries.txt and compares to the last run and "
"outputs the diffs for inspection and validation.";
m->m_cgi = "qasste";
m->m_off = (char *)&g_conf.m_testSearchEnabled - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
//m->m_cast = 0;
m->m_group = 0;
m++;
m->m_title = "just save";
m->m_desc = "Copies the data in memory to disk for just this host. "
"Does Not exit.";
m->m_cgi = "js";
m->m_type = TYPE_CMD;
m->m_func = CommandJustSave;
m->m_page = PAGE_MASTER;
m->m_cast = 0;
m++;
m->m_title = "all just save";
m->m_desc = "Saves the data for all hosts. Does Not exit.";
m->m_cgi = "js";
m->m_type = TYPE_CMD;
m++;
m->m_title = "all spiders on";
m->m_desc = "Enable spidering on all hosts";
m->m_cgi = "se";
m->m_def = "1";
m->m_off = (char *)&g_conf.m_spideringEnabled - g;
m->m_type = TYPE_BOOL2; // no yes or no, just a link
m++;
m->m_title = "all spiders off";
m->m_desc = "Disable spidering on all hosts";
m->m_cgi = "se";
m->m_def = "0";
m->m_off = (char *)&g_conf.m_spideringEnabled - g;
m->m_type = TYPE_BOOL2; // no yes or no, just a link
m++;
m->m_title = "save & exit";
m->m_desc = "Copies the data in memory to disk for just this host "
"and then shuts down the gb process.";
m->m_cgi = "save";
m->m_type = TYPE_CMD;
m->m_func = CommandSaveAndExit;
m->m_cast = 0;
m++;
m->m_title = "urgent save & exit";
m->m_desc = "Copies the data in memory to disk for just this host "
"and then shuts down the gb process.";
m->m_cgi = "usave";
m->m_type = TYPE_CMD;
m->m_func = CommandUrgentSaveAndExit;
m->m_cast = 0;
m->m_priv = 4;
m++;
m->m_title = "all save & exit";
m->m_desc = "Saves the data and exits for all hosts.";
m->m_cgi = "save";
m->m_type = TYPE_CMD;
m++;
m->m_title = "dump to disk";
m->m_desc = "Flushes all records in memory to the disk.";
m->m_cgi = "dump";
m->m_type = TYPE_CMD;
m->m_func = CommandDiskDump;
m->m_cast = 0;
m++;
m->m_title = "tight merge posdb";
m->m_desc = "Merges all outstanding indexdb files.";
m->m_cgi = "pmerge";
m->m_type = TYPE_CMD;
m->m_func = CommandMergePosdb;
m->m_cast = 1;
m++;
//m->m_title = "tight merge sectiondb";
//m->m_desc = "Merges all outstanding sectiondb files.";
//m->m_cgi = "smerge";
//m->m_type = TYPE_CMD;
//m->m_func = CommandMergeSectiondb;
//m->m_cast = 1;
//m++;
m->m_title = "tight merge titledb";
m->m_desc = "Merges all outstanding titledb files.";
m->m_cgi = "tmerge";
m->m_type = TYPE_CMD;
m->m_func = CommandMergeTitledb;
m->m_cast = 1;
m++;
m->m_title = "tight merge spiderdb";
m->m_desc = "Merges all outstanding spiderdb files.";
m->m_cgi = "spmerge";
m->m_type = TYPE_CMD;
m->m_func = CommandMergeSpiderdb;
m->m_cast = 1;
m++;
m->m_title = "disk page cache off";
m->m_desc = "Disable all disk page caches to save mem for "
"tmp cluster. Just for this host. Run "
"gb cacheoff to do for all hosts.";
m->m_cgi = "dpco";
m->m_type = TYPE_CMD;
m->m_func = CommandDiskPageCacheOff;
m->m_cast = 0;
m++;
//m->m_title = "http server enabled";
//m->m_desc = "Disable this if you do not want anyone hitting your "
// "http server. Admin and local IPs are still permitted, "
// "however.";
//m->m_cgi = "hse";
//m->m_off = (char *)&g_conf.m_httpServerEnabled - g;
//m->m_type = TYPE_BOOL;
//m->m_def = "1";
//m++;
m->m_title = "ad feed enabled";
m->m_desc = "Serves ads unless pure=1 is in cgi parms.";
m->m_cgi = "afe";
m->m_off = (char *)&g_conf.m_adFeedEnabled - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_scgi = "ads";
m->m_soff = (char *)&si.m_adFeedEnabled - y;
m->m_sparm = 1;
m->m_priv = 2;
m++;
m->m_title = "do stripe balancing";
m->m_desc = "Stripe #n contains twin #n from each group. Doing "
"stripe balancing helps prevent too many query requests "
"coming into one host. This parm is only for the proxy. "
"Stripe balancing is done by default unless the parm is "
"disabled on the proxy in which case it appends a "
"&dsb=0 to the query url it sends to the host. The proxy "
"alternates to which host it forwards the incoming query "
"based on the stripe. It takes the number of query terms in "
"the query into account to make a more even balance.";
m->m_cgi = "dsb";
m->m_off = (char *)&g_conf.m_doStripeBalancing - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
//m->m_scgi = "dsb";
//m->m_soff = (char *)&si.m_doStripeBalancing - y;
//m->m_sparm = 1;
m++;
m->m_title = "is live cluster";
m->m_desc = "Is this cluster part of a live production cluster? "
"If this is true we make sure that elvtune is being "
"set properly for best performance, otherwise, gb will "
"not startup.";
m->m_cgi = "live";
m->m_off = (char *)&g_conf.m_isLive - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
/*
m->m_title = "is BuzzLogic";
m->m_desc = "Is this a BuzzLogic cluster?";
m->m_cgi = "isbuzz";
m->m_off = (char *)&g_conf.m_isBuzzLogic - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
*/
// we use wikipedia cluster for quick categorization
m->m_title = "is wikipedia cluster";
m->m_desc = "Is this cluster just used for indexing wikipedia pages?";
m->m_cgi = "iswiki";
m->m_off = (char *)&g_conf.m_isWikipedia - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
m->m_title = "max hard drive temperature";
m->m_desc = "At what temperature in Celsius should we send "
"an email alert if a hard drive reaches it?";
m->m_cgi = "mhdt";
m->m_off = (char *)&g_conf.m_maxHardDriveTemp - g;
m->m_type = TYPE_LONG;
m->m_def = "45";
m++;
m->m_title = "max heartbeat delay in milliseconds";
m->m_desc = "If a heartbeat is delayed this many milliseconds "
"dump a core so we can see where the CPU was. "
"Logs 'db: missed heartbeat by %lli ms'. "
"Use 0 or less to disable.";
m->m_cgi = "mhdms";
m->m_off = (char *)&g_conf.m_maxHeartbeatDelay - g;
m->m_type = TYPE_LONG;
m->m_def = "0";
m++;
m->m_title = "max delay before logging a callback or handler";
m->m_desc = "If a call to a message callback or message handler "
"in the udp server takes more than this many milliseconds, "
"then log it. "
"Logs 'udp: Took %lli ms to call callback for msgType="
"0x%hhx niceness=%li'. "
"Use -1 or less to disable the logging.";
m->m_cgi = "mdch";
m->m_off = (char *)&g_conf.m_maxCallbackDelay - g;
m->m_type = TYPE_LONG;
m->m_def = "-1";
m->m_group = 0;
m++;
m->m_title = "send email alerts";
m->m_desc = "Sends emails to admin if a host goes down.";
m->m_cgi = "sea";
m->m_off = (char *)&g_conf.m_sendEmailAlerts - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 2;
m++;
//m->m_title = "send email alerts to matt at tmobile 450-3518";
//m->m_desc = "Sends to cellphone.";
//m->m_cgi = "seatmt";
//m->m_off = (char *)&g_conf.m_sendEmailAlertsToMattTmobile - g;
//m->m_type = TYPE_BOOL;
//m->m_def = "1";
//m->m_priv = 2;
//m->m_group = 0;
//m++;
//m->m_title = "send email alerts to matt at alltel 362-6809";
/*
m->m_title = "send email alerts to matt at alltel 450-3518";
m->m_desc = "Sends to cellphone.";
m->m_cgi = "seatmv";
m->m_off = (char *)&g_conf.m_sendEmailAlertsToMattAlltell - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_priv = 2;
m->m_group = 0;
m++;
m->m_title = "send email alerts to javier";
m->m_desc = "Sends to cellphone.";
m->m_cgi = "seatj";
m->m_off = (char *)&g_conf.m_sendEmailAlertsToJavier - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 2;
m->m_group = 0;
m++;
*/
// m->m_title = "send email alerts to melissa";
// m->m_desc = "Sends to cell phone.";
// m->m_cgi = "seatme";
// m->m_off = (char *)&g_conf.m_sendEmailAlertsToMelissa - g;
// m->m_type = TYPE_BOOL;
// m->m_def = "0";
// m->m_priv = 2;
// m->m_group = 0;
// m++;
/*
m->m_title = "send email alerts to partap";
m->m_desc = "Sends to cell phone.";
m->m_cgi = "seatp";
m->m_off = (char *)&g_conf.m_sendEmailAlertsToPartap - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 2;
m->m_group = 0;
m++;
*/
// m->m_title = "send email alerts to cinco";
// m->m_desc = "Sends to cell phone.";
// m->m_cgi = "seatc";
// m->m_off = (char *)&g_conf.m_sendEmailAlertsToCinco - g;
// m->m_type = TYPE_BOOL;
// m->m_def = "0";
// m->m_priv = 2;
// m->m_group = 0;
// m++;
m->m_title = "send email alerts to sysadmin";
m->m_desc = "Sends to sysadmin@gigablast.com.";
m->m_cgi = "seatsa";
m->m_off = (char *)&g_conf.m_sendEmailAlertsToSysadmin - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_priv = 2;
m->m_group = 0;
m++;
/*
m->m_title = "send email alerts to zak";
m->m_desc = "Sends to zak@gigablast.com.";
m->m_cgi = "seatz";
m->m_off = (char *)&g_conf.m_sendEmailAlertsToZak - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 2;
m->m_group = 0;
m++;
m->m_title = "send email alerts to sabino";
m->m_desc = "Sends to cell phone.";
m->m_cgi = "seatms";
m->m_off = (char *)&g_conf.m_sendEmailAlertsToSabino - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 2;
m->m_group = 0;
m++;
*/
m->m_title = "send email alerts to email 1";
m->m_desc = "Sends to email address 1 through email server 1.";
m->m_cgi = "seatone";
m->m_off = (char *)&g_conf.m_sendEmailAlertsToEmail1 - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 2;
m->m_group = 0;
m++;
m->m_title = "send parm change email alerts to email 1";
m->m_desc = "Sends to email address 1 through email server 1 if "
"any parm is changed.";
m->m_cgi = "seatonep";
m->m_off = (char *)&g_conf.m_sendParmChangeAlertsToEmail1 - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 2;
m->m_group = 0;
m++;
m->m_title = "email server 1";
m->m_desc = "Connects to this server directly when sending email 1 ";
m->m_cgi = "esrvone";
m->m_off = (char *)&g_conf.m_email1MX - g;
m->m_type = TYPE_STRING;
m->m_def = "mail.gigablast.com";
m->m_size = MAX_MX_LEN;
m->m_priv = 2;
m->m_group = 0;
m++;
m->m_title = "email address 1";
m->m_desc = "Sends to this address when sending email 1 ";
m->m_cgi = "eaddrone";
m->m_off = (char *)&g_conf.m_email1Addr - g;
m->m_type = TYPE_STRING;
m->m_def = "";
m->m_size = MAX_EMAIL_LEN;
m->m_priv = 2;
m->m_group = 0;
m++;
m->m_title = "from email address 1";
m->m_desc = "The from field when sending email 1 ";
m->m_cgi = "efaddrone";
m->m_off = (char *)&g_conf.m_email1From - g;
m->m_type = TYPE_STRING;
m->m_def = "sysadmin@gigablast.com";
m->m_size = MAX_EMAIL_LEN;
m->m_priv = 2;
m->m_group = 0;
m++;
m->m_title = "send email alerts to email 2";
m->m_desc = "Sends to email address 2 through email server 2.";
m->m_cgi = "seattwo";
m->m_off = (char *)&g_conf.m_sendEmailAlertsToEmail2 - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 2;
m->m_group = 0;
m++;
m->m_title = "send parm change email alerts to email 2";
m->m_desc = "Sends to email address 2 through email server 2 if "
"any parm is changed.";
m->m_cgi = "seattwop";
m->m_off = (char *)&g_conf.m_sendParmChangeAlertsToEmail2 - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 2;
m->m_group = 0;
m++;
m->m_title = "email server 2";
m->m_desc = "Connects to this server directly when sending email 2 ";
m->m_cgi = "esrvtwo";
m->m_off = (char *)&g_conf.m_email2MX - g;
m->m_type = TYPE_STRING;
m->m_def = "mail.gigablast.com";
m->m_size = MAX_MX_LEN;
m->m_priv = 2;
m->m_group = 0;
m++;
m->m_title = "email address 2";
m->m_desc = "Sends to this address when sending email 2 ";
m->m_cgi = "eaddrtwo";
m->m_off = (char *)&g_conf.m_email2Addr - g;
m->m_type = TYPE_STRING;
m->m_def = "";
m->m_size = MAX_EMAIL_LEN;
m->m_priv = 2;
m->m_group = 0;
m++;
m->m_title = "from email address 2";
m->m_desc = "The from field when sending email 2 ";
m->m_cgi = "efaddrtwo";
m->m_off = (char *)&g_conf.m_email2From - g;
m->m_type = TYPE_STRING;
m->m_def = "sysadmin@gigablast.com";
m->m_size = MAX_EMAIL_LEN;
m->m_priv = 2;
m->m_group = 0;
m++;
m->m_title = "send email alerts to email 3";
m->m_desc = "Sends to email address 3 through email server 3.";
m->m_cgi = "seatthree";
m->m_off = (char *)&g_conf.m_sendEmailAlertsToEmail3 - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 2;
m->m_group = 0;
m++;
m->m_title = "send parm change email alerts to email 3";
m->m_desc = "Sends to email address 3 through email server 3 if "
"any parm is changed.";
m->m_cgi = "seatthreep";
m->m_off = (char *)&g_conf.m_sendParmChangeAlertsToEmail3 - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 2;
m->m_group = 0;
m++;
m->m_title = "email server 3";
m->m_desc = "Connects to this server directly when sending email 3 ";
m->m_cgi = "esrvthree";
m->m_off = (char *)&g_conf.m_email3MX - g;
m->m_type = TYPE_STRING;
m->m_def = "mail.gigablast.com";
m->m_size = MAX_MX_LEN;
m->m_priv = 2;
m->m_group = 0;
m++;
m->m_title = "email address 3";
m->m_desc = "Sends to this address when sending email 3 ";
m->m_cgi = "eaddrthree";
m->m_off = (char *)&g_conf.m_email3Addr - g;
m->m_type = TYPE_STRING;
m->m_def = "";
m->m_size = MAX_EMAIL_LEN;
m->m_priv = 2;
m->m_group = 0;
m++;
m->m_title = "from email address 3";
m->m_desc = "The from field when sending email 3 ";
m->m_cgi = "efaddrthree";
m->m_off = (char *)&g_conf.m_email3From - g;
m->m_type = TYPE_STRING;
m->m_def = "sysadmin@gigablast.com";
m->m_size = MAX_EMAIL_LEN;
m->m_priv = 2;
m->m_group = 0;
m++;
m->m_title = "send email alerts to email 4";
m->m_desc = "Sends to email address 4 through email server 4.";
m->m_cgi = "seatfour";
m->m_off = (char *)&g_conf.m_sendEmailAlertsToEmail4 - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 2;
m->m_group = 0;
m++;
m->m_title = "send parm change email alerts to email 4";
m->m_desc = "Sends to email address 4 through email server 4 if "
"any parm is changed.";
m->m_cgi = "seatfourp";
m->m_off = (char *)&g_conf.m_sendParmChangeAlertsToEmail4 - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 2;
m->m_group = 0;
m++;
m->m_title = "email server 4";
m->m_desc = "Connects to this server directly when sending email 4 ";
m->m_cgi = "esrvfour";
m->m_off = (char *)&g_conf.m_email4MX - g;
m->m_type = TYPE_STRING;
m->m_def = "mail.gigablast.com";
m->m_size = MAX_MX_LEN;
m->m_priv = 2;
m->m_group = 0;
m++;
m->m_title = "email address 4";
m->m_desc = "Sends to this address when sending email 4 ";
m->m_cgi = "eaddrfour";
m->m_off = (char *)&g_conf.m_email4Addr - g;
m->m_type = TYPE_STRING;
m->m_def = "";
m->m_size = MAX_EMAIL_LEN;
m->m_priv = 2;
m->m_group = 0;
m++;
m->m_title = "from email address 4";
m->m_desc = "The from field when sending email 4 ";
m->m_cgi = "efaddrfour";
m->m_off = (char *)&g_conf.m_email4From - g;
m->m_type = TYPE_STRING;
m->m_def = "sysadmin@gigablast.com";
m->m_size = MAX_EMAIL_LEN;
m->m_priv = 2;
m->m_group = 0;
m++;
m->m_title = "delay non critical email alerts";
m->m_desc = "Do not send email alerts about dead hosts to "
"anyone except sysadmin@gigablast.com between the times "
"given below unless all the twins of the dead host are "
"also dead. Instead, wait till after if the host "
"is still dead. ";
m->m_cgi = "dnca";
m->m_off = (char *)&g_conf.m_delayNonCriticalEmailAlerts - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 2;
m->m_group = 0;
m++;
/*
m->m_title = "delay emails after";
m->m_desc = "If delay non critical email alerts is on, don't send "
"emails after this time. Time is hh:mm. Time is take from "
"host #0's system clock in UTC.";
m->m_cgi = "dea";
m->m_off = (char *)&g_conf.m_delayEmailsAfter - g;
m->m_type = TYPE_TIME; // time format -- very special
m->m_def = "00:00";
m->m_priv = 2;
m++;
m->m_title = "delay emails before";
m->m_desc = "If delay non critical email alerts is on, don't send "
"emails before this time. Time is hh:mm Time is take from "
"host #0's system clock in UTC.";
m->m_cgi = "deb";
m->m_off = (char *)&g_conf.m_delayEmailsBefore - g;
m->m_type = TYPE_TIME; // time format -- very special
m->m_def = "00:00";
m->m_priv = 2;
m++;
*/
/*
Disable this until it works.
m->m_title = "use merge token";
m->m_desc = "If used, prevents twins, or hosts on the same ide "
"channel, from merging simultaneously.";
m->m_cgi = "umt";
m->m_off = (char *)&g_conf.m_useMergeToken - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m++;
*/
m->m_title = "error string 1";
m->m_desc = "Look for this string in the kernel buffer for sending "
"email ";
m->m_cgi = "errstrone";
m->m_off = (char *)&g_conf.m_errstr1 - g;
m->m_type = TYPE_STRING;
m->m_def = "";
m->m_size = MAX_URL_LEN;
m->m_priv = 2;
m->m_group = 0;
m++;
m->m_title = "error string 2";
m->m_desc = "Look for this string in the kernel buffer for sending "
"email ";
m->m_cgi = "errstrtwo";
m->m_off = (char *)&g_conf.m_errstr2 - g;
m->m_type = TYPE_STRING;
m->m_def = "";
m->m_size = MAX_URL_LEN;
m->m_priv = 2;
m->m_group = 0;
m++;
m->m_title = "error string 3";
m->m_desc = "Look for this string in the kernel buffer for sending "
"email ";
m->m_cgi = "errstrthree";
m->m_off = (char *)&g_conf.m_errstr3 - g;
m->m_type = TYPE_STRING;
m->m_def = "";
m->m_size = MAX_URL_LEN;
m->m_priv = 2;
m->m_group = 0;
m++;
m->m_title = "prefer local reads";
m->m_desc = "If you have scsi drives or a slow network, say yes here "
"to minimize data fetches across the network.";
m->m_cgi = "plr";
m->m_off = (char *)&g_conf.m_preferLocalReads - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
/*
m->m_title = "use biased tfndb";
m->m_desc = "Should we always send titledb record lookup requests "
"to a particular host in order to increase tfndb page cache "
"hits? This bypasses load balancing and may result in "
"slower hosts being more of a bottleneck. Keep this disabled "
"unless you notice tfndb disk seeks slowing things down.";
m->m_cgi = "ubu";
m->m_off = (char *)&g_conf.m_useBiasedTfndb - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
*/
m->m_title = "do synchronous writes";
m->m_desc = "If enabled then all writes will be flushed to disk. "
"This is generally a good thing.";
m->m_cgi = "fw";
m->m_off = (char *)&g_conf.m_flushWrites - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m++;
m->m_title = "verify disk writes";
m->m_desc = "Read what was written in a verification step. Decreases "
"performance, but may help fight disk corruption mostly on "
"Maxtors and Western Digitals.";
m->m_cgi = "vdw";
m->m_off = (char *)&g_conf.m_verifyWrites - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m++;
// this is ifdef'd out in Msg3.cpp for performance reasons,
// so do it here, too
#ifdef _SANITY_CHECK_
m->m_title = "max corrupted read retries";
m->m_desc = "How many times to retry disk reads that had corrupted "
"data before requesting the list from a twin, and, if that "
"fails, removing the bad data.";
m->m_cgi = "crr";
m->m_off = (char *)&g_conf.m_corruptRetries - g;
m->m_type = TYPE_LONG;
m->m_def = "100";
m->m_group = 0;
m++;
#endif
m->m_title = "do incremental updating";
m->m_desc = "When reindexing a document, do not re-add data "
"that should already be in index or clusterdb "
"since the last time the document was indexed. Otherwise, "
"re-add the data regardless.";
m->m_cgi = "oic";
//m->m_off = (char *)&g_conf.m_onlyAddUnchangedTermIds - g;
m->m_off = (char *)&g_conf.m_doIncrementalUpdating - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m++;
// you can really screw up the index if this is false, so
// comment it out for now
/*
m->m_title = "index deletes";
m->m_desc = "Should we allow indexdb recs to be deleted? This is "
"always true, except in very rare indexdb rebuilds.";
m->m_cgi = "id";
m->m_off = (char *)&g_conf.m_indexDeletes - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
*/
m->m_title = "use etc hosts";
m->m_desc = "Use /etc/hosts file to resolve hostnames? the "
"/etc/host file is reloaded every minute, so if you make "
"a change to it you might have to wait one minute for the "
"change to take affect.";
m->m_cgi = "ueh";
m->m_off = (char *)&g_conf.m_useEtcHosts - g;
m->m_def = "0";
m->m_type = TYPE_BOOL;
m++;
m->m_title = "twins are split";
m->m_desc = "If enabled, Gigablast assumes the first half of "
"machines in hosts.conf "
"are on a different network switch than the second half, "
"and minimizes transmits between the switches.";
m->m_cgi = "stw";
m->m_off = (char *)&g_conf.m_splitTwins - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
m->m_title = "do out of memory testing";
m->m_desc = "When enabled Gigablast will randomly fail at "
"allocating memory. Used for testing stability.";
m->m_cgi = "dot";
m->m_off = (char *)&g_conf.m_testMem - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
m->m_title = "do consistency testing";
m->m_desc = "When enabled Gigablast will make sure it reparses "
"the document exactly the same way. It does this every "
"1000th document anyway, but enabling this makes it do it "
"for every document.";
m->m_cgi = "dct";
m->m_off = (char *)&g_conf.m_doConsistencyTesting - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "use shotgun";
m->m_desc = "If enabled, all servers must have two gigabit "
"ethernet ports hooked up and Gigablast will round robin "
"packets between both ethernet ports when sending to another "
"host. Can speed up network transmissions as much as 2x.";
m->m_cgi = "usht";
m->m_off = (char *)&g_conf.m_useShotgun - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
m->m_title = "use quickpoll";
m->m_desc = "If enabled, Gigablast will use quickpoll. Significantly "
"improves performance. Only turn this off for testing.";
m->m_cgi = "uqp";
m->m_off = (char *)&g_conf.m_useQuickpoll - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m++;
// m->m_title = "quickpoll core on error";
// m->m_desc = "If enabled, quickpoll will terminate the process and "
// "generate a core file when callbacks are called with the "
// "wrong niceness.";
// m->m_cgi = "qpoe";
// m->m_off = (char *)&g_conf.m_quickpollCoreOnError - g;
// m->m_type = TYPE_BOOL;
// m->m_def = "1";
// m++;
m->m_title = "use threads";
m->m_desc = "If enabled, Gigablast will use threads.";
m->m_cgi = "ut";
m->m_off = (char *)&g_conf.m_useThreads - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m++;
// . this will leak the shared mem if the process is Ctrl+C'd
// . that is expected behavior
// . you can clean up the leaks using 'gb freecache 20000000'
// and use 'ipcs -m' to see what leaks you got
// . generally, only the main gb should use shared mem, so
	// keep this off for testing
m->m_title = "use shared mem";
m->m_desc = "If enabled, Gigablast will use shared memory. "
"Should really only be used on the live cluster, "
"keep this on the testing cluster since it can "
"leak easily.";
m->m_cgi = "ushm";
m->m_off = (char *)&g_conf.m_useSHM - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
// disable disk caches... for testing really
/*
m->m_title = "use disk page cache for indexdb";
m->m_desc = "Use disk page cache?";
m->m_cgi = "udpci";
m->m_off = (char *)&g_conf.m_useDiskPageCacheIndexdb - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m++;
*/
m->m_title = "use disk page cache for posdb";
m->m_desc = "Use disk page cache?";
m->m_cgi = "udpci";
m->m_off = (char *)&g_conf.m_useDiskPageCachePosdb - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m++;
m->m_title = "use disk page cache for datedb";
m->m_desc = "Use disk page cache?";
m->m_cgi = "udpcd";
m->m_off = (char *)&g_conf.m_useDiskPageCacheDatedb - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m++;
m->m_title = "use disk page cache for titledb";
m->m_desc = "Use disk page cache?";
m->m_cgi = "udpct";
m->m_off = (char *)&g_conf.m_useDiskPageCacheTitledb - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
m->m_title = "use disk page cache for spiderdb";
m->m_desc = "Use disk page cache?";
m->m_cgi = "udpcs";
m->m_off = (char *)&g_conf.m_useDiskPageCacheSpiderdb - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
/*
m->m_title = "use disk page cache for urldb";
m->m_desc = "Use disk page cache?";
m->m_cgi = "udpcu";
m->m_off = (char *)&g_conf.m_useDiskPageCacheTfndb - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
*/
m->m_title = "use disk page cache for tagdb";
m->m_desc = "Use disk page cache?";
m->m_cgi = "udpcg";
m->m_off = (char *)&g_conf.m_useDiskPageCacheTagdb - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
m->m_title = "use disk page cache for checksumdb";
m->m_desc = "Use disk page cache?";
m->m_cgi = "udpck";
m->m_off = (char *)&g_conf.m_useDiskPageCacheChecksumdb - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
m->m_title = "use disk page cache for clusterdb";
m->m_desc = "Use disk page cache?";
m->m_cgi = "udpcl";
m->m_off = (char *)&g_conf.m_useDiskPageCacheClusterdb - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
m->m_title = "use disk page cache for catdb";
m->m_desc = "Use disk page cache?";
m->m_cgi = "udpca";
m->m_off = (char *)&g_conf.m_useDiskPageCacheCatdb - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
m->m_title = "use disk page cache for linkdb";
m->m_desc = "Use disk page cache?";
m->m_cgi = "udpcnk";
m->m_off = (char *)&g_conf.m_useDiskPageCacheLinkdb - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
/*
m->m_title = "exclude link text";
m->m_desc = "Exclude search results that have one or more query "
"that only appear in the incoming link text";
m->m_cgi = "exlt";
m->m_off = (char *)&g_conf.m_excludeLinkText - g;
m->m_sparm = 1;
m->m_soff = (char *)&si.m_excludeLinkText - y;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_scgi = "excludelinktext";
m++;
m->m_title = "exclude meta text";
m->m_desc = "Exclude search results that have one or more query "
"that only appear in the meta text";
m->m_cgi = "exmt";
m->m_off = (char *)&g_conf.m_excludeMetaText - g;
m->m_sparm = 1;
m->m_soff = (char *)&si.m_excludeMetaText - y;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_scgi = "excludemetatext";
m++;
*/
m->m_title = "scan all if not found";
m->m_desc = "Scan all titledb files if rec not found. You should "
"keep this on to avoid corruption. Do not turn it off unless "
"you are Matt Wells.";
m->m_cgi = "sainf";
m->m_off = (char *)&g_conf.m_scanAllIfNotFound - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m++;
m->m_title = "interface machine";
m->m_desc = "for specifying if this is an interface machine"
"messages are rerouted from this machine to the main"
"cluster set in the hosts.conf.";
m->m_cgi = "intmch";
m->m_off = (char *)&g_conf.m_interfaceMachine - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 2;
m++;
m->m_title = "generate vector at query time";
m->m_desc = "At query time, should Gigablast generate content "
"vectors for title records lacking them? This is an "
"expensive operation, so is really just for testing purposes.";
m->m_cgi = "gv";
m->m_off = (char *)&g_conf.m_generateVectorAtQueryTime - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
m->m_title = "autoban IPs which violate the queries per day quotas";
m->m_desc = "Keep track of ips which do queries, disallow "
"non-customers from hitting us too hard.";
m->m_cgi = "ab";
m->m_off = (char *)&g_conf.m_doAutoBan - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
if ( g_isYippy ) {
m->m_title = "Max outstanding search requests out for yippy";
m->m_desc = "Max outstanding search requests out for yippy";
m->m_cgi = "ymo";
m->m_off = (char *)&g_conf.m_maxYippyOut - g;
m->m_type = TYPE_LONG;
m->m_def = "150";
m++;
}
m->m_title = "free queries per day ";
m->m_desc = "Non-customers get this many queries per day before"
"being autobanned";
m->m_cgi = "nfqpd";
m->m_off = (char *)&g_conf.m_numFreeQueriesPerDay - g;
m->m_type = TYPE_LONG;
m->m_def = "1024";
m++;
m->m_title = "free queries per minute ";
m->m_desc = "Non-customers get this many queries per minute before"
"being autobanned";
m->m_cgi = "nfqpm";
m->m_off = (char *)&g_conf.m_numFreeQueriesPerMinute - g;
m->m_type = TYPE_CHAR;
m->m_def = "30";
m++;
m->m_title = "redirect non-raw traffic";
m->m_desc = "If this is non empty, http traffic will be redirected "
"to the specified address.";
m->m_cgi = "redir";
m->m_off = (char *)&g_conf.m_redirect - g;
m->m_type = TYPE_STRING;
m->m_size = MAX_URL_LEN;
m->m_def = "";
m++;
m->m_title = "send requests to compression proxy";
m->m_desc = "If this is true, gb will route download requests for"
" web pages to proxies in hosts.conf. Proxies will"
" download and compress docs before sending back. ";
m->m_cgi = "srtcp";
m->m_off = (char *)&g_conf.m_useCompressionProxy - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
m->m_title = "synchronize proxy to cluster time";
m->m_desc = "Enable/disable the ability to synchronize time between "
"the cluster and the proxy";
m->m_cgi = "sptct";
m->m_off = (char *)&g_conf.m_timeSyncProxy - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
/*
m->m_title = "use data feed account server";
m->m_desc = "Enable/disable the use of a remote account verification "
"for Data Feed Customers. This should ONLY be used for the "
"proxy.";
m->m_cgi = "pdfuas";
m->m_off = (char *)&g_conf.m_useDFAcctServer - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
m->m_title = "data feed server ip";
m->m_desc = "The ip address of the Gigablast data feed server to "
"retrieve customer account information from. This should ONLY "
"be used for the proxy.";
m->m_cgi = "pdfip";
m->m_off = (char *)&g_conf.m_dfAcctIp - g;
m->m_type = TYPE_IP;
m->m_def = "2130706433";
m->m_group = 0;
m++;
m->m_title = "data feed server port";
m->m_desc = "The port of the Gigablast data feed server to retrieve "
"customer account information from. This should ONLY be used "
"for the proxy";
m->m_cgi = "pdfport";
m->m_off = (char *)&g_conf.m_dfAcctPort - g;
m->m_type = TYPE_LONG;
m->m_def = "8040";
m->m_group = 0;
m++;
m->m_title = "data feed server collection";
m->m_desc = "The collection on the Gigablast data feed server to "
"retrieve customer account information from. This should ONLY "
"be used for the proxy.";
m->m_cgi = "pdfcoll";
m->m_off = (char *)&g_conf.m_dfAcctColl - g;
m->m_type = TYPE_STRING;
m->m_size = MAX_COLL_LEN;
m->m_def = "customers";
m->m_group = 0;
m++;
*/
m->m_title = "allow scaling of hosts";
m->m_desc = "Allows scaling up of hosts by deleting recs not in "
"the correct group. This should only happen why copying "
"a set of servers to the new hosts. Otherwise corrupted "
"data will cause a halt.";
m->m_cgi = "asoh";
m->m_off = (char *)&g_conf.m_allowScale - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
m->m_title = "allow bypass of db validation";
m->m_desc = "Allows bypass of db validation so gigablast will not "
"halt if a corrupt db is discovered durring load. Use this "
"when attempting to load with a collection that has known "
"corruption.";
m->m_cgi = "abov";
m->m_off = (char *)&g_conf.m_bypassValidation - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
m->m_title = "reload language pages";
m->m_desc = "Reloads language specific pages.";
m->m_cgi = "rlpages";
m->m_type = TYPE_CMD;
m->m_func = CommandReloadLanguagePages;
m->m_cast = 0;
m++;
m->m_title = "all reload language pages";
m->m_desc = "Reloads language specific pages for all hosts.";
m->m_cgi = "rlpages";
m->m_type = TYPE_CMD;
m++;
m->m_title = "clear kernel error message";
m->m_desc = "clears the kernel error message that the host may be "
"sending to other hosts.";
m->m_cgi = "clrkrnerr";
m->m_type = TYPE_CMD;
m->m_func = CommandClearKernelError;
m->m_cast = 0;
m++;
// do we need this any more?
/*
m->m_title = "give up on dead hosts";
m->m_desc = "Give up requests to dead hosts. Only set this when you "
"know a host is dead and will not come back online without "
"a restarting all hosts. Messages will timeout on the dead "
"host but will not error, allowing outstanding spidering to "
"finish to the twin.";
m->m_cgi = "gvup";
m->m_off = (char *)&g_conf.m_giveupOnDeadHosts - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
*/
/*
m->m_title = "ask root name servers";
m->m_desc = "if enabled Gigablast will direct DNS requests to "
"the root DNS servers, otherwise it will continue to "
"send DNS queries to the bind9 servers defined in "
"the Master Controls.";
m->m_cgi = "bdns";
m->m_off = (char *)&g_conf.m_askRootNameservers - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m++;
*/
/*
m->m_title = "do dig sanity checks";
m->m_desc = "call dig @nameServer hostname and on timedout lookups"
" and see if dig also timed out";
m->m_cgi = "dig";
m->m_off = (char *)&g_conf.m_useDig - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
*/
/*
m->m_title = "dns root name server 1";
m->m_desc = "IP address of a DNS root server. Assumes UDP "
"port 53.";
m->m_cgi = "rnsa";
m->m_off = (char *)&g_conf.m_rnsIps[0] - g;
m->m_type = TYPE_IP;
m->m_def = "192.228.79.201";
m++;
m->m_title = "dns root name server 2";
m->m_desc = "IP address of a DNS root server. Assumes UDP "
"port 53.";
m->m_cgi = "rnsb";
m->m_off = (char *)&g_conf.m_rnsIps[1] - g;
m->m_type = TYPE_IP;
m->m_def = "192.33.4.12";
m++;
m->m_title = "dns root name server 3";
m->m_desc = "IP address of a DNS root server. Assumes UDP "
"port 53.";
m->m_cgi = "rnsc";
m->m_off = (char *)&g_conf.m_rnsIps[2] - g;
m->m_type = TYPE_IP;
m->m_def = "128.8.10.90";
m++;
m->m_title = "dns root name server 4";
m->m_desc = "IP address of a DNS root server. Assumes UDP "
"port 53.";
m->m_cgi = "rnsd";
m->m_off = (char *)&g_conf.m_rnsIps[3] - g;
m->m_type = TYPE_IP;
m->m_def = "192.203.230.10";
m++;
m->m_title = "dns root name server 5";
m->m_desc = "IP address of a DNS root server. Assumes UDP "
"port 53.";
m->m_cgi = "rnse";
m->m_off = (char *)&g_conf.m_rnsIps[4] - g;
m->m_type = TYPE_IP;
m->m_def = "192.5.5.241";
m++;
m->m_title = "dns root name server 6";
m->m_desc = "IP address of a DNS root server. Assumes UDP "
"port 53.";
m->m_cgi = "rnsf";
m->m_off = (char *)&g_conf.m_rnsIps[5] - g;
m->m_type = TYPE_IP;
m->m_def = "192.112.36.4";
m++;
m->m_title = "dns root name server 7";
m->m_desc = "IP address of a DNS root server. Assumes UDP "
"port 53.";
m->m_cgi = "rnsg";
m->m_off = (char *)&g_conf.m_rnsIps[6] - g;
m->m_type = TYPE_IP;
m->m_def = "128.63.2.53";
m++;
m->m_title = "dns root name server 8";
m->m_desc = "IP address of a DNS root server. Assumes UDP "
"port 53.";
m->m_cgi = "rnsh";
m->m_off = (char *)&g_conf.m_rnsIps[7] - g;
m->m_type = TYPE_IP;
m->m_def = "192.36.148.17";
m++;
m->m_title = "dns root name server 9";
m->m_desc = "IP address of a DNS root server. Assumes UDP "
"port 53.";
m->m_cgi = "rnsi";
m->m_off = (char *)&g_conf.m_rnsIps[8] - g;
m->m_type = TYPE_IP;
m->m_def = "192.58.128.30";
m++;
m->m_title = "dns root name server 10";
m->m_desc = "IP address of a DNS root server. Assumes UDP "
"port 53.";
m->m_cgi = "rnsj";
m->m_off = (char *)&g_conf.m_rnsIps[9] - g;
m->m_type = TYPE_IP;
m->m_def = "193.0.14.129";
m++;
m->m_title = "dns root name server 11";
m->m_desc = "IP address of a DNS root server. Assumes UDP "
"port 53.";
m->m_cgi = "rnsk";
m->m_off = (char *)&g_conf.m_rnsIps[10] - g;
m->m_type = TYPE_IP;
m->m_def = "198.32.64.12";
m++;
m->m_title = "dns root name server 12";
m->m_desc = "IP address of a DNS root server. Assumes UDP "
"port 53.";
m->m_cgi = "rnsl";
m->m_off = (char *)&g_conf.m_rnsIps[11] - g;
m->m_type = TYPE_IP;
m->m_def = "202.12.27.33";
m++;
m->m_title = "dns root name server 13";
m->m_desc = "IP address of a DNS root server. Assumes UDP "
"port 53.";
m->m_cgi = "rnsm";
m->m_off = (char *)&g_conf.m_rnsIps[12] - g;
m->m_type = TYPE_IP;
m->m_def = "198.41.0.4";
m++;
*/
m->m_title = "dns 0";
m->m_desc = "IP address of the primary DNS server. Assumes UDP "
"port 53.";
m->m_cgi = "pdns";
m->m_off = (char *)&g_conf.m_dnsIps[0] - g;
m->m_type = TYPE_IP;
// default to google public dns #1
m->m_def = "8.8.8.8";
m++;
m->m_title = "dns 1";
m->m_desc = "IP address of the secondary DNS server. Assumes UDP "
"port 53. Will be accessed in conjunction with the primary "
"dns, so make sure this is always up. An ip of 0 means "
"disabled.";
m->m_cgi = "sdns";
m->m_off = (char *)&g_conf.m_dnsIps[1] - g;
m->m_type = TYPE_IP;
// default to google public dns #2
m->m_def = "8.8.4.4";
m->m_group = 0;
m++;
m->m_title = "dns 2";
m->m_desc = "";
m->m_cgi = "sdnsa";
m->m_off = (char *)&g_conf.m_dnsIps[2] - g;
m->m_type = TYPE_IP;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "dns 3";
m->m_desc = "";
m->m_cgi = "sdnsb";
m->m_off = (char *)&g_conf.m_dnsIps[3] - g;
m->m_type = TYPE_IP;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "dns 4";
m->m_desc = "";
m->m_cgi = "sdnsc";
m->m_off = (char *)&g_conf.m_dnsIps[4] - g;
m->m_type = TYPE_IP;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "dns 5";
m->m_desc = "";
m->m_cgi = "sdnsd";
m->m_off = (char *)&g_conf.m_dnsIps[5] - g;
m->m_type = TYPE_IP;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "dns 6";
m->m_desc = "";
m->m_cgi = "sdnse";
m->m_off = (char *)&g_conf.m_dnsIps[6] - g;
m->m_type = TYPE_IP;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "dns 7";
m->m_desc = "";
m->m_cgi = "sdnsf";
m->m_off = (char *)&g_conf.m_dnsIps[7] - g;
m->m_type = TYPE_IP;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "dns 8";
m->m_desc = "";
m->m_cgi = "sdnsg";
m->m_off = (char *)&g_conf.m_dnsIps[8] - g;
m->m_type = TYPE_IP;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "dns 9";
m->m_desc = "";
m->m_cgi = "sdnsh";
m->m_off = (char *)&g_conf.m_dnsIps[9] - g;
m->m_type = TYPE_IP;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "dns 10";
m->m_desc = "";
m->m_cgi = "sdnsi";
m->m_off = (char *)&g_conf.m_dnsIps[10] - g;
m->m_type = TYPE_IP;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "dns 11";
m->m_desc = "";
m->m_cgi = "sdnsj";
m->m_off = (char *)&g_conf.m_dnsIps[11] - g;
m->m_type = TYPE_IP;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "dns 12";
m->m_desc = "";
m->m_cgi = "sdnsk";
m->m_off = (char *)&g_conf.m_dnsIps[12] - g;
m->m_type = TYPE_IP;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "dns 13";
m->m_desc = "";
m->m_cgi = "sdnsl";
m->m_off = (char *)&g_conf.m_dnsIps[13] - g;
m->m_type = TYPE_IP;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "dns 14";
m->m_desc = "";
m->m_cgi = "sdnsm";
m->m_off = (char *)&g_conf.m_dnsIps[14] - g;
m->m_type = TYPE_IP;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "dns 15";
m->m_desc = "";
m->m_cgi = "sdnsn";
m->m_off = (char *)&g_conf.m_dnsIps[15] - g;
m->m_type = TYPE_IP;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "geocoder IP #1";
m->m_desc = "";
m->m_cgi = "gca";
m->m_off = (char *)&g_conf.m_geocoderIps[0] - g;
m->m_type = TYPE_IP;
m->m_def = "10.5.66.11"; // sp1
m++;
m->m_title = "geocoder IP #2";
m->m_desc = "";
m->m_cgi = "gcb";
m->m_off = (char *)&g_conf.m_geocoderIps[1] - g;
m->m_type = TYPE_IP;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "geocoder IP #3";
m->m_desc = "";
m->m_cgi = "gcc";
m->m_off = (char *)&g_conf.m_geocoderIps[2] - g;
m->m_type = TYPE_IP;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "geocoder IP #4";
m->m_desc = "";
m->m_cgi = "gcd";
m->m_off = (char *)&g_conf.m_geocoderIps[3] - g;
m->m_type = TYPE_IP;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "wiki proxy ip";
m->m_desc = "Access the wiki coll through this proxy ip";
m->m_cgi = "wpi";
m->m_off = (char *)&g_conf.m_wikiProxyIp - g;
m->m_type = TYPE_IP;
m->m_def = "0";
m++;
m->m_title = "wiki proxy port";
m->m_desc = "Access the wiki coll through this proxy port";
m->m_cgi = "wpp";
m->m_off = (char *)&g_conf.m_wikiProxyPort - g;
m->m_type = TYPE_LONG;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "cluster name";
m->m_desc = "Email alerts will include the cluster name";
m->m_cgi = "cn";
m->m_off = (char *)&g_conf.m_clusterName - g;
m->m_type = TYPE_STRING;
m->m_size = 32;
m->m_def = "unspecified";
m++;
m->m_title = "spider user agent";
// fixed user-facing grammar: "webmaster that experience" ->
// "webmasters that experience"
m->m_desc = "Identification seen by web servers when "
"the Gigablast spider downloads their web pages. "
"It is polite to insert a contact email address here so "
"webmasters that experience problems from the Gigablast "
"spider have somewhere to vent.";
m->m_cgi = "sua";
m->m_off = (char *)&g_conf.m_spiderUserAgent - g;
m->m_type = TYPE_STRING;
m->m_size = USERAGENTMAXSIZE;
m->m_def = "GigaBot/1.0";
m++;
m->m_title = "ask for gzipped docs when downloading";
// BUGFIX: the two adjacent string literals concatenated to
// "...accept-encoding: gzipwhen doing..." — a space was missing
// at the literal boundary.
m->m_desc = "If this is true, gb will send accept-encoding: gzip "
"when doing http downloads.";
m->m_cgi = "afgdwd";
m->m_off = (char *)&g_conf.m_gzipDownloads - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
m->m_title = "default collection";
m->m_desc = "When no collection is explicitly specified, assume "
"this collection name.";
m->m_cgi = "dcn";
m->m_off = (char *)&g_conf.m_defaultColl - g;
m->m_type = TYPE_STRING;
m->m_size = MAX_COLL_LEN+1;
m->m_def = "";
m++;
m->m_title = "directory collection";
m->m_desc = "Collection to be used for directory searching and "
"display of directory topic pages.";
m->m_cgi = "dircn";
m->m_off = (char *)&g_conf.m_dirColl - g;
m->m_type = TYPE_STRING;
m->m_size = MAX_COLL_LEN+1;
m->m_def = "main";
m++;
m->m_title = "directory hostname";
m->m_desc = "Hostname of the server providing the directory. "
"Leave empty to use this host.";
m->m_cgi = "dirhn";
m->m_off = (char *)&g_conf.m_dirHost - g;
m->m_type = TYPE_STRING;
m->m_size = MAX_URL_LEN;
m->m_def = "";
m++;
m->m_title = "max incoming bandwidth for spider";
m->m_desc = "Total incoming bandwidth used by all spiders should "
"not exceed this many kilobits per second. ";
m->m_cgi = "mkbps";
m->m_off = (char *)&g_conf.m_maxIncomingKbps - g;
m->m_type = TYPE_FLOAT;
m->m_def = "999999.0";
m->m_units = "Kbps";
m++;
m->m_title = "max 1-minute sliding-window loadavg";
m->m_desc = "Spiders will shed load when their host exceeds this "
"value for the 1-minute load average in /proc/loadavg. "
"The value 0.0 disables this feature.";
m->m_cgi = "mswl";
m->m_off = (char *)&g_conf.m_maxLoadAvg - g;
m->m_type = TYPE_FLOAT;
m->m_def = "0.0";
m->m_units = "";
m->m_group = 0;
m++;
m->m_title = "max cpu threads";
m->m_desc = "Maximum number of threads to use per Gigablast process "
"for intersecting docid lists. Generally, set this to the "
"number of CPUs on the machine.";
m->m_cgi = "mct";
m->m_off = (char *)&g_conf.m_maxCpuThreads - g;
m->m_type = TYPE_LONG;
// make it 3 for new gb in case one query takes way longer
// than the others
m->m_def = "3"; // "2";
m->m_units = "threads";
m->m_min = 1;
m++;
m->m_title = "max pages per second";
m->m_desc = "Maximum number of pages to index or delete from index "
"per second for all hosts combined.";
m->m_cgi = "mpps";
m->m_off = (char *)&g_conf.m_maxPagesPerSecond - g;
m->m_type = TYPE_FLOAT;
m->m_def = "999999.0";
m->m_units = "pages/second";
m->m_group = 0;
m++;
/*
m->m_title = "distributed spider balance";
m->m_desc = "Max number of ready domains a host can have distributed "
"to it by all other host. This should be some multiple of the "
"total number of hosts in the cluster.";
m->m_cgi = "dsb";
m->m_off = (char *)&g_conf.m_distributedSpiderBalance - g;
m->m_type = TYPE_LONG;
m->m_def = "1024";
m->m_units = "domains";
m++;
m->m_title = "distributed same ip wait (hack)";
m->m_desc = "Amount of time to wait if this IP is already being "
"downloaded by a host. Works only in conjunction with "
"distribute spider downloads by ip in Spider Controls.";
m->m_cgi = "dsiw";
m->m_off = (char *)&g_conf.m_distributedIpWait - g;
m->m_type = TYPE_LONG;
m->m_def = "0";
m->m_units = "ms";
m->m_group = 0;
m->m_min = 0;
m++;
*/
m->m_title = "dead host timeout";
m->m_desc = "Consider a host in the Gigablast network to be dead if "
"it does not respond to successive pings for this number of "
"seconds. Gigablast does not send requests to dead hosts. "
"Outstanding requests may be re-routed to a twin.";
m->m_cgi = "dht";
m->m_off = (char *)&g_conf.m_deadHostTimeout - g;
m->m_type = TYPE_LONG;
m->m_def = "4000";
m->m_units = "milliseconds";
m++;
m->m_title = "send email timeout";
m->m_desc = "Send an email after a host has not responded to "
"successive pings for this many milliseconds.";
m->m_cgi = "set";
m->m_off = (char *)&g_conf.m_sendEmailTimeout - g;
m->m_type = TYPE_LONG;
m->m_def = "62000";
m->m_priv = 2;
m->m_units = "milliseconds";
m->m_group = 0;
m++;
m->m_title = "ping spacer";
m->m_desc = "Wait this many milliseconds before pinging the next "
"host. Each host pings all other hosts in the network.";
m->m_cgi = "ps";
m->m_off = (char *)&g_conf.m_pingSpacer - g;
m->m_min = 50; // i've seen values of 0 hammer the cpu
m->m_type = TYPE_LONG;
m->m_def = "100";
m->m_units = "milliseconds";
m->m_group = 0;
m++;
m->m_title = "average query latency threshold";
m->m_desc = "Send email alerts when average query latency goes above "
"this threshold.";
m->m_cgi = "aqpst";
m->m_off = (char *)&g_conf.m_avgQueryTimeThreshold - g;
m->m_type = TYPE_FLOAT;
// a titlerec fetch times out after 2 seconds and is re-routed
m->m_def = "2.0";
m->m_priv = 2;
m->m_group = 0;
m->m_units = "seconds";
m++;
//m->m_title = "max query time";
//m->m_desc = "When computing the avgerage query latency "
// "truncate query latency times to this so that "
// "a single insanely long query latency time does "
// "not trigger the alarm. This is in seconds.";
//m->m_cgi = "mqlr";
//m->m_off = (char *)&g_conf.m_maxQueryTime - g;
//m->m_type = TYPE_FLOAT;
//m->m_def = "30.0";
//m->m_priv = 2;
//m->m_group = 0;
//m++;
m->m_title = "query success rate threshold";
m->m_desc = "Send email alerts when query success rate goes below "
"this threshold.";
m->m_cgi = "qsrt";
m->m_off = (char *)&g_conf.m_querySuccessThreshold - g;
m->m_type = TYPE_FLOAT;
m->m_def = "0.85";
m->m_priv = 2;
m->m_group = 0;
m++;
m->m_title = "number of query times in average";
m->m_desc = "Record this number of query times before calculating "
"average query latency.";
m->m_cgi = "nqt";
m->m_off = (char *)&g_conf.m_numQueryTimes - g;
m->m_type = TYPE_LONG;
m->m_def = "300";
m->m_priv = 2;
m->m_group = 0;
m++;
m->m_title = "max corrupt index lists";
m->m_desc = "If we reach this many corrupt index lists, send "
"an admin email. Set to -1 to disable.";
m->m_cgi = "mcil";
m->m_off = (char *)&g_conf.m_maxCorruptLists - g;
m->m_type = TYPE_LONG;
m->m_def = "5";
m->m_priv = 2;
m->m_group = 0;
m++;
/*
m->m_title = "root quality max cache age base";
m->m_desc = "Maximum age to cache quality of a root url in seconds. "
"Computing "
"the quality of especially root urls can be expensive. "
"This number is multiplied by (Q-30)/10 where Q is the cached "
"quality of the root url. Therefore, higher quality and more "
"stable root urls are updated less often, which is a good thing "
"since they are more expensive to recompute.";
m->m_cgi = "rqmca";
m->m_off = (char *)&g_conf.m_siteQualityMaxCacheAge - g;
m->m_type = TYPE_LONG;
m->m_def = "7257600"; // 3 months (in seconds)
m->m_units = "seconds";
m++;
*/
m->m_title = "max write threads";
m->m_desc = "Maximum number of threads to use per Gigablast process "
"for writing data to the disk. "
"Keep low to reduce file interlace effects and impact "
"on query response time.";
m->m_cgi = "mwt";
m->m_off = (char *)&g_conf.m_maxWriteThreads - g;
m->m_type = TYPE_LONG;
m->m_def = "1";
m->m_units = "threads";
m++;
m->m_title = "max spider read threads";
m->m_desc = "Maximum number of threads to use per Gigablast process "
"for accessing the disk "
"for index-building purposes. Keep low to reduce impact "
"on query response time. Increase for RAID systems or when "
"initially building an index.";
m->m_cgi = "smdt";
m->m_off = (char *)&g_conf.m_spiderMaxDiskThreads - g;
m->m_type = TYPE_LONG;
m->m_def = "7";
m->m_units = "threads";
m++;
m->m_title = "max spider big read threads";
m->m_desc = "This particular number applies to all reads above 1MB.";
m->m_cgi = "smbdt";
m->m_off = (char *)&g_conf.m_spiderMaxBigDiskThreads - g;
m->m_type = TYPE_LONG;
m->m_def = "3"; // 1
m->m_units = "threads";
m->m_group = 0;
m++;
m->m_title = "max spider medium read threads";
m->m_desc = "This particular number applies to all reads above 100K.";
m->m_cgi = "smmdt";
m->m_off = (char *)&g_conf.m_spiderMaxMedDiskThreads - g;
m->m_type = TYPE_LONG;
m->m_def = "4"; // 3
m->m_units = "threads";
m->m_group = 0;
m++;
m->m_title = "max spider small read threads";
// BUGFIX: desc was copy-pasted from the "big read threads" entry
// and wrongly claimed this applies to reads above 1MB. Big is
// >1MB and medium is >100K (see the two entries above), so the
// small tier covers the remaining reads.
m->m_desc = "This particular number applies to all remaining "
"smaller reads.";
m->m_cgi = "smsdt";
m->m_off = (char *)&g_conf.m_spiderMaxSmaDiskThreads - g;
m->m_type = TYPE_LONG;
m->m_def = "5";
m->m_units = "threads";
m->m_group = 0;
m++;
m->m_title = "max query read threads";
m->m_desc = "Maximum number of threads to use per Gigablast process "
"for accessing the disk "
"for querying purposes. IDE systems tend to be more "
"responsive when this is low. Increase for SCSI or RAID "
"systems.";
m->m_cgi = "qmdt";
m->m_off = (char *)&g_conf.m_queryMaxDiskThreads - g;
m->m_type = TYPE_LONG;
m->m_def = "20";
m->m_units = "threads";
m++;
m->m_title = "max query big read threads";
m->m_desc = "This particular number applies to all reads above 1MB.";
m->m_cgi = "qmbdt";
m->m_off = (char *)&g_conf.m_queryMaxBigDiskThreads - g;
m->m_type = TYPE_LONG;
m->m_def = "20"; // 1
m->m_units = "threads";
m->m_group = 0;
m++;
m->m_title = "max query medium read threads";
m->m_desc = "This particular number applies to all reads above 100K.";
m->m_cgi = "qmmdt";
m->m_off = (char *)&g_conf.m_queryMaxMedDiskThreads - g;
m->m_type = TYPE_LONG;
m->m_def = "20"; // 3
m->m_units = "threads";
m->m_group = 0;
m++;
m->m_title = "max query small read threads";
// BUGFIX: desc was copy-pasted from the "big read threads" entry
// and wrongly claimed this applies to reads above 1MB. Big is
// >1MB and medium is >100K (see the two entries above), so the
// small tier covers the remaining reads.
m->m_desc = "This particular number applies to all remaining "
"smaller reads.";
m->m_cgi = "qmsdt";
m->m_off = (char *)&g_conf.m_queryMaxSmaDiskThreads - g;
m->m_type = TYPE_LONG;
m->m_def = "20";
m->m_units = "threads";
m->m_group = 0;
m++;
m->m_title = "min popularity for speller";
m->m_desc = "Word or phrase must be present in this percent "
"of documents in order to qualify as a spelling "
"recommendation.";
m->m_cgi = "mps";
m->m_off = (char *)&g_conf.m_minPopForSpeller - g;
m->m_type = TYPE_FLOAT;
m->m_def = ".01";
m->m_units = "%%";
m->m_priv = 2;
m++;
m->m_title = "phrase weight";
m->m_desc = "Percent to weight phrases in queries.";
m->m_cgi = "qp";
m->m_off = (char *)&g_conf.m_queryPhraseWeight - g;
m->m_type = TYPE_FLOAT;
// was 350, but 'new mexico tourism' and 'boots uk'
// emphasized the phrase terms too much!!
m->m_def = "100";
m->m_units = "%%";
m++;
m->m_title = "weights.cpp slider parm (tmp)";
m->m_desc = "Percent of how much to use words to phrase ratio weights.";
m->m_cgi = "wsp";
m->m_off = (char *)&g_conf.m_sliderParm - g;
m->m_type = TYPE_LONG;
m->m_def = "90";
m->m_units = "%%";
m++;
/*
m->m_title = "indextable intersection algo to use";
m->m_desc = "0 means adds the term scores, 1 means average them "
"and 2 means take the RMS.";
m->m_cgi = "iia";
m->m_off = (char *)&g_conf.m_indexTableIntersectionAlgo - g;
m->m_type = TYPE_LONG;
m->m_def = "2";
m->m_group = 0;
m++;
*/
/*
m->m_title = "max weight";
m->m_desc = "Maximum, relative query term weight. Set to 0 or less "
"to indicate now max. 10.0 or 20.0 might be a good value.";
m->m_cgi = "qm";
m->m_off = (char *)&g_conf.m_queryMaxMultiplier - g;
m->m_type = TYPE_FLOAT;
m->m_def = "0.0";
m->m_group = 0;
m++;
*/
/*
m->m_title = "query term exponent";
m->m_desc = "Raise the weights of the query "
"terms to this power. The weight of a query term is "
"basically the log of its term frequency. Increasing "
"this will increase the effects of the term frequency "
"related to each term in the query. Term frequency is "
"also known as the term popularity. Very common words "
"typically have lower weights tied to them, but the effects "
"of such weighting will be increased if you increase this "
"exponent.";
m->m_cgi = "qte";
m->m_off = (char *)&g_conf.m_queryExp - g;
m->m_type = TYPE_FLOAT;
m->m_def = "1.1";
m->m_group = 0;
m++;
*/
/*
m->m_title = "use dynamic phrase weighting";
m->m_desc = "A new algorithm which reduces the weight on a query "
"word term if the query phrase terms it is in are of "
"similar popularity (term frequency) to that of the word "
"term.";
m->m_cgi = "udpw";
m->m_off = (char *)&g_conf.m_useDynamicPhraseWeighting - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m++;
*/
m->m_title = "maximum serialized query size";
m->m_desc = "When passing queries around the network, send the raw "
"string instead of the serialized query if the required "
"buffer is bigger than this. Smaller values decrease network "
"traffic for large queries at the expense of processing time.";
m->m_cgi = "msqs";
m->m_off = (char *)&g_conf.m_maxSerializedQuerySize - g;
m->m_type = TYPE_LONG;
m->m_def = "8192";
m->m_units = "bytes";
m++;
m->m_title = "merge buf size";
m->m_desc = "Read and write this many bytes at a time when merging "
"files. Smaller values are kinder to query performance, "
" but the merge takes longer. Use at least 1000000 for "
"fast merging.";
m->m_cgi = "mbs";
m->m_off = (char *)&g_conf.m_mergeBufSize - g;
m->m_type = TYPE_LONG;
// keep this way smaller than that 800k we had in here, 100k seems
// to be way better performance for qps
m->m_def = "500000";
m->m_units = "bytes";
m++;
m->m_title = "catdb minRecSizes";
m->m_desc = "minRecSizes for Catdb lookups";
m->m_cgi = "catmsr";
m->m_off = (char *)&g_conf.m_catdbMinRecSizes - g;
m->m_type = TYPE_LONG;
m->m_def = "100000000"; // 100 million
m++;
m->m_title = "max http sockets";
m->m_desc = "Maximum sockets available to serve incoming HTTP "
"requests. Too many outstanding requests will increase "
"query latency. Excess requests will simply have their "
"sockets closed.";
m->m_cgi = "ms";
m->m_off = (char *)&g_conf.m_httpMaxSockets - g;
m->m_type = TYPE_LONG;
m->m_def = "100";
m++;
m->m_title = "max https sockets";
m->m_desc = "Maximum sockets available to serve incoming HTTPS "
"requests. Like max http sockets, but for secure sockets.";
m->m_cgi = "mss";
m->m_off = (char *)&g_conf.m_httpsMaxSockets - g;
m->m_type = TYPE_LONG;
m->m_def = "100";
m->m_group = 0;
m++;
/*
m->m_title = "max http download sockets";
m->m_desc = "Maximum sockets available to spiders for downloading "
"web pages.";
m->m_cgi = "mds";
m->m_off = (char *)&g_conf.m_httpMaxDownloadSockets - g;
m->m_type = TYPE_LONG;
m->m_def = "5000";
m->m_group = 0;
m++;
*/
m->m_title = "auto save frequency";
m->m_desc = "Copy data in memory to disk after this many minutes "
"have passed without the data having been dumped or saved "
"to disk. Use 0 to disable.";
m->m_cgi = "asf";
m->m_off = (char *)&g_conf.m_autoSaveFrequency - g;
m->m_type = TYPE_LONG;
m->m_def = "30";
m->m_units = "mins";
m++;
m->m_title = "doc count adjustment";
m->m_desc = "Add this number to the total document count in the "
"index. Just used for displaying on the homepage.";
m->m_cgi = "dca";
m->m_off = (char *)&g_conf.m_docCountAdjustment - g;
m->m_type = TYPE_LONG;
m->m_def = "0";
m++;
m->m_title = "dynamic performance graph";
m->m_desc = "Generates profiling data for callbacks on page "
"performance";
m->m_cgi = "dpg";
m->m_off = (char *)&g_conf.m_dynamicPerfGraph - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
m->m_title = "enable profiling";
m->m_desc = "Enable profiler to do accounting of time taken by "
"functions. ";
m->m_cgi = "enp";
m->m_off = (char *)&g_conf.m_profilingEnabled - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
m->m_title = "minimum profiling threshold";
// fixed user-facing typo: "perfomance" -> "performance"
m->m_desc = "Profiler will not show functions which take less "
"than this many milliseconds "
"in the log or on the performance graph.";
m->m_cgi = "mpt";
m->m_off = (char *)&g_conf.m_minProfThreshold - g;
m->m_type = TYPE_LONG;
m->m_def = "10";
m->m_group = 0;
m++;
m->m_title = "sequential profiling.";
m->m_desc = "Produce a LOG_TIMING log message for each "
"callback called, along with the time it took. "
"Profiler must be enabled.";
m->m_cgi = "ensp";
m->m_off = (char *)&g_conf.m_sequentialProfiling - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "use statsdb";
m->m_desc = "Archive system statistics information in Statsdb.";
m->m_cgi = "usdb";
m->m_off = (char *)&g_conf.m_useStatsdb - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m++;
/*
m->m_title = "statsdb snapshots.";
m->m_desc = "Archive system statistics information in Statsdb. "
"Takes one snapshot every minute.";
m->m_cgi = "sdbss";
m->m_off = (char *)&g_conf.m_statsdbSnapshots - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "statsdb web interface.";
m->m_desc = "Enable the Statsdb page for viewing stats history.";
m->m_cgi = "sdbwi";
m->m_off = (char *)&g_conf.m_statsdbPageEnabled - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m++;
*/
/*
m->m_title = "max synonyms";
m->m_desc = "Maximum possible synonyms to expand a word to.";
m->m_cgi = "msyn";
m->m_off = (char *)&g_conf.m_maxSynonyms - g;
m->m_def = "5";
m->m_type = TYPE_LONG;
m++;
m->m_title = "default affinity";
m->m_desc = "spelling/number synonyms get this number as their "
"affinity; negative values mean treat them as unknown, "
"values higher than 1.0 get treated as 1.0";
m->m_cgi = "daff";
m->m_off = (char *)&g_conf.m_defaultAffinity - g;
m->m_def = "0.9";
m->m_type = TYPE_FLOAT;
m++;
m->m_title = "frequency threshold";
m->m_desc = "the minimum amount a synonym term has to be in relation "
"to its master term in order to be considered as a synonym";
m->m_cgi = "fqth";
m->m_off = (char *)&g_conf.m_frequencyThreshold - g;
m->m_def = "0.25";
m->m_type = TYPE_FLOAT;
m++;
m->m_title = "maximum affinity requests";
m->m_desc = "Maximum number of outstanding requests the affinity "
"builder can generate. Keep this number at 10 or lower for "
"local servers, higher for internet servers or servers with "
"high latency.";
m->m_cgi = "mar";
m->m_off = (char *)&g_conf.m_maxAffinityRequests - g;
m->m_def = "10";
m->m_type = TYPE_LONG;
m->m_group = 0;
m++;
m->m_title = "maximum affinity errors";
m->m_desc = "Maximum number of times the affinity builder should "
"encounter an error before giving up entirely.";
m->m_cgi = "mae";
m->m_off = (char *)&g_conf.m_maxAffinityErrors - g;
m->m_def = "100";
m->m_type = TYPE_LONG;
m->m_group = 0;
m++;
m->m_title = "affinity timeout";
m->m_desc = "Amount of time in milliseconds to wait for a response to "
"an affinity query. You shouldn't have to touch this unless "
"the network is slow or overloaded.";
m->m_cgi = "ato";
m->m_off = (char *)&g_conf.m_affinityTimeout - g;
m->m_def = "30000";
m->m_type = TYPE_LONG;
m->m_group = 0;
m++;
m->m_title = "affinity rebuild server";
m->m_desc = "Use this server:port to rebuild the affinity.";
m->m_cgi = "ars";
m->m_off = (char *)&g_conf.m_affinityServer - g;
m->m_def = "localhost:8000";
m->m_type = TYPE_STRING;
m->m_size = MAX_URL_LEN;
m->m_group = 0;
m++;
m->m_title = "additional affinity parameters";
m->m_desc = "Additional parameters to pass in the query. Tweak these "
"to get better/faster responses. Don't touch the raw parameter "
"unless you know what you are doing.";
m->m_cgi = "aap";
m->m_off = (char *)&g_conf.m_affinityParms - g;
m->m_def = "&raw=5&dio=1&n=1000&code=gbmonitor";
m->m_type = TYPE_STRING;
m->m_size = MAX_URL_LEN;
m->m_group = 0;
m++;
*/
m->m_title = "search results cache max age";
m->m_desc = "How many seconds should we cache a search results "
"page for?";
m->m_cgi = "srcma";
m->m_off = (char *)&g_conf.m_searchResultsMaxCacheAge - g;
m->m_def = "10800"; // 3 hrs
m->m_type = TYPE_LONG;
m->m_units = "seconds";
m++;
///////////////////////////////////////////
// AUTOBAN CONTROLS
//
///////////////////////////////////////////
m->m_title = "ban IPs";
m->m_desc = "add Ips here to bar them from accessing this "
"gigablast server.";
m->m_cgi = "banIps";
m->m_xml = "banIps";
m->m_off = (char *)g_conf.m_banIps - g;
m->m_type = TYPE_STRINGBOX;
m->m_page = PAGE_AUTOBAN;
m->m_size = AUTOBAN_TEXT_SIZE;
m->m_group = 1;
m->m_def = "";
m->m_sparm = 0;
m->m_plen = (char *)&g_conf.m_banIpsLen - g; // length of string
m++;
m->m_title = "allow IPs";
m->m_desc = "add Ips here to give them an infinite query quota.";
m->m_cgi = "allowIps";
m->m_xml = "allowIps";
m->m_off = (char *)g_conf.m_allowIps - g;
m->m_type = TYPE_STRINGBOX;
m->m_page = PAGE_AUTOBAN;
m->m_size = AUTOBAN_TEXT_SIZE;
m->m_group = 1;
m->m_sparm = 0;
m->m_def = "";
m->m_plen = (char *)&g_conf.m_allowIpsLen - g; // length of string
m++;
m->m_title = "valid search codes";
m->m_desc = "Don't try to autoban queries that have one "
"of these codes. Also, the code must be valid for us "
"to use &uip=IPADDRESS as the IP address of the submitter "
"for purposes of autoban AND purposes of addurl daily quotas.";
m->m_cgi = "validCodes";
m->m_xml = "validCodes";
m->m_off = (char *)g_conf.m_validCodes - g;
m->m_type = TYPE_STRINGBOX;
m->m_page = PAGE_AUTOBAN;
m->m_size = AUTOBAN_TEXT_SIZE;
m->m_group = 1;
m->m_def = "";
m->m_sparm = 0;
m->m_plen = (char *)&g_conf.m_validCodesLen - g; // length of string
m++;
m->m_title = "Extra Parms";
m->m_desc = "Append extra default parms to queries that match "
"certain substrings. Format: text to match in url, "
"followed by a space, then the list of extra parms as "
"they would appear appended to the url. "
"One match per line.";
m->m_cgi = "extraParms";
m->m_xml = "extraParms";
m->m_off = (char *)g_conf.m_extraParms - g;
m->m_type = TYPE_STRINGBOX;
m->m_page = PAGE_AUTOBAN;
m->m_size = AUTOBAN_TEXT_SIZE;
m->m_group = 1;
m->m_def = "";
m->m_sparm = 0;
m->m_plen = (char *)&g_conf.m_extraParmsLen - g; // length of string
m++;
m->m_title = "ban substrings";
m->m_desc = "ban any query that matches this list of "
"substrings. Must match all comma-separated strings "
"on the same line. ('\\n' = OR, ',' = AND)";
m->m_cgi = "banRegex";
m->m_xml = "banRegex";
m->m_off = (char *)g_conf.m_banRegex - g;
m->m_type = TYPE_STRINGBOX;
m->m_page = PAGE_AUTOBAN;
m->m_size = AUTOBAN_TEXT_SIZE;
m->m_group = 1;
m->m_sparm = 0;
m->m_def = "";
m->m_plen = (char *)&g_conf.m_banRegexLen - g; // length of string
m++;
///////////////////////////////////////////
// SECURITY CONTROLS
///////////////////////////////////////////
// Super Turks: facebook user IDs granted result-turking rights.
m->m_title = "Super Turks";
m->m_desc = "Add facebook user IDs here so those people can "
"turk the results. Later we may limit each person to "
"turking a geographic region.";
// NOTE(review): "supterturks" looks like a typo for "superturks",
// but renaming the cgi/xml keys would break existing gb.conf files
// and bookmarked admin URLs that use the old spelling -- confirm
// compatibility requirements before fixing.
m->m_cgi = "supterturks";
m->m_xml = "supterturks";
m->m_def = "";
m->m_off = (char *)&g_conf.m_superTurks - g;
m->m_type = TYPE_STRINGBOX;
m->m_perms = PAGE_MASTER;
m->m_size = USERS_TEXT_SIZE;
m->m_plen = (char *)&g_conf.m_superTurksLen - g;
m->m_page = PAGE_SECURITY;
m++;
/*
m->m_title = "Users";
m->m_desc = "Add users here. The format is "
"collection:ip:username:password:relogin:pages:tagnames"
" Username and password cannot be blank."
" You can specify "
"* for collection to indicate all collections. "
" * can be used in IP as wildcard. "
" * in pages means user has access to all pages. Also"
" you can specify individual pages. A \'-\' sign at the"
" start of page means user is not allowed to access that"
" page. Please refer the page reference table at the bottom "
"of this page for available pages. If you want to just login "
" once and avoid relogin for gb shutdowns then set relogin=1,"
" else set it to 0. If relogin is 1 your login will never expire either."
" "
" Ex: 1. master user -> *:*:master:master:1:*:english "
" 2. public user -> *:*:public:1234:0:index.html"
",get,search,login,dir:english "
"3. turk user -> 66.28.58.122:main:turk:1234:0:pageturkhome,"
"pageturk,pageturkget,get,login:english";
m->m_cgi = "users";
m->m_xml = "users";
m->m_off = (char *)&g_conf.m_users - g;
m->m_type = TYPE_STRINGBOX;
m->m_perms = PAGE_MASTER;
m->m_size = USERS_TEXT_SIZE;
m->m_plen = (char *)&g_conf.m_usersLen - g;
m->m_page = PAGE_SECURITY;
m++;
*/
/*m->m_title = "Master Passwords";
m->m_desc = "Passwords allowed to change Gigablast's general "
"parameters and also the parameters for any collection. "
"If no Master Password or Master IP is specified then "
"Gigablast will assign a default password of footbar23.";
m->m_cgi = "mpwd";
m->m_xml = "masterPassword";
m->m_max = MAX_MASTER_PASSWORDS;
m->m_off = (char *)&g_conf.m_masterPwds - g;
m->m_type = TYPE_STRINGNONEMPTY;
m->m_size = PASSWORD_MAX_LEN+1;
m->m_page = PAGE_SECURITY;
m++;
m->m_title = "Master IPs";
m->m_desc = "If someone connects from one of these IPs "
"then they will have full "
"master administrator priviledges. "
"If no IPs are specified, then master administrators can "
"get access for any IP. "
"Connecting from 127.0.0.1 always grants master privledges. "
"If no Master Password or Master IP is specified then "
"Gigablast will assign a default password of footbar23.";
m->m_cgi = "masterip";
m->m_xml = "masterIp";
m->m_max = MAX_MASTER_IPS;
m->m_off = (char *)g_conf.m_masterIps - g;
m->m_type = TYPE_IP;
m++;
*/
// Connect IPs: whitelist of IPs allowed to send UDP datagrams (and to
// reach the HTTP server even when it is disabled in Master Controls).
m->m_title = "Connect IPs";
m->m_desc = "Allow UDP requests from this list of IPs. Any datagram "
"received not coming from one of these IPs, or an IP in "
"hosts.conf, is dropped. If another cluster is accessing this "
"cluster for getting link text or whatever, you will need to "
"list the IPs of the accessing machines here. These IPs are "
"also used to allow access to the HTTP server even if it "
// fixed grammar: "have 0 has their" -> "have 0 as their"
"was disabled in the Master Controls. IPs that have 0 as "
"their Least Significant Byte are treated as wildcards for "
"IP blocks. That is, 1.2.3.0 means 1.2.3.*.";
m->m_cgi = "connectip";
m->m_xml = "connectIp";
m->m_max = MAX_CONNECT_IPS;
m->m_off = (char *)g_conf.m_connectIps - g;
m->m_type = TYPE_IP;
m->m_priv = 2;
m->m_def = "";
m++;
///////////////////////////////////////////
// LOG CONTROLS
///////////////////////////////////////////
m->m_title = "log http requests";
m->m_desc = "Log GET and POST requests received from the "
"http server?";
m->m_cgi = "hr";
m->m_off = (char *)&g_conf.m_logHttpRequests - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_page = PAGE_LOG;
m++;
m->m_title = "log autobanned queries";
m->m_desc = "Should we log queries that are autobanned? "
"They can really fill up the log.";
m->m_cgi = "laq";
m->m_off = (char *)&g_conf.m_logAutobannedQueries - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m++;
// Log any query slower than this threshold (milliseconds).
m->m_title = "log query time threshold";
// fixed typo: "millliseconds" -> "milliseconds"
m->m_desc = "If query took this many milliseconds or longer, then log the "
"query and the time it took to process.";
m->m_cgi = "lqtt";
m->m_off = (char *)&g_conf.m_logQueryTimeThreshold- g;
m->m_type = TYPE_LONG;
m->m_def = "5000";
m++;
m->m_title = "log query reply";
m->m_desc = "Log query reply in proxy, but only for those queries "
"above the time threshold above.";
m->m_cgi = "lqr";
m->m_off = (char *)&g_conf.m_logQueryReply - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "log spidered urls";
m->m_desc = "Log status of spidered or injected urls?";
m->m_cgi = "lsu";
m->m_off = (char *)&g_conf.m_logSpideredUrls - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m++;
m->m_title = "log network congestion";
m->m_desc = "Log messages if Gigablast runs out of udp sockets?";
m->m_cgi = "lnc";
m->m_off = (char *)&g_conf.m_logNetCongestion - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log informational messages";
m->m_desc = "Log messages not related to an error condition, "
"but meant more to give an idea of the state of "
"the gigablast process. These can be useful when "
"diagnosing problems.";
m->m_cgi = "li";
m->m_off = (char *)&g_conf.m_logInfo - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m++;
m->m_title = "log limit breeches";
m->m_desc = "Log it when document not added due to quota "
"breech. Log it when url is too long and it gets "
"truncated.";
m->m_cgi = "ll";
m->m_off = (char *)&g_conf.m_logLimits - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug admin messages";
m->m_desc = "Log various debug messages.";
m->m_cgi = "lda";
m->m_off = (char *)&g_conf.m_logDebugAdmin - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug build messages";
m->m_cgi = "ldb";
m->m_off = (char *)&g_conf.m_logDebugBuild - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug build time messages";
m->m_cgi = "ldbt";
m->m_off = (char *)&g_conf.m_logDebugBuildTime - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug database messages";
m->m_cgi = "ldd";
m->m_off = (char *)&g_conf.m_logDebugDb - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug disk messages";
m->m_cgi = "lddi";
m->m_off = (char *)&g_conf.m_logDebugDisk - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug dns messages";
m->m_cgi = "lddns";
m->m_off = (char *)&g_conf.m_logDebugDns - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug http messages";
m->m_cgi = "ldh";
m->m_off = (char *)&g_conf.m_logDebugHttp - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug loop messages";
m->m_cgi = "ldl";
m->m_off = (char *)&g_conf.m_logDebugLoop - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug language detection messages";
m->m_cgi = "ldg";
m->m_off = (char *)&g_conf.m_logDebugLang - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug link info";
m->m_cgi = "ldli";
m->m_off = (char *)&g_conf.m_logDebugLinkInfo - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug mem messages";
m->m_cgi = "ldm";
m->m_off = (char *)&g_conf.m_logDebugMem - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug mem usage messages";
m->m_cgi = "ldmu";
m->m_off = (char *)&g_conf.m_logDebugMemUsage - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug net messages";
m->m_cgi = "ldn";
m->m_off = (char *)&g_conf.m_logDebugNet - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug post query rerank messages";
m->m_cgi = "ldpqr";
m->m_off = (char *)&g_conf.m_logDebugPQR - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug query messages";
m->m_cgi = "ldq";
m->m_off = (char *)&g_conf.m_logDebugQuery - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug quota messages";
m->m_cgi = "ldqta";
m->m_off = (char *)&g_conf.m_logDebugQuota - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug robots messages";
m->m_cgi = "ldr";
m->m_off = (char *)&g_conf.m_logDebugRobots - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug spider cache messages";
m->m_cgi = "lds";
m->m_off = (char *)&g_conf.m_logDebugSpcache - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug speller messages";
m->m_cgi = "ldsp";
m->m_off = (char *)&g_conf.m_logDebugSpeller - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug sections messages";
m->m_cgi = "ldscc";
m->m_off = (char *)&g_conf.m_logDebugSections - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug seo insert messages";
m->m_cgi = "ldsi";
m->m_off = (char *)&g_conf.m_logDebugSEOInserts - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug seo messages";
m->m_cgi = "ldseo";
m->m_off = (char *)&g_conf.m_logDebugSEO - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_priv = 1;
m++;
m->m_title = "log debug stats messages";
m->m_cgi = "ldst";
m->m_off = (char *)&g_conf.m_logDebugStats - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug summary messages";
m->m_cgi = "ldsu";
m->m_off = (char *)&g_conf.m_logDebugSummary - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug spider messages";
m->m_cgi = "ldspid";
m->m_off = (char *)&g_conf.m_logDebugSpider - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug url attempts";
m->m_cgi = "ldspua";
m->m_off = (char *)&g_conf.m_logDebugUrlAttempts - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug spider downloads";
m->m_cgi = "ldsd";
m->m_off = (char *)&g_conf.m_logDebugDownloads - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug facebook";
m->m_cgi = "ldfb";
m->m_off = (char *)&g_conf.m_logDebugFacebook - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug tagdb messages";
m->m_cgi = "ldtm";
m->m_off = (char *)&g_conf.m_logDebugTagdb - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug tcp messages";
m->m_cgi = "ldt";
m->m_off = (char *)&g_conf.m_logDebugTcp - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug thread messages";
m->m_cgi = "ldth";
m->m_off = (char *)&g_conf.m_logDebugThread - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug title messages";
m->m_cgi = "ldti";
m->m_off = (char *)&g_conf.m_logDebugTitle - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug timedb messages";
m->m_cgi = "ldtim";
m->m_off = (char *)&g_conf.m_logDebugTimedb - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug topic messages";
m->m_cgi = "ldto";
m->m_off = (char *)&g_conf.m_logDebugTopics - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug topDoc messages";
m->m_cgi = "ldtopd";
m->m_off = (char *)&g_conf.m_logDebugTopDocs - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug udp messages";
m->m_cgi = "ldu";
m->m_off = (char *)&g_conf.m_logDebugUdp - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug unicode messages";
m->m_cgi = "ldun";
m->m_off = (char *)&g_conf.m_logDebugUnicode - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug repair messages";
m->m_cgi = "ldre";
m->m_off = (char *)&g_conf.m_logDebugRepair - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log debug pub date extraction messages";
m->m_cgi = "ldpd";
m->m_off = (char *)&g_conf.m_logDebugDate - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log timing messages for build";
m->m_desc = "Log various timing related messages.";
m->m_cgi = "ltb";
m->m_off = (char *)&g_conf.m_logTimingBuild - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log timing messages for admin";
m->m_desc = "Log various timing related messages.";
m->m_cgi = "ltadm";
m->m_off = (char *)&g_conf.m_logTimingAdmin - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log timing messages for database";
m->m_cgi = "ltd";
m->m_off = (char *)&g_conf.m_logTimingDb - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log timing messages for network layer";
m->m_cgi = "ltn";
m->m_off = (char *)&g_conf.m_logTimingNet - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log timing messages for query";
m->m_cgi = "ltq";
m->m_off = (char *)&g_conf.m_logTimingQuery - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log timing messages for spcache";
m->m_desc = "Log various timing related messages.";
m->m_cgi = "ltspc";
m->m_off = (char *)&g_conf.m_logTimingSpcache - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log timing messages for related topics";
m->m_cgi = "ltt";
m->m_off = (char *)&g_conf.m_logTimingTopics - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
m->m_title = "log reminder messages";
m->m_desc = "Log reminders to the programmer. You do not need this.";
m->m_cgi = "lr";
m->m_off = (char *)&g_conf.m_logReminders - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m++;
///////////////////////////////////////////
// SYNC CONTROLS
///////////////////////////////////////////
/*
m->m_title = "sync enabled";
m->m_desc = "Turn data synchronization on or off. When a host comes "
"up he will perform an incremental synchronization with a "
"twin if he detects that he was unable to save his data "
"when he last exited.";
m->m_cgi = "sye";
m->m_off = (char *)&g_conf.m_syncEnabled - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_page = PAGE_SYNC;
m++;
m->m_title = "dry run";
m->m_desc = "Should Gigablast just run through and log the changes "
"it would make without actually making them?";
m->m_cgi = "sdr";
m->m_off = (char *)&g_conf.m_syncDryRun - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
m->m_title = "sync indexdb";
m->m_desc = "Turn data synchronization on or off for indexdb. "
"Indexdb holds the index information.";
m->m_cgi = "si";
m->m_off = (char *)&g_conf.m_syncIndexdb - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m++;
m->m_title = "sync logging";
m->m_desc = "Log fixes?";
m->m_cgi = "slf";
m->m_off = (char *)&g_conf.m_syncLogging - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
m->m_title = "union titledb and spiderdb";
m->m_desc = "If a host being sync'd has a title record (cached web "
"page) that the "
"remote host does not, normally, it would be deleted. "
"But if this is true then it is kept. "
"Useful for reducing title rec not found errors.";
m->m_cgi = "sdu";
m->m_off = (char *)&g_conf.m_syncDoUnion - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
m->m_title = "force out of sync";
m->m_desc = "Forces this host to be out of sync.";
m->m_cgi = "foos";
m->m_type = TYPE_CMD;
m->m_func = CommandForceOutOfSync;
m->m_cast = 0;
m++;
m->m_title = "bytes per second";
m->m_desc = "How many bytes to read per second for syncing. "
"Decrease to reduce impact of syncing on query "
"response time.";
m->m_cgi = "sbps";
m->m_off = (char *)&g_conf.m_syncBytesPerSecond - g;
m->m_type = TYPE_LONG;
m->m_def = "10000000";
m->m_units = "bytes";
m++;
*/
/////////////////////
//
// DIFFBOT CRAWLBOT PARMS
//
//////////////////////
///////////
//
// DO NOT INSERT parms above here, unless you set
// m_obj = OBJ_COLL !!! otherwise it thinks it belongs to
// OBJ_CONF as used in the above parms.
//
///////////
m->m_cgi = "dbtoken";
m->m_xml = "diffbotToken";
m->m_off = (char *)&cr.m_diffbotToken - x;
m->m_type = TYPE_SAFEBUF;
m->m_page = PAGE_NONE;
m->m_obj = OBJ_COLL;
m->m_def = "";
m++;
m->m_cgi = "dbseed";
m->m_xml = "diffbotSeed";
m->m_off = (char *)&cr.m_diffbotSeed - x;
m->m_type = TYPE_SAFEBUF;
m->m_page = PAGE_NONE;
m->m_obj = OBJ_COLL;
m->m_def = "";
m++;
m->m_cgi = "dbapi";
m->m_xml = "diffbotApi";
m->m_off = (char *)&cr.m_diffbotApi - x;
m->m_type = TYPE_SAFEBUF;
m->m_page = PAGE_NONE;
m->m_def = "";
m++;
m->m_cgi = "dbapiqs";
m->m_xml = "diffbotApiQueryString";
m->m_off = (char *)&cr.m_diffbotApiQueryString - x;
m->m_type = TYPE_SAFEBUF;
m->m_page = PAGE_NONE;
m->m_def = "";
m++;
m->m_cgi = "dbucp";
m->m_xml = "diffbotUrlCrawlPattern";
m->m_off = (char *)&cr.m_diffbotUrlCrawlPattern - x;
m->m_type = TYPE_SAFEBUF;
m->m_page = PAGE_NONE;
m->m_def = "";
m++;
m->m_cgi = "dbupp";
m->m_xml = "diffbotUrlProcessPattern";
m->m_off = (char *)&cr.m_diffbotUrlProcessPattern - x;
m->m_type = TYPE_SAFEBUF;
m->m_page = PAGE_NONE;
m->m_def = "";
m++;
m->m_cgi = "dbppp";
m->m_xml = "diffbotPageProcessPattern";
m->m_off = (char *)&cr.m_diffbotPageProcessPattern - x;
m->m_type = TYPE_SAFEBUF;
m->m_page = PAGE_NONE;
m->m_def = "";
m++;
m->m_cgi = "dbclassify";
m->m_xml = "diffbotClassify";
m->m_off = (char *)&cr.m_diffbotClassify - x;
m->m_type = TYPE_CHAR;
m->m_page = PAGE_NONE;
m->m_def = "0";
m++;
m->m_xml = "useDiffbot";
m->m_off = (char *)&cr.m_useDiffbot - x;
m->m_type = TYPE_CHAR;
m->m_page = PAGE_NONE;
m->m_def = "0";
m++;
m->m_xml = "isCustomCrawl";
m->m_off = (char *)&cr.m_isCustomCrawl - x;
m->m_type = TYPE_CHAR;
m->m_page = PAGE_NONE;
m->m_def = "0";
m++;
m->m_cgi = "dbmaxtocrawl";
m->m_xml = "diffbotMaxToCrawl";
m->m_off = (char *)&cr.m_diffbotMaxToCrawl - x;
m->m_type = TYPE_LONG_LONG;
m->m_page = PAGE_NONE;
m->m_def = "";
m++;
m->m_cgi = "dbmaxtoprocess";
m->m_xml = "diffbotMaxToProcess";
m->m_off = (char *)&cr.m_diffbotMaxToProcess - x;
m->m_type = TYPE_LONG_LONG;
m->m_page = PAGE_NONE;
m->m_def = "";
m++;
/*
m->m_cgi = "dbcrawlstarttime";
m->m_xml = "diffbotCrawlStartTime";
m->m_off = (char *)&cr.m_diffbotCrawlStartTime - x;
m->m_type = TYPE_LONG_LONG;
m->m_page = PAGE_NONE;
m++;
m->m_cgi = "dbcrawlendtime";
m->m_xml = "diffbotCrawlEndTime";
m->m_off = (char *)&cr.m_diffbotCrawlEndTime - x;
m->m_type = TYPE_LONG_LONG;
m->m_page = PAGE_NONE;
m++;
m->m_cgi = "isdbtestcrawl";
m->m_xml = "isDiffbotTestCrawl";
m->m_off = (char *)&cr.m_isDiffbotTestCrawl - x;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_NONE;
m++;
*/
///////////////////////////////////////////
// SPIDER CONTROLS
///////////////////////////////////////////
// just a comment in the conf file
// Comment-only parm written at the top of the conf-file spider section.
// NOTE: the second occurrence of the special characters must appear as
// HTML entities inside the literal -- a bare '"' would terminate the
// string and leave "and #" as stray tokens (syntax error), and the
// decoded text would tell users to represent the characters as
// themselves, which is meaningless.
m->m_desc =
"All <, >, \" and # characters that are values for a field "
"contained herein must be represented as "
"&lt;, &gt;, &#34; and &#035; respectively.";
m->m_type = TYPE_COMMENT;
m->m_page = PAGE_SPIDER;
m->m_obj = OBJ_COLL;
m++;
m->m_title = "spidering enabled";
m->m_desc = "When enabled the spider adds pages to your index. ";
m->m_cgi = "cse";
m->m_off = (char *)&cr.m_spideringEnabled - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m++;
/*
m->m_title = "new spidering enabled";
m->m_desc = "When enabled the spider adds NEW "
"pages to your index. ";
m->m_cgi = "nse";
m->m_off = (char *)&cr.m_newSpideringEnabled - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
m->m_title = "old spidering enabled";
m->m_desc = "When enabled the spider will re-visit "
"and update pages that are already in your index.";
m->m_cgi = "ose";
m->m_off = (char *)&cr.m_oldSpideringEnabled - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "new spider weight";
m->m_desc = "Weight time slices of new spiders in the priority "
"page by this factor relative to the old spider queues.";
m->m_cgi = "nsw";
m->m_off = (char *)&cr.m_newSpiderWeight - x;
m->m_type = TYPE_FLOAT;
m->m_def = "1.0";
m->m_group = 0;
m++;
*/
m->m_title = "spider delay in milliseconds";
m->m_desc = "make each spider wait this many milliseconds before "
"getting the ip and downloading the page.";
m->m_cgi = "sdms";
m->m_off = (char *)&cr.m_spiderDelayInMilliseconds - x;
m->m_type = TYPE_LONG;
m->m_def = "0";
m++;
m->m_title = "max spiders";
m->m_desc = "What is the maximum number of web "
"pages the spider is allowed to download "
"simultaneously?";
m->m_cgi = "mns";
m->m_off = (char *)&cr.m_maxNumSpiders - x;
m->m_type = TYPE_LONG;
m->m_def = "200";
m->m_group = 0;
m++;
m->m_title = "add url enabled";
m->m_desc = "If this is enabled others can add "
"web pages to your index via the add url page.";
m->m_cgi = "aue";
m->m_off = (char *)&cr.m_addUrlEnabled - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
m->m_title = "daily merge time";
m->m_desc = "Do a tight merge on indexdb and datedb at this time "
"every day. This is expressed in MINUTES past midnight UTC. "
"UTC is 5 hours ahead "
"of EST and 7 hours ahead of MST. Leave this as -1 to "
"NOT perform a daily merge. To merge at midnight EST use "
"60*5=300 and midnight MST use 60*7=420.";
m->m_cgi = "dmt";
m->m_off = (char *)&cr.m_dailyMergeTrigger - x;
m->m_type = TYPE_LONG;
m->m_def = "-1";
m->m_units = "minutes";
m++;
// Which days of week the daily merge may run on (comma separated).
m->m_title = "daily merge days";
// fixed typo: "parmaeter" -> "parameter"
m->m_desc = "Comma separated list of days to merge on. Use "
"0 for Sunday, 1 for Monday, ... 6 for Saturday. Leaving "
"this parameter empty or without any numbers will make the "
"daily merge happen every day";
m->m_cgi = "dmdl";
m->m_off = (char *)&cr.m_dailyMergeDOWList - x;
m->m_type = TYPE_STRING;
m->m_size = 48;
// make sunday the default
m->m_def = "0";
m++;
m->m_title = "daily merge last started";
m->m_desc = "When the daily merge was last kicked off. Expressed in "
"UTC in seconds since the epoch.";
m->m_cgi = "dmls";
m->m_off = (char *)&cr.m_dailyMergeStarted - x;
m->m_type = TYPE_LONG_CONST;
m->m_def = "-1";
m->m_group = 0;
m++;
m->m_title = "use datedb";
m->m_desc = "Index documents for generating results sorted by date "
"or constrained by date range. Only documents indexed while "
"this is enabled will be returned for date-related searches.";
m->m_cgi = "ud";
m->m_off = (char *)&cr.m_useDatedb - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
m->m_title = "age cutoff for datedb";
m->m_desc = "Do not index pubdates into datedb that are more "
"than this many days old. Use -1 for no limit. A value "
"of zero essentially turns off datedb. Pre-existing pubdates "
"in datedb that fail to meet this constraint WILL BE "
"COMPLETELY ERASED when datedb is merged.";
m->m_cgi = "dbc";
m->m_off = (char *)&cr.m_datedbCutoff - x;
m->m_type = TYPE_LONG;
m->m_def = "-1";
m->m_units = "days";
m++;
m->m_title = "datedb default timezone";
m->m_desc = "Default timezone to use when none specified on parsed "
"time. Use offset from GMT, i.e 0400 (AMT) or -0700 (MST)";
m->m_cgi = "ddbdt";
m->m_off = (char *)&cr.m_datedbDefaultTimezone - x;
m->m_type = TYPE_LONG;
m->m_def = "0";
m->m_group = 0;
m++;
//m->m_title = "days before now to index";
//m->m_desc = "Only index page if the datedb date was found to be "
// "within this many days of the current time. Use 0 to index "
// "all dates. Parm is float for fine control.";
//m->m_cgi = "ddbdbn";
//m->m_off = (char *)&cr.m_datedbDaysBeforeNow - x;
//m->m_type = TYPE_FLOAT;
//m->m_def = "0";
//m->m_group = 0;
//m++;
m->m_title = "turing test enabled";
m->m_desc = "If this is true, users will have to "
"pass a simple Turing test to add a url. This prevents "
"automated url submission.";
m->m_cgi = "dtt";
m->m_off = (char *)&cr.m_doTuringTest - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
m->m_title = "spider links";
m->m_desc = "If this is false, the spider will not "
"harvest links from web pages it visits. Links that it does "
"harvest will be attempted to be indexed at a later time. ";
m->m_cgi = "sl";
m->m_off = (char *)&cr.m_spiderLinks - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m++;
/*
MDW: use the "onsite" directive in the url filters page now...
m->m_title = "only spider links from same host";
m->m_desc = "If this is true the spider will only harvest links "
"to pages that are contained on the same host as the page "
"that is being spidered. "
"Example: When spidering a page from "
"www.gigablast.com, only links to pages that are from "
"www.gigablast.com would "
"be harvested, if this switch were enabled. This allows you "
"to seed the spider with URLs from a specific set of hosts "
"and ensure that only links to pages that are from those "
"hosts are harvested.";
m->m_cgi = "slsh";
m->m_off = (char *)&cr.m_sameHostLinks - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m++;
*/
m->m_title = "do not re-add old outlinks more than this many days";
m->m_desc = "If less than this many days have elapsed since the "
"last time we added the outlinks to spiderdb, do not re-add "
"them to spiderdb. Saves resources.";
m->m_cgi = "slrf";
m->m_off = (char *)&cr.m_outlinksRecycleFrequencyDays - x;
m->m_type = TYPE_FLOAT;
m->m_def = "30";
m->m_group = 0;
m++;
/*
m->m_title = "spider links by priority";
m->m_desc = "Specify priorities for which links should be spidered. "
"If the spider links option above is "
"disabled then these setting will have no effect.";
m->m_cgi = "slp";
m->m_xml = "spiderLinksByPriority";
m->m_off = (char *)&cr.m_spiderLinksByPriority - x;
m->m_type = TYPE_PRIORITY_BOXES; // array of numbered (0-(MAX_SPIDER_PRIORITIES-1)) checkboxes
m->m_fixed = MAX_SPIDER_PRIORITIES;
m->m_def = "1"; // default for each one is on
m->m_group = 0;
m++;
*/
/*
m->m_title = "min link priority";
m->m_desc = "Only add links to the spider "
"queue if their spider priority is this or higher. "
"This can make the spider process more efficient "
"since a lot of disk seeks are used when adding "
"links.";
m->m_cgi = "mlp";
m->m_off = (char *)&cr.m_minLinkPriority - x;
m->m_type = TYPE_PRIORITY;
m->m_def = "0";
m->m_group = 0;
m++;
*/
/* m->m_title = "maximum hops from parent page";
m->m_desc = "Only index pages that are within a particular number "
"of hops from the parent page given in Page Add Url. -1 means "
"that max hops is infinite.";
m->m_cgi = "mnh";
m->m_off = (char *)&cr.m_maxNumHops - x;
m->m_type = TYPE_CHAR2;
m->m_def = "-1";
m->m_group = 0;
m++;*/
m->m_title = "scraping enabled procog";
m->m_desc = "Do searches for queries in this hosts part of the "
"query log.";
m->m_cgi = "scrapepc";
m->m_off = (char *)&cr.m_scrapingEnabledProCog - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
m->m_title = "scraping enabled web";
// fixed copy-paste from the "news" parm below: this is the *web*
// scraper (adds only "ingoogle" tags), not the news search engine
m->m_desc = "Perform random searches on googles web search engine "
"to add sites with ingoogle tags into tagdb.";
m->m_cgi = "scrapeweb";
m->m_off = (char *)&cr.m_scrapingEnabledWeb - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
m->m_title = "scraping enabled news";
m->m_desc = "Perform random searches on googles news search engine "
"to add sites with news and goognews and ingoogle "
"tags into tagdb.";
m->m_cgi = "scrapenews";
m->m_off = (char *)&cr.m_scrapingEnabledNews - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "scraping enabled blogs";
m->m_desc = "Perform random searches on googles news search engine "
"to add sites with blogs and googblogs and ingoogle "
"tags into tagdb.";
m->m_cgi = "scrapeblogs";
m->m_off = (char *)&cr.m_scrapingEnabledBlogs - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "subsite detection enabled";
m->m_desc = "Add the \"sitepathdepth\" to tagdb if a hostname "
"is determined to have subsites at a particular depth.";
m->m_cgi = "ssd";
m->m_off = (char *)&cr.m_subsiteDetectionEnabled - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
m->m_title = "deduping enabled";
m->m_desc = "When enabled, the spider will "
"discard web pages which are identical to other web pages "
"that are already in the index AND that are from the same "
"hostname. An example of a hostname is www1.ibm.com. "
"However, root urls, urls that have no path, are never "
"discarded. ";
m->m_cgi = "de";
m->m_off = (char *)&cr.m_dedupingEnabled - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
m->m_title = "deduping enabled for www";
m->m_desc = "When enabled, the spider will "
"discard web pages which, when a www is prepended to the "
"page's url, result in a url already in the index.";
m->m_cgi = "dew";
m->m_off = (char *)&cr.m_dupCheckWWW - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
m->m_title = "detect custom error pages";
m->m_desc = "Detect and do not index pages which have a 200 status"
" code, but are likely to be error pages.";
m->m_cgi = "dcep";
m->m_off = (char *)&cr.m_detectCustomErrorPages - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m++;
m->m_title = "delete 404s";
m->m_desc = "Should pages be removed from the index if they are no "
"longer accessible on the web?";
m->m_cgi = "dnf";
m->m_off = (char *)&cr.m_delete404s - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m++;
// Whether repeatedly-timing-out documents get deleted from the index.
m->m_title = "delete timed out docs";
// fixed grammar: "retried them enough times" -> "retried enough times"
m->m_desc = "Should documents be deleted from the index "
"if they have been retried enough times and the "
"last received error is a time out? "
"If your internet connection is flaky you may say "
"no here to ensure you do not lose important docs.";
m->m_cgi = "dt";
m->m_off = (char *)&cr.m_deleteTimeouts - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "use simplified redirects";
m->m_desc = "If this is true, the spider, when a url redirects "
"to a \"simpler\" url, will add that simpler url into "
"the spider queue and abandon the spidering of the current "
"url.";
m->m_cgi = "usr";
m->m_off = (char *)&cr.m_useSimplifiedRedirects - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m++;
// Skip re-downloading unchanged pages via conditional GETs.
m->m_title = "use ifModifiedSince";
m->m_desc = "If this is true, the spider, when "
"updating a web page that is already in the index, will "
"not even download the whole page if it hasn't been "
"updated since the last time Gigablast spidered it. "
"This is primarily a bandwidth saving feature. It relies on "
// fixed header name: the response header is "Last-Modified";
// "If-Modified-Since" is the request header this feature sends
"the remote webserver's returned Last-Modified field "
"being accurate.";
m->m_cgi = "uims";
m->m_off = (char *)&cr.m_useIfModifiedSince - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
m->m_title = "build similarity vector from content only";
m->m_desc = "If this is true, the spider, when checking the page "
"if it has changed enough to reindex or update the "
"published date, it will build the vector only from "
"the content located on that page.";
m->m_cgi = "bvfc";
m->m_off = (char *)&cr.m_buildVecFromCont - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m++;
m->m_title = "use content similarity to index publish date";
m->m_desc = "This requires build similarity from content only to be "
"on. This indexes the publish date (only if the content "
"has changed enough) to be between the last two spider "
"dates.";
m->m_cgi = "uspd";
m->m_off = (char *)&cr.m_useSimilarityPublishDate - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
m->m_title = "max percentage similar to update publish date";
m->m_desc = "This requires build similarity from content only and "
"use content similarity to index publish date to be "
"on. This percentage is the maximum similarity that can "
"exist between an old document and new before the publish "
"date will be updated.";
m->m_cgi = "mpspd";
m->m_off = (char *)&cr.m_maxPercentSimilarPublishDate - x;
m->m_type = TYPE_LONG;
m->m_def = "80";
m->m_group = 0;
m++;
m->m_title = "use robots.txt";
m->m_desc = "If this is true Gigablast will respect "
"the robots.txt convention.";
m->m_cgi = "urt";
m->m_off = (char *)&cr.m_useRobotsTxt - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m++;
// NOTE(review): "sporn" is presumably intentional slang (spam+porn --
// the backing field is m_doUrlSpamCheck and the desc mentions lewd
// words) -- confirm before "correcting" the title.
m->m_title = "do url sporn checking";
m->m_desc = "If this is true and the spider finds "
"lewd words in the hostname of a url it will throw "
"that url away. It will also throw away urls that have 5 or "
"more hyphens in their hostname.";
m->m_cgi = "dusc";
m->m_off = (char *)&cr.m_doUrlSpamCheck - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
m->m_title = "hours before adding unspiderable url to spiderdb";
m->m_desc = "Hours to wait after trying to add an unspiderable url "
"to spiderdb again.";
m->m_cgi = "dwma";
m->m_off = (char *)&cr.m_deadWaitMaxAge - x;
m->m_type = TYPE_LONG;
m->m_def = "24";
m++;
//m->m_title = "link text anomaly threshold";
//m->m_desc = "Prevent pages from link voting for "
// "another page if its link text has a "
// "word which doesn't occur in at least this "
// "many other link texts. (set to 1 to disable)";
//m->m_cgi = "ltat";
//m->m_off = (char *)&cr.m_linkTextAnomalyThresh - x;
//m->m_type = TYPE_LONG;
//m->m_def = "2";
//m++;
// Domain-quota enforcement for documents being added to the index.
// NOTE: descriptions fixed — "breeched" -> "breached" (display text only;
// titles/cgi names untouched to preserve saved-config compatibility).
m->m_title = "enforce domain quotas on new docs";
m->m_desc = "If this is true then new documents will be removed "
"from the index if the quota for their domain "
"has been breached.";
m->m_cgi = "enq";
m->m_off = (char *)&cr.m_enforceNewQuotas - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
// Same quota enforcement, but applied to documents already indexed.
m->m_title = "enforce domain quotas on indexed docs";
m->m_desc = "If this is true then indexed documents will be removed "
"from the index if the quota for their domain has been "
"breached.";
m->m_cgi = "eoq";
m->m_off = (char *)&cr.m_enforceOldQuotas - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m++;
// Exact (non-approximate) quota counting; costs extra disk seeks.
m->m_title = "use exact quotas";
m->m_desc = "Does not use approximations so will do more disk seeks "
"and may impact indexing performance significantly.";
m->m_cgi = "ueq";
m->m_off = (char *)&cr.m_exactQuotas - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m++;
// Limit linker lookups to the root indexdb file to save seeks.
// Description typo fixed: "indexb" -> "indexdb".
m->m_title = "restrict indexdb for spidering";
m->m_desc = "If this is true then only the root indexdb file is "
"searched for linkers. Saves on disk seeks, "
"but may use older versions of indexed web pages.";
m->m_cgi = "ris";
m->m_off = (char *)&cr.m_restrictIndexdbForSpider - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
/*
m->m_title = "indexdb max total files to merge";
m->m_desc = "Do not merge more than this many files during a single "
"merge operation. Merge does not scale well to numbers above "
"50 or so.";
m->m_cgi = "mttftm";
m->m_off = (char *)&cr.m_indexdbMinTotalFilesToMerge - x;
m->m_def = "50";
//m->m_max = 100;
m->m_type = TYPE_LONG;
m++;
m->m_title = "indexdb min files needed to trigger merge";
m->m_desc = "Merge is triggered when this many indexdb data files "
"are on disk.";
m->m_cgi = "miftm";
m->m_off = (char *)&cr.m_indexdbMinFilesToMerge - x;
m->m_def = "6"; // default to high query performance, not spider
m->m_type = TYPE_LONG;
m->m_group = 0;
m++;
m->m_title = "datedb min files needed to trigger to merge";
m->m_desc = "Merge is triggered when this many datedb data files "
"are on disk.";
m->m_cgi = "mdftm";
m->m_off = (char *)&cr.m_datedbMinFilesToMerge - x;
m->m_def = "5";
m->m_type = TYPE_LONG;
m->m_group = 0;
m++;
*/
// Per-database merge thresholds: a file merge is kicked off once the
// on-disk file count for the database reaches the configured value.
m->m_title = "spiderdb min files needed to trigger to merge";
m->m_desc = "Merge is triggered when this many spiderdb data files "
"are on disk.";
m->m_cgi = "msftm";
m->m_off = (char *)&cr.m_spiderdbMinFilesToMerge - x;
m->m_type = TYPE_LONG;
m->m_def = "2";
m->m_group = 0;
m++;
/*
m->m_title = "checksumdb min files needed to trigger to merge";
m->m_desc = "Merge is triggered when this many checksumdb data files "
"are on disk.";
m->m_cgi = "mcftm";
m->m_off = (char *)&cr.m_checksumdbMinFilesToMerge - x;
m->m_def = "2";
m->m_type = TYPE_LONG;
m->m_group = 0;
m++;
*/
// clusterdb merge threshold.
m->m_title = "clusterdb min files needed to trigger to merge";
m->m_desc = "Merge is triggered when this many clusterdb data files "
"are on disk.";
m->m_cgi = "mclftm";
m->m_off = (char *)&cr.m_clusterdbMinFilesToMerge - x;
m->m_type = TYPE_LONG;
m->m_def = "2";
m->m_group = 0;
m++;
// linkdb merge threshold.
m->m_title = "linkdb min files needed to trigger to merge";
m->m_desc = "Merge is triggered when this many linkdb data files "
"are on disk.";
m->m_cgi = "mlkftm";
m->m_off = (char *)&cr.m_linkdbMinFilesToMerge - x;
m->m_type = TYPE_LONG;
m->m_def = "4";
m->m_group = 0;
m++;
//m->m_title = "tagdb min files to merge";
//m->m_desc = "Merge is triggered when this many linkdb data files "
// "are on disk.";
//m->m_cgi = "mtftm";
//m->m_off = (char *)&cr.m_tagdbMinFilesToMerge - x;
//m->m_def = "2";
//m->m_type = TYPE_LONG;
//m->m_group = 0;
//m++;
// this is overridden by collection
// titledb merge threshold (per-collection override possible).
m->m_title = "titledb min files needed to trigger to merge";
m->m_desc = "Merge is triggered when this many titledb data files "
"are on disk.";
m->m_cgi = "mtftm";
m->m_off = (char *)&cr.m_titledbMinFilesToMerge - x;
m->m_def = "6";
m->m_type = TYPE_LONG;
//m->m_save = 0;
m->m_group = 0;
m++;
//m->m_title = "sectiondb min files to merge";
//m->m_desc ="Merge is triggered when this many sectiondb data files "
// "are on disk.";
//m->m_cgi = "mscftm";
//m->m_off = (char *)&cr.m_sectiondbMinFilesToMerge - x;
//m->m_def = "4";
//m->m_type = TYPE_LONG;
//m->m_group = 0;
//m++;
// posdb merge threshold.
m->m_title = "posdb min files needed to trigger to merge";
m->m_desc = "Merge is triggered when this many posdb data files "
"are on disk.";
m->m_cgi = "mpftm";
m->m_off = (char *)&cr.m_posdbMinFilesToMerge - x;
m->m_def = "6";
m->m_type = TYPE_LONG;
m->m_group = 0;
m++;
// Reindex from stored content instead of re-downloading; used when
// rebuilding an index or re-applying a ruleset.
m->m_title = "recycle content";
m->m_desc = "Rather than downloading the content again when "
"indexing old urls, use the stored content. Useful for "
"reindexing documents under a different ruleset or for "
"rebuilding an index. You usually "
"should turn off the 'use robots.txt' switch. "
"And turn on the 'use old ips' and "
"'recycle link votes' switches for speed. If rebuilding an "
"index then you should turn off the 'only index changes' "
"switches.";
m->m_cgi = "rc";
m->m_off = (char *)&cr.m_recycleContent - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
// Master switch for link-text indexing and link-structure quality boosts.
m->m_title = "enable link voting";
m->m_desc = "If this is true Gigablast will "
"index hyper-link text and use hyper-link "
"structures to boost the quality of indexed documents.";
m->m_cgi = "glt";
m->m_off = (char *)&cr.m_getLinkInfo - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m++;
// Filters out spammy inlinks from voting; can be too strict for some
// collections (e.g. rejects pages with cgi parameters in their urls).
m->m_title = "do link spam checking";
m->m_desc = "If this is true, do not allow spammy inlinks to vote. "
"This check is "
"too aggressive for some collections, i.e. it "
"does not allow pages with cgi in their urls to vote.";
m->m_cgi = "dlsc";
m->m_off = (char *)&cr.m_doLinkSpamCheck - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
// Newer link algorithm (links: termlists); also required for RSS/Atom
// feed incorporation at index time.
m->m_title = "use new link algo";
m->m_desc = "Use the links: termlists instead of link:. Also "
"allows pages linking from the same domain or IP to all "
"count as a single link from a different IP. This is also "
"required for incorporating RSS and Atom feed information "
"when indexing a document.";
m->m_cgi = "na";
m->m_off = (char *)&cr.m_newAlgo - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
/*
m->m_title = "recycle link votes";
m->m_desc = "If this is true Gigablast will "
"use the old links and link text when re-indexing old urls "
"and not do any link voting when indexing new urls.";
m->m_cgi = "rv";
m->m_off = (char *)&cr.m_recycleVotes - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m++;
*/
// Recompute interval (days, fractional allowed) for a url's link info;
// 0 means recompute on every content reindex.
m->m_title = "update link info frequency";
m->m_desc = "How often should Gigablast recompute the "
"link info for a url. "
"Also applies to getting the quality of a site "
"or root url, which is based on the link info. "
"In days. Can use decimals. 0 means to update "
"the link info every time the url's content is re-indexed. "
"If the content is not reindexed because it is unchanged "
"then the link info will not be updated. When getting the "
"link info or quality of the root url from an "
"external cluster, Gigablast will tell the external cluster "
"to recompute it if its age is this or higher.";
m->m_cgi = "uvf";
m->m_off = (char *)&cr.m_updateVotesFreq - x;
m->m_type = TYPE_FLOAT;
m->m_def = "60.0";
m->m_group = 0;
m++;
/*
m->m_title = "recycle imported link info";
m->m_desc = "If true, we ALWAYS recycle the imported link info and "
"NEVER recompute it again. Otherwise, recompute it when we "
"recompute the local link info.";
m->m_cgi = "rili";
m->m_off = (char *)&cr.m_recycleLinkInfo2 - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
*/
/*
m->m_title = "use imported link info for quality";
m->m_desc = "If true, we will use the imported link info to "
"help us determine the quality of the page we are indexing.";
m->m_cgi = "uifq";
m->m_off = (char *)&cr.m_useLinkInfo2ForQuality - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
*/
// this can hurt us too much if mis-assigned, remove it
/*
m->m_title = "restrict link voting to roots";
m->m_desc = "If this is true Gigablast will "
"not perform link analysis on urls that are not "
"root urls.";
m->m_cgi = "rvr";
m->m_off = (char *)&cr.m_restrictVotesToRoots - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m++;
*/
// One vote per /16 (top 2 IP bytes); also rejects votes from IPs that
// share the top 2 bytes with the page being indexed.
m->m_title = "restrict link voting by ip";
m->m_desc = "If this is true Gigablast will "
"only allow one vote per the top 2 significant bytes "
"of the IP address. Otherwise, multiple pages "
"from the same top IP can contribute to the link text and "
"link-based quality ratings of a particular URL. "
"Furthermore, no votes will be accepted from IPs that have "
"the same top 2 significant bytes as the IP of the page "
"being indexed.";
m->m_cgi = "ovpid";
m->m_off = (char *)&cr.m_oneVotePerIpDom - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
/*
m->m_title = "index link text";
m->m_desc = "If this is true Gigablast will "
"index both incoming and outgoing link text for the "
"appropriate documents, depending on url filters and "
"site rules, under the gbinlinktext: and gboutlinktext: "
"fields. Generally, you want this disabled, it was for "
"a client.";
m->m_cgi = "ilt";
m->m_off = (char *)&cr.m_indexLinkText - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m++;
*/
/*
m->m_title = "index incoming link text";
m->m_desc = "If this is false no incoming link text is indexed.";
m->m_cgi = "iilt";
m->m_off = (char *)&cr.m_indexLinkText - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
*/
// Also index the plain text surrounding inlink anchor text, at a
// scaled-down score (scalar is hard-coded elsewhere per note below).
m->m_title = "index inlink neighborhoods";
m->m_desc = "If this is true Gigablast will "
"index the plain text surrounding the hyper-link text. The "
"score will be x times that of the hyper-link text, where x "
"is the scalar below.";
m->m_cgi = "iin";
m->m_off = (char *)&cr.m_indexInlinkNeighborhoods - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
/*
// this is now hard-coded in XmlNode.cpp, currently .8
m->m_title = "inlink neighborhoods score scalar";
m->m_desc = "Gigablast can "
"index the plain text surrounding the hyper-link text. The "
"score will be x times that of the hyper-link text, where x "
"is this number.";
m->m_cgi = "inss";
m->m_off = (char *)&cr.m_inlinkNeighborhoodsScoreScalar - x;
m->m_type = TYPE_FLOAT;
m->m_def = ".20";
m->m_group = 0;
m++;
*/
/*
m->m_title = "break web rings";
m->m_desc = "If this is true Gigablast will "
"attempt to detect link spamming rings and decrease "
"their influence on the link text for a URL.";
m->m_cgi = "bwr";
m->m_off = (char *)&cr.m_breakWebRings - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
*/
/*
m->m_title = "break log spam";
m->m_desc = "If this is true Gigablast will attempt to detect "
"dynamically generated pages and remove their voting power. "
"Additionally, pages over 100k will not be have their "
"outgoing links counted. Pages that have a form which POSTS "
"to a cgi page will not be considered either.";
m->m_cgi = "bls";
m->m_off = (char *)&cr.m_breakLogSpam - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m++;
*/
// Lets spiders borrow another collection's tagdb; empty string means
// use this collection's own tagdb.
m->m_title = "tagdb collection name";
m->m_desc = "Sometimes you want the spiders to use the tagdb of "
"another collection, like the main collection. "
"If this is empty it defaults to the current collection.";
m->m_cgi = "tdbc";
m->m_off = (char *)&cr.m_tagdbColl - x;
m->m_type = TYPE_STRING;
m->m_size = MAX_COLL_LEN+1;
m->m_def = "";
m++;
// Enable catdb (DMOZ directory) lookups during spidering.
m->m_title = "catdb lookups enabled";
m->m_desc = "Spiders will look to see if the current page is in "
"catdb. If it is, all Directory information for that page "
"will be indexed with it.";
m->m_cgi = "cdbe";
m->m_off = (char *)&cr.m_catdbEnabled - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
// Reuse cached DMOZ titles/topic ids instead of re-requesting them.
m->m_title = "recycle catdb info";
m->m_desc = "Rather than requesting new info from DMOZ, like "
"titles and topic ids, grab it from old record. Increases "
"performance if you are seeing a lot of "
"\"getting catdb record\" entries in the spider queues.";
m->m_cgi = "rci";
m->m_off = (char *)&cr.m_recycleCatdb - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m++;
// When 'NO', pages present in catdb are exempt from tagdb/url-filter bans.
m->m_title = "allow banning of pages in catdb";
m->m_desc = "If this is 'NO' then pages that are in catdb, "
"but banned from tagdb or the url filters page, can not "
"be banned.";
m->m_cgi = "abpc";
m->m_off = (char *)&cr.m_catdbPagesCanBeBanned - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m++;
// Keep spidering a DMOZ-listed site even if it returns spider errors.
m->m_title = "override spider errors for catdb";
m->m_desc = "Ignore and skip spider errors if the spidered site"
" is found in Catdb (DMOZ).";
m->m_cgi = "catose";
m->m_off = (char *)&cr.m_overrideSpiderErrorsForCatdb - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
//m->m_title = "only spider root urls";
//m->m_desc = "Only spider urls that are roots.";
//m->m_cgi = "osru";
//m->m_off = (char *)&cr.m_onlySpiderRoots - x;
//m->m_type = TYPE_BOOL;
//m->m_def = "0";
//m++;
// Document admission filters: charset, adult content, xml docs, serps.
// Description typos fixed below ("overides" -> "overrides",
// "eabled" -> "enabled"); titles/cgi names untouched.
m->m_title = "allow asian docs";
m->m_desc = "If this is disabled the spider "
"will not allow any docs from the gb2312 charset "
"into the index.";
m->m_cgi = "aad";
m->m_off = (char *)&cr.m_allowAsianDocs - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
// Adult-content filter; takes precedence over tagdb.
m->m_title = "allow adult docs";
m->m_desc = "If this is disabled the spider "
"will not allow any docs which contain adult content "
"into the index (overrides tagdb).";
m->m_cgi = "aprnd";
m->m_off = (char *)&cr.m_allowAdultDocs - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0 ;
m++;
// XML documents excluded from the index by default.
m->m_title = "allow xml docs";
m->m_desc = "If this is disabled the spider "
"will not allow any xml "
"into the index.";
m->m_cgi = "axd";
m->m_off = (char *)&cr.m_allowXmlDocs - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m++;
// Detect and reject search-engine result pages (serps).
m->m_title = "do serp detection";
m->m_desc = "If this is enabled the spider "
"will not allow any docs which are determined to "
"be serps.";
m->m_cgi = "dsd";
m->m_off = (char *)&cr.m_doSerpDetection - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0 ;
m++;
// When disabled (and a proxy IP is set) all urls are assumed to have
// the placeholder IP 1.2.3.4 instead of doing DNS lookups.
m->m_title = "do IP lookup";
m->m_desc = "If this is disabled and the proxy "
"IP below is not zero then Gigablast will assume "
"all spidered URLs have an IP address of 1.2.3.4.";
m->m_cgi = "dil";
m->m_off = (char *)&cr.m_doIpLookups - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m++;
// Reuse the IP stored with a doc when reindexing (e.g. via the
// reindexer tool) instead of resolving it again.
m->m_title = "use old IPs";
m->m_desc = "Should the stored IP "
"of documents we are reindexing be used? Useful for "
"pages banned by IP address and then reindexed with "
"the reindexer tool.";
m->m_cgi = "useOldIps";
m->m_off = (char *)&cr.m_useOldIps - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m++;
// Purge pages banned via tagdb or the Url Filters table.
m->m_title = "remove banned pages";
m->m_desc = "Remove banned pages from the index. Pages can be "
"banned using tagdb or the Url Filters table.";
m->m_cgi = "rbp";
m->m_off = (char *)&cr.m_removeBannedPages - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m++;
/*
m->m_title = "ban domains of urls banned by IP";
m->m_desc = "Most urls are banned by IP "
"address. But owners often will keep the same "
"domains and change their IP address. So when "
"banning a url that was banned by IP, should its domain "
"be banned too? (obsolete)";
m->m_cgi = "banDomains";
m->m_off = (char *)&cr.m_banDomains - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
*/
// Whether the external content filter (see "filter name" below) is
// also applied to plain html/text replies.
m->m_title = "apply filter to text pages";
m->m_desc = "If this is false then the filter "
"will not be used on html or text pages.";
m->m_cgi = "aft";
m->m_off = (char *)&cr.m_applyFilterToText - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
// Enable spidering of HTTPS pages over SSL.
m->m_title = "allow HTTPS pages using SSL";
m->m_desc = "If this is true, spiders will read "
"HTTPS pages using SSL Protocols.";
m->m_cgi = "ahttps";
m->m_off = (char *)&cr.m_allowHttps - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
/*
m->m_title = "require dollar sign";
m->m_desc = "If this is YES, then do not allow document to be "
"indexed if they do not contain a dollar sign ($), but the "
"links will still be harvested. Used for building shopping "
"index.";
m->m_cgi = "nds";
m->m_off = (char *)&cr.m_needDollarSign - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
*/
/*
m->m_title = "require numbers in url";
m->m_desc = "If this is YES, then do not allow document to be "
"indexed if they do not have two back-to-back digits in the "
"path of the url, but the links will still be harvested. Used "
"to build a news index.";
m->m_cgi = "nniu";
m->m_off = (char *)&cr.m_needNumbersInUrl - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "index news topics";
m->m_desc = "If this is YES, Gigablast will attempt to categorize "
"every page as being in particular news categories like "
"sports, business, etc. and will be searchable by doing a "
"query like \"newstopic:sports.";
m->m_cgi = "int";
m->m_off = (char *)&cr.m_getNewsTopic - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
*/
// Prefer a page's RSS feed: queue the feed url and index the feed
// pages rather than the page itself.
m->m_title = "follow RSS links";
m->m_desc = "If an item on a page has an RSS feed link, add the "
"RSS link to the spider queue and index the RSS pages "
"instead of the current page.";
m->m_cgi = "frss";
m->m_off = (char *)&cr.m_followRSSLinks - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
// Restrict indexing to pages discovered via RSS feeds; requires the
// "follow RSS links" parm above.
m->m_title = "only index articles from RSS feeds";
m->m_desc = "Only index pages that were linked to by an RSS feed. "
"Follow RSS Links must be enabled (above).";
m->m_cgi = "orss";
m->m_off = (char *)&cr.m_onlyIndexRSS - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m++;
// Rate limit for the addurl interface, per IP domain per day;
// values <= 0 mean unlimited.
m->m_title = "max add urls";
m->m_desc = "Maximum number of urls that can be "
"submitted via the addurl interface, per IP domain, per "
"24 hour period. A value less than or equal to zero "
"implies no limit.";
m->m_cgi = "mau";
m->m_off = (char *)&cr.m_maxAddUrlsPerIpDomPerDay - x;
m->m_type = TYPE_LONG;
m->m_def = "100";
m++;
/*
m->m_title = "max text doc length";
m->m_desc = "Gigablast will not download, index or "
"store more than this many bytes of an html or text "
"document. Use -1 for no max.";
m->m_cgi = "mtdl";
m->m_off = (char *)&cr.m_maxTextDocLen - x;
m->m_type = TYPE_LONG;
m->m_def = "204800";
m++;
m->m_title = "max other doc length";
m->m_desc = "Gigablast will not download, index or "
"store more than this many bytes of a non-html, non-text "
"document. Use -1 for no max.";
m->m_cgi = "modl";
m->m_off = (char *)&cr.m_maxOtherDocLen - x;
m->m_type = TYPE_LONG;
m->m_def = "1048576";
m->m_group = 0;
m++;
*/
//m->m_title = "indexdb truncation limit";
//m->m_cgi = "itl";
//m->m_desc = "How many documents per term? Keep this very high.";
//m->m_off = (char *)&cr.m_indexdbTruncationLimit - x;
//m->m_def = "50000000";
//m->m_type = TYPE_LONG;
//m->m_min = MIN_TRUNC; // from Indexdb.h
//m++;
// External program spawned to filter every HTTP reply the spider
// receives; empty string disables filtering.
m->m_title = "filter name";
m->m_desc = "Program to spawn to filter all HTTP "
"replies the spider receives. Leave blank for none.";
m->m_cgi = "filter";
m->m_def = "";
m->m_off = (char *)&cr.m_filter - x;
m->m_type = TYPE_STRING;
m->m_size = MAX_FILTER_LEN+1;
m++;
// Hard kill timeout (seconds) for the filter shell.
m->m_title = "filter timeout";
m->m_desc = "Kill filter shell after this many seconds. Assume it "
"stalled permanently.";
m->m_cgi = "fto";
m->m_def = "40";
m->m_off = (char *)&cr.m_filterTimeout - x;
m->m_type = TYPE_LONG;
m->m_group = 0;
m++;
// Optional HTTP proxy for spider downloads; 0 means no proxy.
m->m_title = "proxy ip";
m->m_desc = "Retrieve pages from the proxy at this IP address.";
m->m_cgi = "proxyip";
m->m_off = (char *)&cr.m_proxyIp - x;
m->m_type = TYPE_IP;
m->m_def = "0";
m++;
// Port of the proxy above.
m->m_title = "proxy port";
m->m_desc = "Retrieve pages from the proxy on "
"this port.";
m->m_cgi = "proxyport";
m->m_off = (char *)&cr.m_proxyPort - x;
m->m_type = TYPE_LONG;
m->m_def = "0";
m->m_group = 0;
m++;
// TTL (seconds) for cached robots.txt files. 0 disables cache reads
// (robots.txt re-downloaded per page when robots.txt use is on) but
// files are still written to the cache.
// Description typo fixed: "How many second" -> "How many seconds".
m->m_title = "max robots.txt cache age";
m->m_desc = "How many seconds to cache a robots.txt file for. "
"86400 is 1 day. 0 means Gigablast will not read from the "
"cache at all and will download the robots.txt before every "
"page if robots.txt use is enabled above. However, if this is "
"0 then Gigablast will still store robots.txt files into the "
"cache.";
m->m_cgi = "mrca";
m->m_off = (char *)&cr.m_maxRobotsCacheAge - x;
m->m_type = TYPE_LONG;
m->m_def = "86400"; // 24*60*60 = 1day
m->m_units = "seconds";
m++;
// Spider-time window: only urls scheduled within [start, end] are
// eligible. Both values are interpreted in UTC.
m->m_title = "spider start time";
m->m_desc = "Only spider URLs scheduled to be spidered "
"at this time or after. In UTC.";
m->m_cgi = "sta";
m->m_off = (char *)&cr.m_spiderTimeMin - x;
m->m_type = TYPE_DATE; // date format -- very special
m->m_def = "01 Jan 1970";
m++;
// Upper bound of the window; superseded by the current time when
// "use current time" below is enabled.
m->m_title = "spider end time";
m->m_desc = "Only spider URLs scheduled to be spidered "
"at this time or before. If \"use current time\" is true "
"then the current local time is used for this value instead. "
"in UTC.";
m->m_cgi = "stb";
m->m_off = (char *)&cr.m_spiderTimeMax - x;
m->m_type = TYPE_DATE2;
m->m_def = "01 Jan 2010";
m->m_group = 0;
m++;
// When true, "now" replaces the static spider end time above.
m->m_title = "use current time";
m->m_desc = "Use the current time as the spider end time?";
m->m_cgi = "uct";
m->m_off = (char *)&cr.m_useCurrentTime - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
/*
m->m_title = "default ruleset site file num";
m->m_desc = "Use this as the current Sitedb file num for Sitedb "
"entries that always use the current default";
m->m_cgi = "dftsfn";
m->m_off = (char *)&cr.m_defaultSiteRec - x;
m->m_type = TYPE_LONG;
m->m_def = "16";
m++;
m->m_title = "RSS ruleset site file num";
m->m_desc = "Use this Sitedb file num ruleset for RSS feeds";
m->m_cgi = "rssrs";
m->m_off = (char *)&cr.m_rssSiteRec - x;
m->m_type = TYPE_LONG;
m->m_def = "25";
m->m_group = 0;
m++;
m->m_title = "TOC ruleset site file num";
m->m_desc = "Use this Sitedb file num ruleset "
"for Table of Contents pages";
m->m_cgi = "tocrs";
m->m_off = (char *)&cr.m_tocSiteRec - x;
m->m_type = TYPE_LONG;
m->m_def = "29";
m->m_group = 0;
m++;
*/
/*
m->m_title = "store topics vector";
m->m_desc = "Should Gigablast compute and store a topics vector "
"for every document indexed. This allows Gigablast to "
"do topic clustering without having to compute this vector "
"at query time. You can turn topic clustering on in the "
"Search Controls page.";
m->m_cgi = "utv";
m->m_off = (char *)&cr.m_useGigabitVector - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
m->m_title = "use gigabits for vector";
m->m_desc = "For news collection. "
"Should Gigablast form the similarity vector using "
"Gigabits, as opposed to a straight out random sample. "
"This does clustering more "
"by topic rather than by explicit content in common.";
m->m_cgi = "uct";
m->m_off = (char *)&cr.m_useGigabitVector - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
m->m_title = "max similarity to reindex";
m->m_desc = "If the url's content is over X% similar to what we "
"already "
"have indexed, then do not reindex it, and treat the content "
"as if it were unchanged for intelligent spider scheduling "
"purposes. Set to 100% to always reindex the document, "
"regardless, although the use-ifModifiedSince check "
"above may still be in affect, as well as the "
"deduping-enabled check. This will also affect the re-spider "
"time, because Gigablast spiders documents that change "
"frequently faster.";
m->m_cgi = "msti";
m->m_off = (char *)&cr.m_maxSimilarityToIndex - x;
m->m_type = TYPE_LONG;
m->m_def = "100";
m->m_group = 0;
m++;
*/
// this is obsolete -- we can use the reg exp "isroot"
/*
m->m_title = "root url priority";
m->m_desc = "What spider priority should root urls "
"be assigned? Spider priorities range from 0 to 31. If no "
"urls are scheduled to be spidered in the priority 31 "
"bracket, the spider moves down to 30, etc., until it finds "
"a url to spider. If this priority is undefined "
"then that url's priority is determined based on the rules "
"on the URL filters page. If the priority is still "
"undefined then the priority is taken to be the priority of "
"the parent minus one, which results in a breadth first "
"spidering algorithm."; // html
m->m_cgi = "srup";
m->m_off = (char *)&cr.m_spiderdbRootUrlPriority - x;
m->m_type = TYPE_PRIORITY2;// 0-(MAX_SPIDER_PRIORITIES-1)dropdown menu
m->m_def = "15";
m++;
*/
/*
-- mdw, now in urlfilters using "isaddurl" "reg exp"
m->m_title = "add url priority";
m->m_desc = "What is the priority of a url which "
"is added to the spider queue via the "
"add url page?"; // html
m->m_cgi = "saup";
m->m_off = (char *)&cr.m_spiderdbAddUrlPriority - x;
m->m_type = TYPE_PRIORITY; // 0-(MAX_SPIDER_PRIORITIES-1)dropdown menu
m->m_def = "16";
m->m_group = 0;
m++;
*/
/*
m->m_title = "new spider by priority";
m->m_desc = "Specify priorities for which "
"new urls not yet in the index should be spidered.";
m->m_cgi = "sn";
m->m_xml = "spiderNewBits";
m->m_off = (char *)&cr.m_spiderNewBits - x;
m->m_type = TYPE_PRIORITY_BOXES; // array of numbered (0-(MAX_SPIDER_PRIORITIES-1)) checkboxes
m->m_fixed = MAX_SPIDER_PRIORITIES;
m->m_def = "1"; // default for each one is on
m++;
m->m_title = "old spider by priority";
m->m_desc = "Specify priorities for which old "
"urls already in the index should be spidered.";
m->m_cgi = "so";
m->m_xml = "spiderOldBits";
m->m_off = (char *)&cr.m_spiderOldBits - x;
m->m_type = TYPE_PRIORITY_BOXES; // array of numbered (0-(MAX_SPIDER_PRIORITIES-1)) checkboxes
m->m_fixed = MAX_SPIDER_PRIORITIES;
m->m_def = "1"; // default for each one is on
m->m_group = 0;
m++;
m->m_title = "max spiders per domain";
m->m_desc = "How many pages should the spider "
"download simultaneously from any one domain? This can "
"prevents the spider from hitting one server too hard.";
m->m_cgi = "mspd";
m->m_off = (char *)&cr.m_maxSpidersPerDomain - x;
m->m_type = TYPE_LONG;
m->m_def = "1";
m++;
m->m_title = "same domain wait";
m->m_desc = "How many milliseconds should Gigablast wait "
"between spidering a second url from the same domain. "
"This is used to prevent the spiders from hitting a "
"website too hard.";
m->m_cgi = "sdw";
m->m_off = (char *)&cr.m_sameDomainWait - x;
m->m_type = TYPE_LONG;
m->m_def = "500";
m->m_group = 0;
m++;
m->m_title = "same ip wait";
m->m_desc = "How many milliseconds should Gigablast wait "
"between spidering a second url from the same IP address. "
"This is used to prevent the spiders from hitting a "
"website too hard.";
m->m_cgi = "siw";
m->m_off = (char *)&cr.m_sameIpWait - x;
m->m_type = TYPE_LONG;
m->m_def = "10000";
m->m_group = 0;
m++;
*/
/*
m->m_title = "use distributed spider lock";
m->m_desc = "Enable distributed spider locking to strictly enforce "
"same domain waits at a global level.";
m->m_cgi = "udsl";
m->m_off = (char *)&cr.m_useSpiderLocks - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
m->m_title = "distribute spider download based on ip";
m->m_desc = "Distribute web downloads based on the ip of the host so "
"only one spider ip hits the same hosting ip. Helps "
"webmaster's logs look nicer.";
m->m_cgi = "udsd";
m->m_off = (char*)&cr.m_distributeSpiderGet - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
m->m_title = "percent of water mark to reload queues";
m->m_desc = "When a spider queue drops below this percent of its "
"max level it will reload from disk.";
m->m_cgi = "rlqp";
m->m_off = (char*)&cr.m_reloadQueuePercent - x;
m->m_type = TYPE_LONG;
m->m_def = "25";
m++;
*/
/*
m->m_title = "min respider wait";
m->m_desc = "What is the minimum number of days "
"the spider should wait before re-visiting a particular "
"web page? "
"The spiders attempts to determine the update cycle of "
"each web page and it tries to visit them as needed, but it "
"will not wait less than this number of days regardless.";
m->m_cgi = "mrw";
m->m_off = (char *)&cr.m_minRespiderWait - x;
m->m_type = TYPE_FLOAT;
m->m_def = "1.0";
m++;
m->m_title = "max respider wait";
m->m_desc = "What is the maximum number of days "
"the spider should wait before re-visiting a particular "
"web page?";
m->m_cgi = "xrw";
m->m_off = (char *)&cr.m_maxRespiderWait - x;
m->m_type = TYPE_FLOAT;
m->m_def = "90.0";
m->m_group = 0;
m++;
m->m_title = "first respider wait";
m->m_desc = "What is the number of days "
"Gigablast should wait before spidering a particular web page "
"for the second time? Tag in ruleset will override this value "
"if it is present.";
m->m_cgi = "frw";
m->m_off = (char *)&cr.m_firstRespiderWait - x;
m->m_type = TYPE_FLOAT;
m->m_def = "30.0";
m->m_group = 0;
m++;
m->m_title = "error respider wait";
m->m_desc = "If a spidered web page has a network "
"error, such as a DNS not found error, or a time out error, "
"how many days should Gigablast wait before reattempting "
"to spider that web page?";
m->m_cgi = "erw";
m->m_off = (char *)&cr.m_errorRespiderWait - x;
m->m_type = TYPE_FLOAT;
m->m_def = "2.0";
m->m_group = 0;
m++;
m->m_title = "doc not found error respider wait";
m->m_desc = "If a spidered web page has a http status "
"error, such as a 404 page not found error, "
"how many days should Gigablast wait before reattempting "
"to spider that web page?";
m->m_cgi = "dnferw";
m->m_off = (char *)&cr.m_docNotFoundErrorRespiderWait - x;
m->m_type = TYPE_FLOAT;
m->m_def = "7.0";
m->m_group = 0;
m++;
*/
/*
m->m_title = "spider max kbps";
m->m_desc = "The maximum kilobits per second "
"that the spider can download.";
m->m_cgi = "cmkbps";
m->m_off = (char *)&cr.m_maxKbps - x;
m->m_type = TYPE_FLOAT;
m->m_def = "999999.0";
m++;
m->m_title = "spider max pages per second";
m->m_desc = "The maximum number of pages per "
"second that can be indexed or deleted from the index.";
m->m_cgi = "cmpps";
m->m_off = (char *)&cr.m_maxPagesPerSecond - x;
m->m_type = TYPE_FLOAT;
m->m_def = "999999.0";
m->m_group = 0;
m++;
*/
/*
m->m_title = "spider new percent";
m->m_desc = "Approximate percentage of new vs. old docs to spider. "
"If set to a negative number, the old alternating "
"priority algorithm is used.";
m->m_cgi = "snp";
m->m_off = (char *)&cr.m_spiderNewPct - x;
m->m_type = TYPE_FLOAT;
m->m_def = "-1.0";
m->m_group = 0;
m++;
*/
// Download retry budget per url before the spider gives up.
m->m_title = "number retries per url";
m->m_desc = "How many times should the spider be "
"allowed to fail to download a particular web page before "
"it gives up? "
"Failure may result from temporary loss of internet "
"connectivity on the remote end, dns or routing problems.";
m->m_cgi = "nr";
m->m_off = (char *)&cr.m_numRetries - x;
m->m_type = TYPE_RETRIES; // dropdown from 0 to 3
m->m_def = "1";
m++;
// Priority assigned to urls being retried; -1 (undefined) leaves the
// url's priority unchanged.
m->m_title = "priority of urls being retried";
m->m_desc = "Keep this pretty high so that we get problem urls "
"out of the index fast, otherwise, you might be waiting "
"months for another retry. Use undefined to indicate "
"no change in the priority of the url.";
m->m_cgi = "rtp";
m->m_off = (char *)&cr.m_retryPriority - x;
m->m_type = TYPE_PRIORITY2; // -1 to 31
m->m_def = "-1";
m->m_group = 0;
m++;
// NOTE(review): the following parms (max pages, link-info import,
// turk tags) are disabled wholesale by this comment block. They still
// reference CollectionRec members; re-enable with care.
/*
m->m_title = "max pages in index";
m->m_desc = "What is the maximum number of "
"pages that are permitted for this collection?";
m->m_cgi = "mnp";
m->m_off = (char *)&cr.m_maxNumPages - x;
m->m_type = TYPE_LONG_LONG;
m->m_def = "10000000000"; // 10 billion
m++;
m->m_title = "import link info"; // from other cluster";
m->m_desc = "Say yes here to make Gigablast import "
"link text from another collection into this one "
"when spidering urls. Gigablast will "
"use the hosts.conf file in the working directory to "
"tell it what hosts belong to the cluster to import from. "
"Gigablast "
"will use the \"update link votes frequency\" parm above "
"to determine if the info should be recomputed on the other "
"cluster.";
m->m_cgi = "eli"; // external link info
m->m_off = (char *)&cr.m_getExternalLinkInfo - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 2;
m++;
m->m_title = "use hosts2.conf for import cluster";
m->m_desc = "Tell Gigablast to import from the cluster defined by "
"hosts2.conf in the working directory, rather than "
"hosts.conf";
m->m_cgi = "elib"; // external link info
m->m_off = (char *)&cr.m_importFromHosts2Conf - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_priv = 2;
m->m_group = 0;
m++;
//m->m_title = "get link info from other cluster in real-time";
//m->m_desc = "Say yes here to make Gigablast tell the other "
// "cluster to compute the link info, not just return a "
// "stale copy from the last time it computed it.";
//m->m_cgi = "elif"; // external link info fresh
//m->m_off = (char *)&cr.m_getExternalLinkInfoFresh - x;
//m->m_type = TYPE_BOOL;
//m->m_def = "0";
//m->m_group = 0;
//m->m_priv = 2;
//m++;
m->m_title = "collection to import from";
m->m_desc = "Gigablast will fetch the link info from this "
"collection.";
m->m_cgi = "elic"; // external link info
m->m_off = (char *)&cr.m_externalColl - x;
m->m_type = TYPE_STRING;
m->m_size = MAX_COLL_LEN+1;
m->m_def = "";
m->m_group = 0;
m->m_priv = 2;
m++;
m->m_title = "turk tags to display";
m->m_desc = "Tell pageturk to display the tag questions "
"for the comma seperated tag names."
" no space allowed.";
m->m_cgi = "ttags";
m->m_xml = "turkTags";
m->m_type = TYPE_STRING;
m->m_size = 256;
m->m_def = "blog,spam,news";
m->m_off = (char *)&cr.m_turkTags - x;
m->m_group = 0;
m->m_priv = 2;
m++;
*/
// now we store this in title recs, so we can change it on the fly
// Percentage weight applied to terms appearing in the page title.
m->m_title = "title weight";
// FIX: help text previously claimed "2400 is the default" but the
// actual default (m_def below) is 4600; keep the text in sync.
m->m_desc = "Weight title this much more or less. This units are "
"percentage. A 100 means to not give the title any special "
"weight. Generally, though, you want to give it significantly "
"more weight than that, so 4600 is the default.";
m->m_cgi = "tw";
m->m_off = (char *)&cr.m_titleWeight - x;
m->m_type = TYPE_LONG;
m->m_def = "4600";
m->m_min = 0;
m++;
// now we store this in title recs, so we can change it on the fly
// Percentage weight for terms inside header (h1..h6) tags; desc and
// m_def agree here (600).
m->m_title = "header weight";
m->m_desc = "Weight terms in header tags by this much more or less. "
"This units are "
"percentage. A 100 means to not give the header any special "
"weight. Generally, though, you want to give it significantly "
"more weight than that, so 600 is the default.";
m->m_cgi = "hw";
m->m_off = (char *)&cr.m_headerWeight - x;
m->m_type = TYPE_LONG;
m->m_def = "600";
m->m_min = 0;
m->m_group = 0;
m++;
// now we store this in title recs, so we can change it on the fly
// Percentage weight for words found in the url path.
m->m_title = "url path word weight";
// FIX: help text previously claimed "600 is the default" but the
// actual default (m_def below) is 1600; keep the text in sync.
m->m_desc = "Weight text in url path this much more. "
"The units are "
"percentage. A 100 means to not give any special "
"weight. Generally, though, you want to give it significantly "
"more weight than that, so 1600 is the default.";
m->m_cgi = "upw";
m->m_off = (char *)&cr.m_urlPathWeight - x;
m->m_type = TYPE_LONG;
m->m_def = "1600";
m->m_min = 0;
m->m_group = 0;
m++;
// now we store this in title recs, so we can change it on the fly
// Percentage boosts for incoming link text (external vs internal)
// and for "concept"/surrounding text.
m->m_title = "external link text weight";
m->m_desc = "Weight text in the incoming external link text this "
"much more. The units are percentage. It already receives a "
"decent amount of weight naturally.";
m->m_cgi = "eltw";
m->m_off = (char *)&cr.m_externalLinkTextWeight - x;
m->m_type = TYPE_LONG;
m->m_def = "600";
m->m_min = 0;
m->m_group = 0;
m++;
// now we store this in title recs, so we can change it on the fly
m->m_title = "internal link text weight";
m->m_desc = "Weight text in the incoming internal link text this "
"much more. The units are percentage. It already receives a "
"decent amount of weight naturally.";
m->m_cgi = "iltw";
m->m_off = (char *)&cr.m_internalLinkTextWeight - x;
m->m_type = TYPE_LONG;
m->m_def = "200";
m->m_min = 0;
m->m_group = 0;
m++;
// now we store this in title recs, so we can change it on the fly
m->m_title = "concept weight";
m->m_desc = "Weight concepts this much more. "
"The units are "
"percentage. It already receives a decent amount of weight "
"naturally. AKA: surrounding text boost.";
m->m_cgi = "cw";
m->m_off = (char *)&cr.m_conceptWeight - x;
m->m_type = TYPE_LONG;
m->m_def = "50";
m->m_min = 0;
m->m_group = 0;
m++;
// NOTE(review): two more disabled parms follow (inlink boost base,
// menu elimination), kept for reference.
/*
// now we store this in title recs, so we can change it on the fly
m->m_title = "site num inlinks boost base";
m->m_desc = "Boost the score of all terms in the document using "
"this number. "
"The boost itself is expressed as a percentage. "
"The boost is B^X, where X is the number of good "
"inlinks to the document's site "
"and B is this is this boost base. "
"The score of each term in the "
"document is multiplied by the boost. That product "
"becomes the new score of that term. "
"For purposes of this calculation we limit X to 1000.";
m->m_cgi = "qbe";
m->m_off = (char *)&cr.m_siteNumInlinksBoostBase - x;
m->m_type = TYPE_FLOAT;
m->m_def = "1.005";
m->m_min = 0;
m->m_group = 0;
m++;
*/
/*
// use menu elimination technology?
m->m_title = "only index article content";
m->m_desc = "If this is true gigablast will only index the "
"article content on pages identifed as permalinks. It will "
"NOT index any page content on non-permalink pages, and it "
"will avoid indexing menu content on any page. It will not "
"index meta tags on any page. It will only index incoming "
"link text for permalink pages. Useful when "
"indexing blog or news sites.";
m->m_cgi = "met";
m->m_off = (char *)&cr.m_eliminateMenus - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
*/
// replace by lang== lang!= in url filters
// NOTE(review): the collection-language parm was superseded by the
// url-filter lang expressions, and the RAID-style lookup parms below
// are all disabled; kept for reference only.
//m->m_title = "collection language";
//m->m_desc = "Only spider pages determined to be in "
// "this language (see Language.h)";
//m->m_cgi = "clang";
//m->m_off = (char *)&cr.m_language - x;
//m->m_type = TYPE_LONG;
//m->m_def = "0";
//m++;
///////////////////////////////////////////
// SEARCH CONTROLS
///////////////////////////////////////////
//m->m_title = "allow RAID style list intersection";
//m->m_desc = "Allow using RAID style lookup for intersecting term "
// "lists and getting docIds for queries.";
//m->m_cgi = "uraid";
//m->m_off = (char *)&cr.m_allowRaidLookup - x;
//m->m_type = TYPE_BOOL;
//m->m_def = "0";
//m++;
//m->m_title = "allow RAIDed term list read";
//m->m_desc = "Allow splitting up the term list read for large lists "
// "amongst twins.";
//m->m_cgi = "ulraid";
//m->m_off = (char *)&cr.m_allowRaidListRead - x;
//m->m_type = TYPE_BOOL;
//m->m_def = "0";
//m->m_group = 0;
//m++;
//m->m_title = "max RAID mercenaries";
//m->m_desc = "Max number of mercenaries to use in RAID lookup and "
// "intersection.";
//m->m_cgi = "raidm";
//m->m_off = (char *)&cr.m_maxRaidMercenaries - x;
//m->m_type = TYPE_LONG;
//m->m_def = "2";
//m->m_group = 0;
//m++;
//m->m_title = "min term list size to RAID";
//m->m_desc = "Term list size to begin doing term list RAID";
//m->m_cgi = "raidsz";
//m->m_off = (char *)&cr.m_minRaidListSize - x;
//m->m_type = TYPE_LONG;
//m->m_def = "1000000";
//m->m_group = 0;
//m++;
m->m_title = "restrict indexdb for queries";
m->m_desc = "If this is true Gigablast will only search the root "
"index file for docIds. Saves on disk seeks, "
"but may use older versions of indexed web pages.";
m->m_cgi = "riq";
m->m_off = (char *)&cr.m_restrictIndexdbForQuery - x;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_SEARCH;
m->m_def = "0";
m->m_sparm = 1;
m->m_scgi = "ri";
m->m_soff = (char *)&si.m_restrictIndexdbForQuery - y;
m++;
m->m_title = "restrict indexdb for xml feed";
m->m_desc = "Like above, but specifically for XML feeds.";
m->m_cgi = "rix";
m->m_off = (char *)&cr.m_restrictIndexdbForXML - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
//m->m_title = "restrict indexdb for queries in xml feed";
//m->m_desc = "Same as above, but just for the XML feed.";
//m->m_cgi = "riqx";
//m->m_off = (char *)&cr.m_restrictIndexdbForQueryRaw - x;
//m->m_type = TYPE_BOOL;
//m->m_def = "1";
//m->m_group = 0;
//m++;
m->m_title = "read from cache by default";
m->m_desc = "Should we read search results from the cache? Set "
"to false to fix dmoz bug.";
m->m_cgi = "rcd";
m->m_off = (char *)&cr.m_rcache - x;
m->m_soff = (char *)&si.m_rcache - y;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_sparm = 1;
m->m_scgi = "rcache";
m->m_sprpg = 0;
m->m_sprpp = 0;
m++;
m->m_title = "site cluster by default";
m->m_desc = "Should search results be site clustered by default?";
m->m_cgi = "scd";
m->m_off = (char *)&cr.m_siteClusterByDefault - x;
m->m_soff = (char *)&si.m_doSiteClustering - y;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_sparm = 1;
m->m_scgi = "sc";
m++;
m->m_title = "use min ranking algo";
m->m_desc = "Should search results be ranked using this algo?";
//m->m_cgi = "uma";
//m->m_off = (char *)&cr.m_siteClusterByDefault - x;
m->m_soff = (char *)&si.m_useMinAlgo - y;
m->m_type = TYPE_BOOL;
m->m_obj = OBJ_SI;
// seems, good, default it on
m->m_def = "1";
m->m_sparm = 1;
m->m_scgi = "uma";
m++;
// limit to this # of the top term pairs from inlink text whose
// score is accumulated
m->m_title = "real max top";
m->m_desc = "Only score up to this many inlink text term pairs";
m->m_soff = (char *)&si.m_realMaxTop - y;
m->m_type = TYPE_LONG;
m->m_obj = OBJ_SI;
m->m_def = "10";
m->m_sparm = 1;
m->m_scgi = "mit";
m++;
m->m_title = "use new ranking algo";
m->m_desc = "Should search results be ranked using this new algo?";
m->m_soff = (char *)&si.m_useNewAlgo - y;
m->m_type = TYPE_BOOL;
m->m_obj = OBJ_SI;
// seems, good, default it on
m->m_def = "1";
m->m_sparm = 1;
m->m_scgi = "una";
m++;
m->m_title = "do max score algo";
m->m_desc = "Quickly eliminated docids using max score algo";
m->m_soff = (char *)&si.m_doMaxScoreAlgo - y;
m->m_type = TYPE_BOOL;
m->m_obj = OBJ_SI;
m->m_def = "1";
m->m_sparm = 1;
m->m_scgi = "dmsa";
m++;
m->m_title = "use fast intersection algo";
m->m_desc = "Should we try to speed up search results generation?";
m->m_soff = (char *)&si.m_fastIntersection - y;
m->m_type = TYPE_CHAR;
m->m_obj = OBJ_SI;
// turn off until we debug
m->m_def = "-1";
m->m_sparm = 1;
m->m_scgi = "fi";
m++;
// buzz
// Result clustering / dedup controls.
m->m_title = "hide all clustered results";
m->m_desc = "Hide all clustered results instead of displaying two "
"results from each site.";
m->m_cgi = "hacr";
m->m_off = (char *)&cr.m_hideAllClustered - x;
m->m_type = TYPE_BOOL;
m->m_obj = OBJ_COLL;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "dedup results by default";
m->m_desc = "Should duplicate search results be removed by default?";
m->m_cgi = "drd"; // dedupResultsByDefault";
m->m_off = (char *)&cr.m_dedupResultsByDefault - x;
m->m_soff = (char *)&si.m_doDupContentRemoval - y;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m->m_sparm = 1;
m->m_scgi = "dr";
m++;
// note: "ddu" is used for both m_cgi and m_scgi here, unlike the
// parms above which use distinct collection/search cgi names.
m->m_title = "dedup URLs";
m->m_desc = "Should we dedup URLs with case insensitivity? This is "
"mainly to correct duplicate wiki pages.";
m->m_cgi = "ddu";
m->m_off = (char *)&cr.m_dedupURLDefault - x;
m->m_soff = (char *)&si.m_dedupURL - y;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m->m_sparm = 1;
m->m_scgi = "ddu";
m++;
m->m_title = "use vhost language detection";
m->m_desc = "Use language specific pages for home, etc.";
m->m_cgi = "vhost";
m->m_off = (char *)&cr.m_useLanguagePages - x;
m->m_soff = (char *)&si.m_useLanguagePages - y;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_sparm = 1;
m->m_scgi = "vhost";
m->m_smin = 0;
m++;
/*
m->m_title = "special query";
m->m_desc = "List of docids to restrain results to.";
m->m_cgi = "sq";
m->m_soff = (char *)&si.m_sq - y;
m->m_type = TYPE_STRING;
m->m_size = 6; // up to 5 chars + NULL, e.g. "en_US"
m->m_def = "en_US";
m->m_group = 0;
m->m_sparm = 1;
m->m_scgi = "sq";
m++;
*/
m->m_title = "use language weights";
m->m_desc = "Use Language weights to sort query results. "
"This will give results of a similar language a higher "
"priority.";
m->m_cgi = "lsort";
m->m_off = (char *)&cr.m_enableLanguageSorting - x;
m->m_soff = (char *)&si.m_enableLanguageSorting - y;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m->m_sparm = 1;
m->m_scgi = "lsort";
m->m_smin = 0;
m++;
m->m_title = "sort language preference";
m->m_desc = "Default language for post query rerank. "
"This should only be used on limited collections. "
"Value should be any language abbreviation, for example "
"\"en\" for English.";
m->m_cgi = "qlang";
m->m_off = (char *)&cr.m_defaultSortLanguage - x;
m->m_soff = (char *)&si.m_defaultSortLanguage - y;
m->m_type = TYPE_STRING;
m->m_size = 6; // up to 5 chars + NULL, e.g. "en_US"
m->m_def = "en";//_US";
m->m_group = 0;
m->m_sparm = 1;
m->m_scgi = "qlang";
m++;
m->m_title = "sort country preference";
m->m_desc = "Default country for post query rerank. "
"This should only be used on limited collections. "
"Value should be any country code abbreviation, for example "
"\"us\" for United States.";
m->m_cgi = "qcountry";
m->m_off = (char *)&cr.m_defaultSortCountry - x;
m->m_soff = (char *)&si.m_defaultSortCountry - y;
m->m_type = TYPE_STRING;
m->m_size = 2+1;
m->m_def = "us";
m->m_group = 0;
m->m_sparm = 1;
m->m_scgi = "qcountry";
m++;
// NOTE(review): the language-detection tuning parms below are all
// disabled; kept for reference.
/*
m->m_title = "language method weights";
m->m_desc = "Language method weights for spider language "
"detection. A string of ascii numerals that "
"should default to 895768712";
m->m_cgi = "lmweights";
m->m_off = (char *)&cr.m_languageMethodWeights - x;
m->m_type = TYPE_STRING;
m->m_size = 10; // up to 9 chars + NULL
m->m_def = "894767812";
m->m_group = 0;
// m->m_sparm = 1;
m++;
m->m_title = "language detection sensitivity";
m->m_desc = "Language detection sensitivity. Higher"
" values mean higher hitrate, but lower accuracy."
" Suggested values are from 2 to 20";
m->m_cgi = "lmbailout";
m->m_off = (char *)&cr.m_languageBailout - x;
m->m_type = TYPE_LONG;
m->m_def = "5";
m->m_group = 0;
// m->m_sparm = 1;
m++;
m->m_title = "language detection threshold";
m->m_desc = "Language detection threshold sensitivity."
" Higher values mean better accuracy, but lower hitrate."
" Suggested values are from 2 to 20";
m->m_cgi = "lmthreshold";
m->m_off = (char *)&cr.m_languageThreshold - x;
m->m_type = TYPE_LONG;
m->m_def = "3";
m->m_group = 0;
// m->m_sparm = 1;
m++;
m->m_title = "language detection samplesize";
m->m_desc = "Language detection size. Higher values"
" mean more accuracy, but longer processing time."
" Suggested values are 300-1000";
m->m_cgi = "lmsamples";
m->m_off = (char *)&cr.m_languageSamples - x;
m->m_type = TYPE_LONG;
m->m_def = "600";
m->m_group = 0;
// m->m_sparm = 1;
m++;
m->m_title = "language detection spider samplesize";
m->m_desc = "Language detection page sample size. "
"Higher values mean more accuracy, but longer "
"spider time."
" Suggested values are 3000-10000";
m->m_cgi = "lpsamples";
m->m_off = (char *)&cr.m_langPageLimit - x;
m->m_type = TYPE_LONG;
m->m_def = "6000";
m->m_group = 0;
// m->m_sparm = 1;
m++;
*/
// for post query reranking
// PQR (post-query rerank) controls: how many results to rescan and
// the language/country demotion factors. m_group = 1 here starts a
// new display group for the PQR section.
m->m_title = "docs to check for post query demotion";
m->m_desc = "How many search results should we "
"scan for post query demotion? "
"0 disables all post query reranking. ";
m->m_cgi = "pqrds";
m->m_off = (char *)&cr.m_pqr_docsToScan - x;
m->m_soff = (char *)&si.m_docsToScanForReranking - y;
m->m_type = TYPE_LONG;
m->m_def = "50";
m->m_group = 1;
m->m_sparm = 1;
m->m_scgi = "pqrds";
m++;
m->m_title = "demotion for foreign languages";
m->m_desc = "Demotion factor of non-relevant languages. Score "
"will be penalized by this factor as a percent if "
"it's language is foreign. "
"A safe value is probably anywhere from 0.5 to 1. ";
m->m_cgi = "pqrlang";
m->m_off = (char *)&cr.m_languageWeightFactor - x;
m->m_soff = (char *)&si.m_languageWeightFactor - y;
m->m_type = TYPE_FLOAT;
m->m_def = "0.999";
m->m_group = 0;
m->m_sparm = 1;
m->m_scgi = "pqrlang";
m->m_smin = 0;
m++;
m->m_title = "demotion for unknown languages";
m->m_desc = "Demotion factor for unknown languages. "
"Page's score will be penalized by this factor as a percent "
"if it's language is not known. "
"A safe value is 0, as these pages will be reranked by "
"country (see below). "
"0 means no demotion.";
m->m_cgi = "pqrlangunk";
m->m_off = (char *)&cr.m_languageUnknownWeight- x;
m->m_soff = (char *)&si.m_languageUnknownWeight- y;
m->m_type = TYPE_FLOAT;
m->m_def = "0.0";
m->m_group = 0;
m->m_sparm = 1;
m->m_scgi = "pqrlangunk";
m->m_smin = 0;
m++;
m->m_title = "demotion for pages where the country of the page writes "
"in the same language as the country of the query";
m->m_desc = "Demotion for pages where the country of the page writes "
"in the same language as the country of the query. "
"If query language is the same as the language of the page, "
"then if a language written in the country of the page matches "
"a language written by the country of the query, then page's "
"score will be demoted by this factor as a percent. "
"A safe range is between 0.5 and 1. ";
m->m_cgi = "pqrcntry";
m->m_off = (char *)&cr.m_pqr_demFactCountry - x;
m->m_type = TYPE_FLOAT;
m->m_def = "0.98";
m->m_group = 0;
m++;
m->m_title = "demotion for query terms or gigabits in url";
m->m_desc = "Demotion factor for query terms or gigabits "
"in a result's url. "
"Score will be penalized by this factor times the number "
"of query terms or gigabits in the url divided by "
"the max value below such that fewer "
"query terms or gigabits in the url causes the result "
"to be demoted more heavily, depending on the factor. "
"Higher factors demote more per query term or gigabit "
"in the page's url. "
"Generally, a page may not be demoted more than this "
"factor as a percent. Also, how it is demoted is "
"dependant on the max value. For example, "
"a factor of 0.2 will demote the page 20% if it has no "
"query terms or gigabits in its url. And if the max value is "
"10, then a page with 5 query terms or gigabits in its "
"url will be demoted 10%; and 10 or more query terms or "
"gigabits in the url will not be demoted at all. "
"0 means no demotion. "
"A safe range is from 0 to 0.35. ";
m->m_cgi = "pqrqttiu";
m->m_off = (char *)&cr.m_pqr_demFactQTTopicsInUrl - x;
m->m_type = TYPE_FLOAT;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "max value for pages with query terms or gigabits "
"in url";
m->m_desc = "Max number of query terms or gigabits in a url. "
"Pages with a number of query terms or gigabits in their "
"urls greater than or equal to this value will not be "
"demoted. "
"This controls the range of values expected to represent "
"the number of query terms or gigabits in a url. It should "
"be set to or near the estimated max number of query terms "
"or topics that can be in a url. Setting to a lower value "
"increases the penalty per query term or gigabit that is "
"not in a url, but decreases the range of values that "
"will be demoted.";
m->m_cgi = "pqrqttium";
m->m_off = (char *)&cr.m_pqr_maxValQTTopicsInUrl - x;
m->m_type = TYPE_LONG;
m->m_def = "10";
m->m_group = 0;
m++;
m->m_title = "demotion for pages that are not high quality";
m->m_desc = "Demotion factor for pages that are not high quality. "
"Score is penalized by this number as a percent times level "
"of quality. A pqge will be demoted by the formula "
"(max quality - page's quality) * this factor / the max "
"value given below. Generally, a page will not be "
"demoted more than this factor as a percent. "
"0 means no demotion. "
"A safe range is between 0 to 1. ";
m->m_cgi = "pqrqual";
m->m_off = (char *)&cr.m_pqr_demFactQual - x;
m->m_type = TYPE_FLOAT;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "max value for pages that are not high quality";
m->m_desc = "Max page quality. Pages with a quality level "
"equal to or higher than this value "
"will not be demoted. ";
m->m_cgi = "pqrqualm";
m->m_off = (char *)&cr.m_pqr_maxValQual - x;
m->m_type = TYPE_LONG;
m->m_def = "100";
m->m_group = 0;
m++;
m->m_title = "demotion for pages that are not "
"root or have many paths in the url";
m->m_desc = "Demotion factor each path in the url. "
"Score will be demoted by this factor as a percent "
"multiplied by the number of paths in the url divided "
"by the max value below. "
"Generally, the page will not be demoted more than this "
"value as a percent. "
"0 means no demotion. "
"A safe range is from 0 to 0.75. ";
m->m_cgi = "pqrpaths";
m->m_off = (char *)&cr.m_pqr_demFactPaths - x;
m->m_type = TYPE_FLOAT;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "max value for pages that have many paths in the url";
m->m_desc = "Max number of paths in a url. "
"This should be set to a value representing a very high "
"number of paths for a url. Lower values increase the "
"difference between how much each additional path demotes. ";
m->m_cgi = "pqrpathsm";
m->m_off = (char *)&cr.m_pqr_maxValPaths - x;
m->m_type = TYPE_LONG;
m->m_def = "16";
m->m_group = 0;
m++;
m->m_title = "demotion for pages that do not have a catid";
m->m_desc = "Demotion factor for pages that do not have a catid. "
"Score will be penalized by this factor as a percent. "
"A safe range is from 0 to 0.2. ";
m->m_cgi = "pqrcatid";
m->m_off = (char *)&cr.m_pqr_demFactNoCatId - x;
m->m_type = TYPE_FLOAT;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "demotion for pages where smallest "
"catid has a lot of super topics";
m->m_desc = "Demotion factor for pages where smallest "
"catid has a lot of super topics. "
"Page will be penalized by the number of super topics "
"multiplied by this factor divided by the max value given "
"below. "
"Generally, the page will not be demoted more than this "
"factor as a percent. "
"Note: pages with no catid are demoted by this factor as "
"a percent so as not to penalize pages with a catid. "
"0 means no demotion. "
"A safe range is between 0 and 0.25. ";
m->m_cgi = "pqrsuper";
m->m_off = (char *)&cr.m_pqr_demFactCatidHasSupers - x;
m->m_type = TYPE_FLOAT;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "max value for pages where smallest catid has a lot "
"of super topics";
m->m_desc = "Max number of super topics. "
"Pages whose smallest catid that has more super "
"topics than this will be demoted by the maximum amount "
"given by the factor above as a percent. "
"This should be set to a value representing a very high "
"number of super topics for a category id. "
"Lower values increase the difference between how much each "
"additional path demotes. ";
m->m_cgi = "pqrsuperm";
m->m_off = (char *)&cr.m_pqr_maxValCatidHasSupers - x;
m->m_type = TYPE_LONG;
m->m_def = "11";
m->m_group = 0;
m++;
m->m_title = "demotion for larger pages";
m->m_desc = "Demotion factor for larger pages. "
"Page will be penalized by its size times this factor "
"divided by the max page size below. "
"Generally, a page will not be demoted more than this "
"factor as a percent. "
"0 means no demotion. "
"A safe range is between 0 and 0.25. ";
m->m_cgi = "pqrpgsz";
m->m_off = (char *)&cr.m_pqr_demFactPageSize - x;
m->m_type = TYPE_FLOAT;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "max value for larger pages";
m->m_desc = "Max page size. "
"Pages with a size greater than or equal to this will be "
"demoted by the max amount (the factor above as a percent). ";
m->m_cgi = "pqrpgszm";
m->m_off = (char *)&cr.m_pqr_maxValPageSize - x;
m->m_type = TYPE_LONG;
m->m_def = "524288";
m->m_group = 0;
m++;
m->m_title = "demotion for non-location specific queries "
"with a location specific title";
m->m_desc = "Demotion factor for non-location specific queries "
"with a location specific title. "
"Pages which contain a location in their title which is "
"not in the query or the gigabits will be demoted by their "
"population multiplied by this factor divided by the max "
"place population specified below. "
"Generally, a page will not be demoted more than this "
"value as a percent. "
"0 means no demotion. ";
m->m_cgi = "pqrloct";
m->m_off = (char *)&cr.m_pqr_demFactLocTitle - x;
m->m_sparm = 1;
m->m_scgi = "pqrloct";
m->m_soff = (char *)&si.m_pqr_demFactLocTitle - y;
m->m_type = TYPE_FLOAT;
m->m_def = "0.99";
m->m_group = 0;
m++;
m->m_title = "demotion for non-location specific queries "
"with a location specific summary";
m->m_desc = "Demotion factor for non-location specific queries "
"with a location specific summary. "
"Pages which contain a location in their summary which is "
"not in the query or the gigabits will be demoted by their "
"population multiplied by this factor divided by the max "
"place population specified below. "
"Generally, a page will not be demoted more than this "
"value as a percent. "
"0 means no demotion. ";
m->m_cgi = "pqrlocs";
m->m_off = (char *)&cr.m_pqr_demFactLocSummary - x;
m->m_sparm = 1;
m->m_scgi = "pqrlocs";
m->m_soff = (char *)&si.m_pqr_demFactLocSummary - y;
m->m_type = TYPE_FLOAT;
m->m_def = "0.95";
m->m_group = 0;
m++;
m->m_title = "demotion for non-location specific queries "
"with a location specific dmoz category";
m->m_desc = "Demotion factor for non-location specific queries "
"with a location specific dmoz regional category. "
"Pages which contain a location in their dmoz which is "
"not in the query or the gigabits will be demoted by their "
"population multiplied by this factor divided by the max "
"place population specified below. "
"Generally, a page will not be demoted more than this "
"value as a percent. "
"0 means no demotion. ";
m->m_cgi = "pqrlocd";
m->m_off = (char *)&cr.m_pqr_demFactLocDmoz - x;
m->m_sparm = 1;
m->m_scgi = "pqrlocd";
m->m_soff = (char *)&si.m_pqr_demFactLocDmoz - y;
m->m_type = TYPE_FLOAT;
m->m_def = "0.95";
m->m_group = 0;
m++;
m->m_title = "demote locations that appear in gigabits";
m->m_desc = "Demote locations that appear in gigabits.";
m->m_cgi = "pqrlocg";
m->m_off = (char *)&cr.m_pqr_demInTopics - x;
m->m_sparm = 1;
m->m_scgi = "pqrlocg";
m->m_soff = (char *)&si.m_pqr_demInTopics - y;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
m->m_title = "max value for non-location specific queries "
"with location specific results";
m->m_desc = "Max place population. "
"Places with a population greater than or equal to this "
"will be demoted to the maximum amount given by the "
"factor above as a percent. ";
m->m_cgi = "pqrlocm";
m->m_off = (char *)&cr.m_pqr_maxValLoc - x;
m->m_type = TYPE_LONG;
// charlottesville was getting missed when this was 1M
m->m_def = "100000";
m->m_group = 0;
m++;
m->m_title = "demotion for non-html";
m->m_desc = "Demotion factor for content type that is non-html. "
"Pages which do not have an html content type will be "
"demoted by this factor as a percent. "
"0 means no demotion. "
"A safe range is between 0 and 0.35. ";
m->m_cgi = "pqrhtml";
m->m_off = (char *)&cr.m_pqr_demFactNonHtml - x;
m->m_type = TYPE_FLOAT;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "demotion for xml";
m->m_desc = "Demotion factor for content type that is xml. "
"Pages which have an xml content type will be "
"demoted by this factor as a percent. "
"0 means no demotion. "
"Any value between 0 and 1 is safe if demotion for non-html "
"is set to 0. Otherwise, 0 should probably be used. ";
m->m_cgi = "pqrxml";
m->m_off = (char *)&cr.m_pqr_demFactXml - x;
m->m_type = TYPE_FLOAT;
m->m_def = "0.95";
m->m_group = 0;
m++;
m->m_title = "demotion for pages with other pages from same "
"hostname";
m->m_desc = "Demotion factor for pages with fewer other pages from "
"same hostname. "
"Pages with results from the same host will be "
"demoted by this factor times each fewer host than the max "
"value given below, divided by the max value. "
"Generally, a page will not be demoted more than this "
"factor as a percent. "
"0 means no demotion. "
"A safe range is between 0 and 0.35. ";
m->m_cgi = "pqrfsd";
m->m_off = (char *)&cr.m_pqr_demFactOthFromHost - x;
m->m_type = TYPE_FLOAT;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "max value for pages with other pages from same "
"domain";
m->m_desc = "Max number of pages from same domain. "
"Pages which have this many or more pages from the same "
"domain will not be demoted. ";
m->m_cgi = "pqrfsdm";
m->m_off = (char *)&cr.m_pqr_maxValOthFromHost - x;
m->m_type = TYPE_LONG;
m->m_def = "12";
m->m_group = 0;
m++;
m->m_title = "initial demotion for pages with common "
"topics in dmoz as other results";
m->m_desc = "Initial demotion factor for pages with common "
"topics in dmoz as other results. "
"Pages will be penalized by the number of common topics "
"in dmoz times this factor divided by the max value "
"given below. "
"Generally, a page will not be demoted by more than this "
"factor as a percent. "
"Note: this factor is decayed by the factor specified in "
"the parm below, decay for pages with common topics in "
"dmoz as other results, as the number of pages with "
"common topics in dmoz increases. "
"0 means no demotion. "
"A safe range is between 0 and 0.35. ";
m->m_cgi = "pqrctid";
m->m_off = (char *)&cr.m_pqr_demFactComTopicInDmoz - x;
m->m_type = TYPE_FLOAT;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "decay for pages with common topics in dmoz "
"as other results";
m->m_desc = "Decay factor for pages with common topics in "
"dmoz as other results. "
"The initial demotion factor will be decayed by this factor "
"as a percent as the number of common topics increase. "
"0 means no decay. "
"A safe range is between 0 and 0.25. ";
m->m_cgi = "pqrctidd";
m->m_off = (char *)&cr.m_pqr_decFactComTopicInDmoz - x;
m->m_type = TYPE_FLOAT;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "max value for pages with common topics in dmoz "
"as other results";
m->m_desc = "Max number of common topics in dmoz as other results. "
"Pages with a number of common topics equal to or greater "
"than this value will be demoted to the maximum as given "
"by the initial factor above as a percent. ";
m->m_cgi = "pqrctidm";
m->m_off = (char *)&cr.m_pqr_maxValComTopicInDmoz - x;
m->m_type = TYPE_LONG;
m->m_def = "32";
m->m_group = 0;
m++;
m->m_title = "demotion for pages where dmoz category names "
"contain query terms or their synonyms";
m->m_desc = "Demotion factor for pages where dmoz category names "
"contain fewer query terms or their synonyms. "
"Pages will be penalized for each query term or synonym of "
"a query term less than the max value given below multiplied "
"by this factor, divided by the max value. "
"Generally, a page will not be demoted more than this value "
"as a percent. "
"0 means no demotion. "
"A safe range is between 0 and 0.3. ";
m->m_cgi = "pqrdcndcqt";
m->m_off = (char *)&cr.m_pqr_demFactDmozCatNmNoQT - x;
m->m_type = TYPE_FLOAT;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "max value for pages where dmoz category names "
"contain query terms or their synonyms";
m->m_desc = "Max number of query terms and their synonyms "
"in a page's dmoz category name. "
"Pages with a number of query terms or their synonyms in all "
"dmoz category names greater than or equal to this value "
"will not be demoted. ";
m->m_cgi = "pqrcndcqtm";
m->m_off = (char *)&cr.m_pqr_maxValDmozCatNmNoQT - x;
m->m_type = TYPE_LONG;
m->m_def = "10";
m->m_group = 0;
m++;
// PQR: demote pages whose dmoz category names contain few gigabits.
// Fix: removed duplicated word ("more than than") in the displayed desc.
m->m_title = "demotion for pages where dmoz category names "
"contain gigabits";
m->m_desc = "Demotion factor for pages where dmoz category "
"names contain fewer gigabits. "
"Pages will be penalized by the number of gigabits in all "
"dmoz category names fewer than the max value given below "
"divided by the max value. "
"Generally, a page will not be demoted more than this "
"factor as a percent. "
"0 means no demotion. "
"A safe range is between 0 and 0.3. ";
m->m_cgi = "pqrdcndcgb";
m->m_off = (char *)&cr.m_pqr_demFactDmozCatNmNoGigabits - x;
m->m_type = TYPE_FLOAT;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "max value for pages where dmoz category names "
"contain gigabits";
m->m_desc = "Max number of pages where dmoz category names "
"contain a gigabit. "
"Pages with a number of gigabits in all dmoz category names "
"greater than or equal to this value will not be demoted. ";
m->m_cgi = "pqrdcndcgbm";
m->m_off = (char *)&cr.m_pqr_maxValDmozCatNmNoGigabits - x;
m->m_type = TYPE_LONG;
m->m_def = "16";
m->m_group = 0;
m++;
m->m_title = "demotion for pages based on datedb date";
m->m_desc = "Demotion factor for pages based on datedb date. "
"Pages will be penalized for being published earlier than the "
"max date given below. "
"The older the page, the more it will be penalized based on "
"the time difference between the page's date and the max date, "
"divided by the max date. "
"Generally, a page will not be demoted more than this "
"value as a percent. "
"0 means no demotion. "
"A safe range is between 0 and 0.4. ";
m->m_cgi = "pqrdate";
m->m_off = (char *)&cr.m_pqr_demFactDatedbDate - x;
m->m_type = TYPE_FLOAT;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "min value for demotion based on datedb date ";
m->m_desc = "Pages with a publish date equal to or earlier than "
"this date will be demoted to the max (the factor above as "
"a percent). "
"Use this parm in conjunction with the max value below "
"to specify the range of dates where demotion occurs. "
"If you set this parm near the estimated earliest publish "
"date that occurs somewhat frequently, this method can better "
"control the additional demotion per publish day. "
"This number is given as seconds since the epoch, January 1st, "
"1970 divided by 1000. "
"0 means use the epoch. ";
m->m_cgi = "pqrdatei";
m->m_off = (char *)&cr.m_pqr_minValDatedbDate - x;
m->m_type = TYPE_LONG;
m->m_def = "631177"; // Jan 01, 1990
m->m_group = 0;
m++;
m->m_title = "max value for demotion based on datedb date ";
m->m_desc = "Pages with a publish date greater than or equal to "
"this value divided by 1000 will not be demoted. "
"Use this parm in conjunction with the min value above "
"to specify the range of dates where demotion occurs. "
"This number is given as seconds before the current date "
"and time taken from the system clock divided by 1000. "
"0 means use the current time of the current day. ";
m->m_cgi = "pqrdatem";
m->m_off = (char *)&cr.m_pqr_maxValDatedbDate - x;
m->m_type = TYPE_LONG;
m->m_def = "0";
m->m_group = 0;
m++;
// PQR: demote based on query-term proximity within the document.
// Fix: adjacent string literals concatenated to "score.0 means" --
// added the missing space after the sentence.
m->m_title = "demotion for pages based on proximity";
m->m_desc = "Demotion factor for proximity of query terms in "
"a document. The closer together terms occur in a "
"document, the higher it will score. "
"0 means no demotion. ";
m->m_cgi = "pqrprox";
m->m_scgi = "pqrprox";
m->m_sparm = 1;
m->m_off = (char *)&cr.m_pqr_demFactProximity - x;
m->m_soff = (char *)&si.m_pqr_demFactProximity - y;
m->m_type = TYPE_FLOAT;
m->m_def = "0";
m->m_group = 0;
m++;
// PQR: demote when query terms occur only in menus/links/lists.
// Fix: adjacent string literals concatenated to "punished.0 means" --
// added the missing space after the sentence.
m->m_title = "demotion for pages based on query terms section";
m->m_desc = "Demotion factor for where the query terms occur "
"in the document. If the terms only occur in a menu, "
"a link, or a list, the document will be punished. "
"0 means no demotion. ";
m->m_cgi = "pqrinsec";
m->m_scgi = "pqrinsec";
m->m_sparm = 1;
m->m_off = (char *)&cr.m_pqr_demFactInSection - x;
m->m_soff = (char *)&si.m_pqr_demFactInSection - y;
m->m_type = TYPE_FLOAT;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "weight of indexed score on pqr";
m->m_desc = "The proportion that the original score affects "
"its rerank position. A factor of 1 will maintain "
"the original score, 0 will only use the indexed "
"score to break ties.";
m->m_cgi = "pqrorig";
m->m_scgi = "pqrorig";
m->m_sparm = 1;
m->m_off = (char *)&cr.m_pqr_demFactOrigScore - x;
m->m_soff = (char *)&si.m_pqr_demFactOrigScore - y;
m->m_type = TYPE_FLOAT;
m->m_def = "1";
m->m_group = 0;
m++;
m->m_title = "max value for demotion for pages based on proximity";
m->m_desc = "Max summary score where no more demotion occurs above. "
"Pages with a summary score greater than or equal to this "
"value will not be demoted. ";
m->m_cgi = "pqrproxm";
m->m_off = (char *)&cr.m_pqr_maxValProximity - x;
m->m_type = TYPE_LONG;
m->m_def = "100000";
m->m_group = 0;
m++;
// PQR: demote results whose query terms appear only inside a larger phrase.
// Fix: spelling of "exclusively" in the displayed title.
m->m_title = "demotion for query being exclusively in a subphrase";
m->m_desc = "Search result which contains the query terms only"
" as a subphrase of a larger phrase will have its score "
" reduced by this percent.";
m->m_cgi = "pqrspd";
m->m_off = (char *)&cr.m_pqr_demFactSubPhrase - x;
m->m_soff = (char *)&si.m_pqr_demFactSubPhrase - y;
m->m_sparm = 1;
m->m_scgi = "pqrspd";
m->m_type = TYPE_FLOAT;
m->m_def = "0";
m->m_group = 0;
m++;
m->m_title = "demotion based on common inlinks";
m->m_desc = "Based on the number of inlinks a search results has "
"which are in common with another search result.";
m->m_cgi = "pqrcid";
m->m_off = (char *)&cr.m_pqr_demFactCommonInlinks - x;
m->m_soff = (char *)&si.m_pqr_demFactCommonInlinks - y;
m->m_sparm = 1;
m->m_scgi = "pqrcid";
m->m_type = TYPE_FLOAT;
m->m_def = ".5";
m->m_group = 0;
m++;
m->m_title = "number of document calls multiplier";
m->m_desc = "Allows more results to be gathered in the case of "
"an index having a high rate of duplicate results. Generally"
" expressed as 1.2";
m->m_cgi = "ndm";
m->m_off = (char *)&cr.m_numDocsMultiplier - x;
m->m_type = TYPE_FLOAT;
m->m_def = "1.2";
m->m_group = 0;
m++;
/*
m->m_title = "max documents to compute per host";
m->m_desc = "Limit number of documents to search that do not provide"
" the required results.";
m->m_cgi = "mdi";
m->m_off = (char *)&cr.m_maxDocIdsToCompute - x;
m->m_type = TYPE_LONG;
m->m_def = "1000";
m->m_group = 0;
m++;
*/
m->m_title = "max real time inlinks";
m->m_desc = "Limit number of linksdb inlinks requested per result.";
m->m_cgi = "mrti";
m->m_off = (char *)&cr.m_maxRealTimeInlinks - x;
m->m_soff = (char *)&si.m_maxRealTimeInlinks - y;
m->m_type = TYPE_LONG;
m->m_def = "10000";
m->m_group = 0;
m->m_sparm = 1;
m->m_scgi = "mrti";
m->m_smin = 0;
m->m_smax = 100000;
m++;
m->m_title = "percent similar dedup summary";
m->m_desc = "If document summary is this percent similar "
"to a document summary above it, then remove it from the search "
"results. 100 means only to remove if exactly the same. 0 means"
" no summary deduping.";
m->m_cgi = "psds";
m->m_off = (char *)&cr.m_percentSimilarSummary - x;
m->m_soff = (char *)&si.m_percentSimilarSummary - y;
m->m_type = TYPE_LONG;
m->m_def = "90";
m->m_group = 0;
m->m_sparm = 1;
m->m_scgi = "pss";
m->m_smin = 0;
m->m_smax = 100;
m++;
// Summary dedup: how many summary lines to generate for dedup comparison.
// Fix: spelling of "throw" (was "thorw") in the displayed desc.
m->m_title = "number of lines to use in summary to dedup";
m->m_desc = "Sets the number of lines to generate for summary deduping."
" This is to help the deduping process not throw out valid "
"summaries when normally displayed summaries are smaller values."
" Requires percent similar dedup summary to be enabled.";
m->m_cgi = "msld";
m->m_off = (char *)&cr.m_summDedupNumLines - x;
m->m_type = TYPE_LONG;
m->m_def = "4";
m->m_group = 0;
m++;
m->m_title = "bytes of doc to scan for summary generation";
m->m_desc = "Truncating this will miss out on good summaries, but "
"performance will increase.";
m->m_cgi = "clmfs";
m->m_off = (char *)&cr.m_contentLenMaxForSummary - x;
m->m_type = TYPE_LONG;
m->m_def = "70000";
m->m_group = 0;
m++;
m->m_title = "percent topic similar default";
m->m_desc = "Like above, but used for deciding when to cluster "
"results by topic for the news collection.";
m->m_cgi = "ptcd";
m->m_off = (char *)&cr.m_topicSimilarCutoffDefault - x;
m->m_type = TYPE_LONG;
m->m_def = "50";
m->m_group = 0;
m++;
//m->m_title = "max query terms";
//m->m_desc = "Do not allow more than this many query terms. Will "
// "return error in XML feed error tag if breeched.";
//m->m_cgi = "mqt";
//m->m_off = (char *)&cr.m_maxQueryTerms - x;
//m->m_soff = (char *)&si.m_maxQueryTerms - y;
//m->m_type = TYPE_LONG;
//m->m_def = "20"; // 20 for testing, normally 16
//m->m_sparm = 1;
//m->m_spriv = 1;
//m++;
/*
m->m_title = "dictionary site";
m->m_desc = "Where do we send requests for definitions of search "
"terms. Set to the empty string to turn this feature off.";
m->m_cgi = "dictionarySite";
m->m_off = (char *)&cr.m_dictionarySite - x;
m->m_type = TYPE_STRING;
m->m_size = SUMMARYHIGHLIGHTTAGMAXSIZE;
m->m_def = "http://www.answers.com/";
m++;
*/
m->m_title = "ip restriction for topics";
m->m_desc = "Should Gigablast only get one document per IP domain "
"and per domain for topic generation?";
m->m_cgi = "ipr";
m->m_off = (char *)&cr.m_ipRestrict - x;
m->m_soff = (char *)&si.m_ipRestrictForTopics - y;
m->m_type = TYPE_BOOL;
// default to 0 since newspaperarchive only has docs from same IP dom
m->m_def = "0";
m->m_sparm = 1;
m++;
m->m_title = "remove overlapping topics";
m->m_desc = "Should Gigablast remove overlapping topics?";
m->m_cgi = "rot";
m->m_off = (char *)&cr.m_topicRemoveOverlaps - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
m->m_title = "docs to scan for topics";
m->m_desc = "How many search results should we "
"scan for related topics per query?";
m->m_cgi = "dsrt";
m->m_off = (char *)&cr.m_docsToScanForTopics - x;
m->m_soff = (char *)&si.m_docsToScanForTopics - y;
m->m_type = TYPE_LONG;
m->m_def = "300";
m->m_group = 0;
m->m_sparm = 1;
m++;
m->m_title = "number of related topics";
m->m_desc = "What is the number of "
"related topics displayed per query? Set to 0 to save "
"CPU time.";
m->m_cgi = "nrt";
m->m_off = (char *)&cr.m_numTopics - x;
m->m_soff = (char *)&si.m_numTopicsToDisplay - y;
m->m_type = TYPE_LONG;
m->m_def = "11";
m->m_group = 0;
m->m_sparm = 1;
m->m_sprpg = 0; // do not propagate
m->m_sprpp = 0; // do not propagate
m++;
m->m_title = "min topics score";
m->m_desc = "Related topics with scores below this "
"will be excluded. Scores range from 0% to over 100%.";
m->m_cgi = "mts";
m->m_off = (char *)&cr.m_minTopicScore - x;
m->m_soff = (char *)&si.m_minTopicScore - y;
m->m_type = TYPE_LONG;
m->m_def = "5";
m->m_group = 0;
m->m_sparm = 1;
m++;
m->m_title = "min topic doc count";
m->m_desc = "How many documents must contain the topic for it to "
"be displayed.";
m->m_cgi = "mdc";
m->m_off = (char *)&cr.m_minDocCount - x;
m->m_soff = (char *)&si.m_minDocCount - y;
m->m_type = TYPE_LONG;
m->m_def = "2";
m->m_group = 0;
m->m_sparm = 1;
m++;
m->m_title = "dedup doc percent for topics";
m->m_desc = "If a document is this percent similar to another "
"document with a higher score, then it will not contribute "
"to the topic generation.";
m->m_cgi = "dsp";
m->m_off = (char *)&cr.m_dedupSamplePercent - x;
m->m_soff = (char *)&si.m_dedupSamplePercent - y;
m->m_type = TYPE_LONG;
m->m_def = "80";
m->m_group = 0;
m->m_sparm = 1;
m++;
m->m_title = "max words per topic";
m->m_desc = "Maximum number of words a topic can have. Affects "
"raw feeds, too.";
m->m_cgi = "mwpt";
m->m_off = (char *)&cr.m_maxWordsPerTopic - x;
m->m_soff = (char *)&si.m_maxWordsPerTopic - y;
m->m_type = TYPE_LONG;
m->m_def = "6";
m->m_group = 0;
m->m_sparm = 1;
m++;
m->m_title = "topic max sample size";
m->m_desc = "Max chars to sample from each doc for topics.";
m->m_cgi = "tmss";
m->m_off = (char *)&cr.m_topicSampleSize - x;
m->m_type = TYPE_LONG;
m->m_def = "4096";
m->m_group = 0;
m++;
m->m_title = "topic max punct len";
m->m_desc = "Max sequential punct chars allowed in a topic. "
"Set to 1 for speed, 5 or more for best topics but twice as "
"slow.";
m->m_cgi = "tmpl";
m->m_off = (char *)&cr.m_topicMaxPunctLen - x;
m->m_type = TYPE_LONG;
m->m_def = "1";
m->m_group = 0;
m++;
m->m_title = "do spell checking";
m->m_desc = "If enabled while using the XML feed, "
"when Gigablast finds a spelling recommendation it will be "
"included in the XML tag. Default is 0 if using an "
"XML feed, 1 otherwise.";
m->m_cgi = "spell";
m->m_off = (char *)&cr.m_spellCheck - x;
m->m_soff = (char *)&si.m_spellCheck - y;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_sparm = 1;
m->m_group = 0;
m++;
/*
m->m_title = "allow links: searches";
m->m_desc = "Allows anyone access to perform links: searches on this "
"collection.";
m->m_cgi = "als";
m->m_off = (char *)&cr.m_allowLinksSearch - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
*/
// REFERENCE PAGES CONTROLS
m->m_title = "number of reference pages to generate";
m->m_desc = "What is the number of "
"reference pages to generate per query? Set to 0 to save "
"CPU time.";
m->m_cgi = "nrp";
m->m_off = (char *)&cr.m_refs_numToGenerate - x;
m->m_soff = (char *)&si.m_refs_numToGenerate - y;
m->m_smaxc = (char *)&cr.m_refs_numToGenerateCeiling - x;
m->m_type = TYPE_LONG;
m->m_def = "0";
m->m_priv = 0;
m->m_sparm = 1;
m->m_smin = 0;
m++;
m->m_title = "number of reference pages to display";
m->m_desc = "What is the number of "
"reference pages to display per query?";
m->m_cgi = "nrpdd";
m->m_off = (char *)&cr.m_refs_numToDisplay - x;
m->m_soff = (char *)&si.m_refs_numToDisplay - y;
m->m_type = TYPE_LONG;
m->m_def = "0";
m->m_group = 0;
m->m_priv = 0; // allow the (more) link
m->m_sparm = 1;
m->m_sprpg = 0; // do not propagate
m->m_sprpp = 0; // do not propagate
m++;
m->m_title = "docs to scan for reference pages";
m->m_desc = "How many search results should we "
"scan for reference pages per query?";
m->m_cgi = "dsrp";
m->m_off = (char *)&cr.m_refs_docsToScan - x;
m->m_soff = (char *)&si.m_refs_docsToScan - y;
m->m_smaxc = (char *)&cr.m_refs_docsToScanCeiling - x;
m->m_type = TYPE_LONG;
m->m_def = "30";
m->m_group = 0;
m->m_priv = 0;
m->m_sparm = 1;
m->m_smin = 0;
m++;
// References: minimum page quality cutoff for reference pages.
// Fix: closed the unbalanced parenthesis in the displayed desc
// ("(set to 101 ... related pages." had no closing ")").
m->m_title = "min references quality";
m->m_desc = "References with page quality below this "
"will be excluded. (set to 101 to disable references while "
"still generating related pages.)";
m->m_cgi = "mrpq";
m->m_off = (char *)&cr.m_refs_minQuality - x;
m->m_soff = (char *)&si.m_refs_minQuality - y;
m->m_type = TYPE_LONG;
m->m_def = "1";
m->m_group = 0;
m->m_priv = 2;
m->m_sparm = 1;
m++;
m->m_title = "min links per references";
m->m_desc = "References need this many links to results to "
"be included.";
m->m_cgi = "mlpr";
m->m_off = (char *)&cr.m_refs_minLinksPerReference - x;
m->m_soff = (char *)&si.m_refs_minLinksPerReference - y;
m->m_type = TYPE_LONG;
m->m_def = "2";
m->m_group = 0;
m->m_priv = 2;
m->m_sparm = 1;
m++;
m->m_title = "max linkers to consider for references per page";
m->m_desc = "Stop processing referencing pages after hitting this "
"limit.";
m->m_cgi = "mrpl";
m->m_off = (char *)&cr.m_refs_maxLinkers - x;
m->m_soff = (char *)&si.m_refs_maxLinkers - y;
m->m_smaxc = (char *)&cr.m_refs_maxLinkersCeiling - x;
m->m_type = TYPE_LONG;
m->m_def = "500";
m->m_group = 0;
m->m_priv = 2;
m->m_sparm = 1;
m->m_smin = 0;
m++;
m->m_title = "page fetch multiplier for references";
m->m_desc = "Use this multiplier to fetch more than the required "
"number of reference pages. fetches N * (this parm) "
"references and displays the top scoring N.";
m->m_cgi = "ptrfr";
m->m_off = (char *)&cr.m_refs_additionalTRFetch - x;
m->m_soff = (char *)&si.m_refs_additionalTRFetch - y;
m->m_smaxc = (char *)&cr.m_refs_additionalTRFetchCeiling - x;
m->m_type = TYPE_FLOAT;
m->m_def = "1.5";
m->m_group = 0;
m->m_priv = 2;
m->m_sparm = 1;
m++;
m->m_title = "number of links coefficient";
m->m_desc = "A in A * numLinks + B * quality + C * "
"numLinks/totalLinks.";
m->m_cgi = "nlc";
m->m_off = (char *)&cr.m_refs_numLinksCoefficient - x;
m->m_soff = (char *)&si.m_refs_numLinksCoefficient - y;
m->m_type = TYPE_LONG;
m->m_def = "0";
m->m_group = 0;
m->m_priv = 2;
m->m_sparm = 1;
m++;
m->m_title = "quality coefficient";
m->m_desc = "B in A * numLinks + B * quality + C * "
"numLinks/totalLinks.";
m->m_cgi = "qc";
m->m_off = (char *)&cr.m_refs_qualityCoefficient - x;
m->m_soff = (char *)&si.m_refs_qualityCoefficient - y;
m->m_type = TYPE_LONG;
m->m_def = "1";
m->m_group = 0;
m->m_priv = 2;
m->m_sparm = 1;
m++;
m->m_title = "link density coefficient";
m->m_desc = "C in A * numLinks + B * quality + C * "
"numLinks/totalLinks.";
m->m_cgi = "ldc";
m->m_off = (char *)&cr.m_refs_linkDensityCoefficient - x;
m->m_soff = (char *)&si.m_refs_linkDensityCoefficient - y;
m->m_type = TYPE_LONG;
m->m_def = "1000";
m->m_group = 0;
m->m_priv = 2;
m->m_sparm = 1;
m++;
// References scoring: choose + or * between the quality and
// link-density terms of the reference score formula.
// Fix: spelling of "multiply" (was "multipy") in the displayed title.
m->m_title = "add or multiply quality times link density";
m->m_desc = "[+|*] in A * numLinks + B * quality [+|*]"
" C * numLinks/totalLinks.";
m->m_cgi = "mrs";
m->m_off = (char *)&cr.m_refs_multiplyRefScore - x;
m->m_soff = (char *)&si.m_refs_multiplyRefScore - y;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m->m_priv = 2;
m->m_sparm = 1;
m++;
// reference pages ceiling parameters
m->m_title = "maximum allowed value for "
"numReferences parameter";
m->m_desc = "maximum allowed value for "
"numReferences parameter";
m->m_cgi = "nrpc";
m->m_off = (char *)&cr.m_refs_numToGenerateCeiling - x;
m->m_type = TYPE_LONG;
m->m_def = "100";
m->m_group = 0;
m->m_priv = 2;
m++;
m->m_title = "maximum allowed value for "
"docsToScanForReferences parameter";
m->m_desc = "maximum allowed value for "
"docsToScanForReferences parameter";
m->m_cgi = "dsrpc";
m->m_off = (char *)&cr.m_refs_docsToScanCeiling - x;
m->m_type = TYPE_LONG;
m->m_def = "100";
m->m_group = 0;
m->m_priv = 2;
m++;
m->m_title = "maximum allowed value for "
"maxLinkers parameter";
m->m_desc = "maximum allowed value for "
"maxLinkers parameter";
m->m_cgi = "mrplc";
m->m_off = (char *)&cr.m_refs_maxLinkersCeiling - x;
m->m_type = TYPE_LONG;
m->m_def = "5000";
m->m_group = 0;
m->m_priv = 2;
m++;
m->m_title = "maximum allowed value for "
"additionalTRFetch";
m->m_desc = "maximum allowed value for "
"additionalTRFetch parameter";
m->m_cgi = "ptrfrc";
m->m_off = (char *)&cr.m_refs_additionalTRFetchCeiling - x;
m->m_type = TYPE_FLOAT;
m->m_def = "10";
m->m_group = 0;
m->m_priv = 2;
m++;
// related pages parameters
m->m_title = "number of related pages to generate";
m->m_desc = "number of related pages to generate.";
m->m_cgi = "nrpg";
m->m_off = (char *)&cr.m_rp_numToGenerate - x;
m->m_soff = (char *)&si.m_rp_numToGenerate - y;
m->m_smaxc = (char *)&cr.m_rp_numToGenerateCeiling - x;
m->m_type = TYPE_LONG;
m->m_def = "0";
m->m_priv = 0;
m->m_sparm = 1;
m->m_smin = 0;
m++;
m->m_title = "number of related pages to display";
m->m_desc = "number of related pages to display.";
m->m_cgi = "nrpd";
m->m_off = (char *)&cr.m_rp_numToDisplay - x;
m->m_soff = (char *)&si.m_rp_numToDisplay - y;
m->m_type = TYPE_LONG;
m->m_def = "0";
m->m_group = 0;
m->m_priv = 0; // allow the (more) link
m->m_sparm = 1;
m->m_sprpg = 0; // do not propagate
m->m_sprpp = 0; // do not propagate
m++;
m->m_title = "number of links to scan for related pages";
m->m_desc = "number of links per reference page to scan for related "
"pages.";
m->m_cgi = "nlpd";
m->m_off = (char *)&cr.m_rp_numLinksPerDoc - x;
m->m_soff = (char *)&si.m_rp_numLinksPerDoc - y;
m->m_smaxc = (char *)&cr.m_rp_numLinksPerDocCeiling - x;
m->m_type = TYPE_LONG;
m->m_def = "1024";
m->m_group = 0;
m->m_priv = 2;
m->m_sparm = 1;
m->m_smin = 0;
m++;
m->m_title = "min related page quality";
m->m_desc = "related pages with a quality lower than this will be "
"ignored.";
m->m_cgi = "merpq";
m->m_off = (char *)&cr.m_rp_minQuality - x;
m->m_soff = (char *)&si.m_rp_minQuality - y;
m->m_type = TYPE_LONG;
m->m_def = "30";
m->m_group = 0;
m->m_priv = 2;
m->m_sparm = 1;
m++;
m->m_title = "min related page score";
m->m_desc = "related pages with an adjusted score lower than this "
"will be ignored.";
m->m_cgi = "merps";
m->m_off = (char *)&cr.m_rp_minScore - x;
m->m_soff = (char *)&si.m_rp_minScore - y;
m->m_type = TYPE_LONG;
m->m_def = "1";
m->m_group = 0;
m->m_priv = 2;
m->m_sparm = 1;
m++;
m->m_title = "min related page links";
m->m_desc = "related pages with less than this number of links"
" will be ignored.";
m->m_cgi = "merpl";
m->m_off = (char *)&cr.m_rp_minLinks - x;
m->m_soff = (char *)&si.m_rp_minLinks - y;
m->m_type = TYPE_LONG;
m->m_def = "2";
m->m_group = 0;
m->m_priv = 2;
m->m_sparm = 1;
m++;
m->m_title = "coefficient for number of links in related pages score "
"calculation";
m->m_desc = "A in A * numLinks + B * avgLnkrQlty + C * PgQlty"
" + D * numSRPLinks.";
m->m_cgi = "nrplc";
m->m_off = (char *)&cr.m_rp_numLinksCoeff - x;
m->m_soff = (char *)&si.m_rp_numLinksCoeff - y;
m->m_type = TYPE_LONG;
m->m_def = "10";
m->m_group = 0;
m->m_priv = 2;
m->m_sparm = 1;
m++;
m->m_title = "coefficient for average linker quality in related pages "
"score calculation";
m->m_desc = "B in A * numLinks + B * avgLnkrQlty + C * PgQlty"
" + D * numSRPLinks.";
m->m_cgi = "arplqc";
m->m_off = (char *)&cr.m_rp_avgLnkrQualCoeff - x;
m->m_soff = (char *)&si.m_rp_avgLnkrQualCoeff - y;
m->m_type = TYPE_LONG;
m->m_def = "1";
m->m_group = 0;
m->m_priv = 2;
m->m_sparm = 1;
m++;
m->m_title = "coefficient for page quality in related pages "
"score calculation";
m->m_desc = "C in A * numLinks + B * avgLnkrQlty + C * PgQlty"
" + D * numSRPLinks";
m->m_cgi = "qrpc";
m->m_off = (char *)&cr.m_rp_qualCoeff - x;
m->m_soff = (char *)&si.m_rp_qualCoeff - y;
m->m_type = TYPE_LONG;
m->m_def = "1";
m->m_group = 0;
m->m_priv = 2;
m->m_sparm = 1;
m++;
m->m_title = "coefficient for search result links in related pages "
"score calculation";
m->m_desc = "D in A * numLinks + B * avgLnkrQlty + C * PgQlty"
" + D * numSRPLinks.";
m->m_cgi = "srprpc";
m->m_off = (char *)&cr.m_rp_srpLinkCoeff - x;
m->m_soff = (char *)&si.m_rp_srpLinkCoeff - y;
m->m_type = TYPE_LONG;
m->m_def = "1";
m->m_group = 0;
m->m_priv = 2;
m->m_sparm = 1;
m++;
m->m_title = "number of related page summary excerpts";
m->m_desc = "What is the maximum number of "
"excerpts displayed in the summary of a related page?";
m->m_cgi = "nrps";
m->m_off = (char *)&cr.m_rp_numSummaryLines - x;
m->m_soff = (char *)&si.m_rp_numSummaryLines - y;
m->m_smaxc = (char *)&cr.m_rp_numSummaryLinesCeiling - x;
m->m_type = TYPE_LONG;
m->m_def = "1";
m->m_group = 0;
m->m_priv = 2;
m->m_sparm = 1;
m->m_smin = 0;
m++;
m->m_title = "highlight query terms in related pages summary";
m->m_desc = "Highlight query terms in related pages summary.";
m->m_cgi = "hqtirps";
m->m_off = (char *)&cr.m_rp_doRelatedPageSumHighlight - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m->m_priv = 2;
m++;
// Related pages: truncate displayed titles after this many characters.
// Fix: spelling of "characters" (was "charaters") in the displayed desc.
m->m_title = "number of characters to display in title before "
"truncating";
m->m_desc = "Truncates a related page title after this many "
"characters and adds ...";
m->m_cgi = "ttl";
m->m_off = (char *)&cr.m_rp_titleTruncateLimit - x;
m->m_soff = (char *)&si.m_rp_titleTruncateLimit - y;
m->m_type = TYPE_LONG;
m->m_def = "50";
m->m_group = 0;
m->m_priv = 2;
m->m_sparm = 1;
m++;
m->m_title = "use results pages as references";
m->m_desc = "Use the search results' links in order to generate "
"related pages.";
m->m_cgi = "urar";
m->m_off = (char *)&cr.m_rp_useResultsAsReferences - x;
m->m_soff = (char *)&si.m_rp_useResultsAsReferences - y;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m->m_priv = 2;
m->m_sparm = 1;
m++;
m->m_title = "get related pages from other cluster";
m->m_desc = "Say yes here to make Gigablast check another Gigablast "
"cluster for title rec for related pages. Gigablast will "
"use the hosts2.conf file in the working directory to "
"tell it what hosts belong to the other cluster.";
m->m_cgi = "erp"; // external related pages
m->m_off = (char *)&cr.m_rp_getExternalPages - x;
m->m_soff = (char *)&si.m_rp_getExternalPages - y;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m->m_priv = 2;
m->m_sparm = 1;
m++;
m->m_title = "collection for other related pages cluster";
m->m_desc = "Gigablast will fetch the related pages title record "
"from this collection in the other cluster.";
m->m_cgi = "erpc"; // external related pages collection
m->m_off = (char *)&cr.m_rp_externalColl - x;
m->m_soff = (char *)&si.m_rp_externalColl - y;
m->m_type = TYPE_STRING;
m->m_size = MAX_COLL_LEN;
m->m_def = "main";
m->m_group = 0;
m->m_priv = 2;
m->m_sparm = 1;
m++;
// relate pages ceiling parameters
m->m_title = "maximum allowed value for numToGenerate parameter";
m->m_desc = "maximum allowed value for numToGenerate parameter";
m->m_cgi = "nrpgc";
m->m_off = (char *)&cr.m_rp_numToGenerateCeiling - x;
m->m_type = TYPE_LONG;
m->m_def = "100";
m->m_group = 0;
m->m_priv = 2;
m++;
m->m_title = "maximum allowed value for numRPLinksPerDoc parameter";
m->m_desc = "maximum allowed value for numRPLinksPerDoc parameter";
m->m_cgi = "nlpdc";
m->m_off = (char *)&cr.m_rp_numLinksPerDocCeiling - x;
m->m_type = TYPE_LONG;
m->m_def = "5000";
m->m_group = 0;
m->m_priv = 2;
m++;
m->m_title = "maximum allowed value for numSummaryLines parameter";
m->m_desc = "maximum allowed value for numSummaryLines parameter";
m->m_cgi = "nrpsc";
m->m_off = (char *)&cr.m_rp_numSummaryLinesCeiling - x;
m->m_type = TYPE_LONG;
m->m_def = "10";
m->m_group = 0;
m->m_priv = 2;
m++;
// import search results controls
m->m_title = "how many imported results should we insert";
m->m_desc = "Gigablast will import X search results from the "
"external cluster given by hosts2.conf and merge those "
"search results into the current set of search results. "
"Set to 0 to disable.";
m->m_cgi = "imp";
m->m_off = (char *)&cr.m_numResultsToImport - x;
m->m_soff = (char *)&si.m_numResultsToImport - y;
m->m_type = TYPE_LONG;
m->m_def = "0";
m->m_priv = 2;
m->m_sparm = 1;
m++;
m->m_title = "imported score weight";
m->m_desc = "The score of all imported results will be multiplied "
"by this number. Since results are mostly imported from "
"a large collection they will usually have higher scores "
"because of having more link texts or whatever, so tone it "
"down a bit to put it on par with the integrating collection.";
m->m_cgi = "impw";
m->m_off = (char *)&cr.m_importWeight - x;
m->m_soff = (char *)&si.m_importWeight - y;
m->m_type = TYPE_FLOAT;
m->m_def = ".80";
m->m_group = 0;
m->m_priv = 2;
m->m_sparm = 1;
m++;
m->m_title = "how many linkers must each imported result have";
m->m_desc = "The urls of imported search results must be linked to "
"by at least this many documents in the primary collection.";
m->m_cgi = "impl";
m->m_off = (char *)&cr.m_minLinkersPerImportedResult - x;
m->m_soff = (char *)&si.m_minLinkersPerImportedResult - y;
m->m_type = TYPE_LONG;
m->m_def = "3";
m->m_group = 0;
m->m_priv = 2;
m->m_sparm = 1;
m++;
m->m_title = "num linkers weight";
m->m_desc = "The number of linkers an imported result has from "
"the base collection is multiplied by this weight and then "
"added to the final score. The higher this is the more an "
"imported result with a lot of linkers will be boosted. "
"Currently, 100 is the max number of linkers permitted.";
m->m_cgi = "impnlw";
m->m_off = (char *)&cr.m_numLinkerWeight - x;
m->m_soff = (char *)&si.m_numLinkerWeight - y;
m->m_type = TYPE_LONG;
m->m_def = "50";
m->m_group = 0;
m->m_priv = 2;
m->m_sparm = 1;
m++;
m->m_title = "the name of the collection to import from";
m->m_desc = "Gigablast will import X search results from this "
"external collection and merge them into the current search "
"results.";
m->m_cgi = "impc";
m->m_off = (char *)&cr.m_importColl - x;
m->m_soff = (char *)&si.m_importColl - y;
m->m_type = TYPE_STRING;
m->m_size = MAX_COLL_LEN;
m->m_def = "main";
m->m_group = 0;
m->m_priv = 2;
m->m_sparm = 1;
m++;
// more general parameters

// absolute cap on total returned search results (cgi: msr)
m->m_title = "max search results";
m->m_desc = "What is the limit to the total number "
"of returned search results.";
m->m_cgi = "msr";
m->m_off = (char *)&cr.m_maxSearchResults - x;
m->m_type = TYPE_LONG;
m->m_def = "1000";
m++;
// per-query cap for ordinary (non-client) users (cgi: msrpq)
m->m_title = "max search results per query";
m->m_desc = "What is the limit to the total number "
"of returned search results per query?";
m->m_cgi = "msrpq";
m->m_off = (char *)&cr.m_maxSearchResultsPerQuery - x;
m->m_type = TYPE_LONG;
m->m_def = "100";
m->m_group = 0;
m++;
// total cap for paying clients (cgi: msrfpc)
m->m_title = "max search results for paying clients";
m->m_desc = "What is the limit to the total number "
"of returned search results for clients.";
m->m_cgi = "msrfpc";
m->m_off = (char *)&cr.m_maxSearchResultsForClients - x;
m->m_type = TYPE_LONG;
m->m_def = "1000";
m++;
// per-query cap for paying clients; requires auto ban (cgi: msrpqfc)
m->m_title = "max search results per query for paying clients";
m->m_desc = "What is the limit to the total number "
"of returned search results per query for paying clients? "
"Auto ban must be enabled for this to work.";
m->m_cgi = "msrpqfc";
m->m_off = (char *)&cr.m_maxSearchResultsPerQueryForClients - x;
m->m_type = TYPE_LONG;
m->m_def = "1000";
m->m_group = 0;
m++;
// how many similar results to show when clustering by topic (cgi: ncbt)
m->m_title = "max similar results for cluster by topic";
m->m_desc = "Max similar results to show when clustering by topic.";
m->m_cgi = "ncbt";
m->m_off = (char *)&cr.m_maxClusterByTopicResults - x;
m->m_type = TYPE_LONG;
m->m_def = "10";
m->m_group = 0;
m->m_sparm = 1;
m->m_scgi = "ncbt";
m->m_soff = (char *)&si.m_maxClusterByTopicResults - y;
m++;
// extra results fetched to feed topic clustering (cgi: ntwo)
// NOTE(review): title and desc are identical here — desc was likely
// never written; confirm intended wording upstream.
m->m_title = "number of extra results to get for cluster by topic";
m->m_desc = "number of extra results to get for cluster by topic";
m->m_cgi = "ntwo";
m->m_off = (char *)&cr.m_numExtraClusterByTopicResults - x;
m->m_type = TYPE_LONG;
m->m_def = "100";
m->m_group = 0;
m->m_sparm = 1;
m->m_scgi = "ntwo";
m->m_soff = (char *)&si.m_numExtraClusterByTopicResults - y;
m++;
// character cap on displayed result titles (cgi: tml)
m->m_title = "max title len";
m->m_desc = "What is the maximum number of "
"characters allowed in titles displayed in the search "
"results?";
m->m_cgi = "tml";
m->m_off = (char *)&cr.m_titleMaxLen - x;
m->m_type = TYPE_LONG;
m->m_def = "80";
m++;
// threshold before in-linker anchor text is considered for titles
// (cgi: mininlinkers); title and desc are intentionally the same text
m->m_title = "Minimum number of in linkers required to consider getting"
" the title from in linkers";
m->m_desc = "Minimum number of in linkers required to consider getting"
" the title from in linkers";
m->m_cgi = "mininlinkers";
m->m_off = (char *)&cr.m_minTitleInLinkers - x;
m->m_type = TYPE_LONG;
m->m_def = "10";
m++;
// upper bound on in-linkers examined for title extraction (cgi: maxinlinkers)
m->m_title = "Max number of in linkers to consider";
m->m_desc = "Max number of in linkers to consider for getting in "
"linkers titles.";
m->m_cgi = "maxinlinkers";
m->m_off = (char *)&cr.m_maxTitleInLinkers - x;
m->m_type = TYPE_LONG;
m->m_def = "128";
m++;
// "use new summary generator" parm disabled — kept for reference
/*
m->m_title = "use new summary generator";
m->m_desc = "Also used for gigabits and titles.";
m->m_cgi = "uns"; // external related pages
m->m_off = (char *)&cr.m_useNewSummaries - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_sparm = 1;
m->m_scgi = "uns";
m->m_soff = (char *)&si.m_useNewSummaries - y;
m++;
*/
// toggle returning per-docid scoring detail with results (cgi: scores)
m->m_title = "get docid scoring info";
m->m_desc = "Get docid scoring info?";
m->m_cgi = "scores"; // dedupResultsByDefault";
m->m_off = (char *)&cr.m_getDocIdScoringInfo - x;
m->m_soff = (char *)&si.m_getDocIdScoringInfo - y;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m->m_sparm = 1;
m->m_scgi = "scores";
m++;
// ---- summary generation parms ----

// selects the summary generation algorithm by number (cgi: smd)
m->m_title = "summary mode";
m->m_desc = "0 = old compatibility mode, 1 = UTF-8 mode, "
"2 = fast ASCII mode, "
"3 = Ascii Proximity Summary, "
"4 = Utf8 Proximity Summary, "
"5 = Ascii Pre Proximity Summary, "
"6 = Utf8 Pre Proximity Summary:";
m->m_cgi = "smd";
m->m_off = (char *)&cr.m_summaryMode - x;
m->m_type = TYPE_LONG;
m->m_def = "0";
m->m_sparm = 1;
m->m_scgi = "smd";
m->m_soff = (char*) &si.m_summaryMode - y;
m++;
// total character cap on a result's summary (cgi: sml)
m->m_title = "max summary len";
m->m_desc = "What is the maximum number of "
"characters displayed in a summary for a search result?";
m->m_cgi = "sml";
m->m_off = (char *)&cr.m_summaryMaxLen - x;
m->m_type = TYPE_LONG;
m->m_def = "512";
m->m_group = 0;
m++;
// cap on excerpt (line) count per summary (cgi: smnl)
m->m_title = "max summary excerpts";
m->m_desc = "What is the maximum number of "
"excerpts displayed in the summary of a search result?";
m->m_cgi = "smnl";
m->m_off = (char *)&cr.m_summaryMaxNumLines - x;
m->m_type = TYPE_LONG;
m->m_def = "4";
m->m_group = 0;
m++;
// character cap per individual excerpt (cgi: smxcpl)
m->m_title = "max summary excerpt length";
m->m_desc = "What is the maximum number of "
"characters allowed per summary excerpt?";
m->m_cgi = "smxcpl";
m->m_off = (char *)&cr.m_summaryMaxNumCharsPerLine - x;
m->m_type = TYPE_LONG;
m->m_def = "90";
m->m_group = 0;
m++;
// default excerpt count; overridable per query via &ns= (cgi: sdnl)
m->m_title = "default number of summary excerpts";
m->m_desc = "What is the default number of "
"summary excerpts displayed per search result?";
m->m_cgi = "sdnl";
m->m_off = (char *)&cr.m_summaryDefaultNumLines - x;
m->m_type = TYPE_LONG;
m->m_def = "3";
m->m_group = 0;
m->m_sparm = 1;
m->m_scgi = "ns";
m->m_soff = (char *)&si.m_numLinesInSummary - y;
m++;
// line-wrap width for summaries (cgi: smw)
// NOTE(review): desc starts with " tags are inserted" — an HTML tag
// (probably "<br>") looks lost to text extraction; confirm upstream.
m->m_title = "max summary line width";
m->m_desc = " tags are inserted to keep the number "
"of chars in the summary per line at or below this width. "
"Strings without spaces that exceed this "
"width are not split.";
m->m_cgi = "smw";
m->m_off = (char *)&cr.m_summaryMaxWidth - x;
m->m_type = TYPE_LONG;
m->m_def = "80";
m->m_group = 0;
m->m_sparm = 1;
m->m_scgi = "sw";
m->m_soff = (char *)&si.m_summaryMaxWidth - y;
m++;
// proximity-summary carving: max chars between search terms (cgi: pscr)
m->m_title = "Prox summary carver radius";
m->m_desc = "Maximum number of characters to allow in between "
"search terms.";
m->m_cgi = "pscr";
m->m_off = (char *)&cr.m_proxCarveRadius - x;
m->m_type = TYPE_LONG;
m->m_def = "256";
m->m_group = 0;
m++;
// ---- query-term highlight tags for result summaries ----
// (typo fixes: "highlightig" -> "highlighting", "displated" ->
// "displayed"; the back-tag desc wrongly said "Front html tag" — a
// copy/paste error — now "Back html tag")

// raw HTML emitted before each highlighted query term (cgi: sfht)
// NOTE(review): m_def is empty — possibly an HTML tag (e.g. "<b>")
// was lost in extraction; confirm against the upstream source.
m->m_title = "front highlight tag";
m->m_desc = "Front html tag used for highlighting query terms in the "
"summaries displayed in the search results.";
m->m_cgi = "sfht";
m->m_off = (char *)cr.m_summaryFrontHighlightTag - x;
m->m_type = TYPE_STRING;
m->m_size = SUMMARYHIGHLIGHTTAGMAXSIZE ;
m->m_def = "";
m->m_group = 0;
m++;
// raw HTML emitted after each highlighted query term (cgi: sbht);
// default of a single space preserved from the original
m->m_title = "back highlight tag";
m->m_desc = "Back html tag used for highlighting query terms in the "
"summaries displayed in the search results.";
m->m_cgi = "sbht";
m->m_off = (char *)cr.m_summaryBackHighlightTag - x;
m->m_type = TYPE_STRING;
m->m_size = SUMMARYHIGHLIGHTTAGMAXSIZE ;
m->m_def = " ";
m->m_group = 0;
m++;
// "page turk" parm disabled — kept for reference
/*
m->m_title = "enable page turk";
m->m_desc = "If enabled, search results shall feed the page turk "
"is used to mechanically rank websites.";
m->m_cgi = "ept";
m->m_def = "0";
m->m_off = (char *)&cr.m_pageTurkEnabled - x;
m->m_type = TYPE_BOOL;
m++;
*/
// include word stems and synonyms in matching (cgi: qe)
m->m_title = "do query expansion";
m->m_desc = "Query expansion will include word stems and synonyms in "
"its search results.";
m->m_def = "1";
m->m_off = (char *)&cr.m_queryExpansion - x;
m->m_soff = (char *)&si.m_queryExpansion - y;
m->m_type = TYPE_BOOL;
m->m_sparm = 1;
m->m_cgi = "qe";
m->m_scgi = "qe";
m++;
// allow deriving titles from document body text (cgi: gtfb)
m->m_title = "consider titles from body";
m->m_desc = "Can Gigablast make titles from the document content? "
"Used mostly for the news collection where the title tags "
"are not very reliable.";
m->m_cgi = "gtfb";
m->m_off = (char *)&cr.m_considerTitlesFromBody - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m->m_sparm = 1;
m->m_soff = (char *)&si.m_considerTitlesFromBody - y;
m++;
// ---- DMOZ (web directory) display parms ----
// show a result's direct dmoz categories (cgi: ddc)
m->m_title = "display dmoz categories in results";
m->m_desc = "If enabled, results in dmoz will display their "
"categories on the results page.";
m->m_cgi = "ddc";
m->m_off = (char *)&cr.m_displayDmozCategories - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
// show a result's indirect (ancestor) dmoz categories (cgi: didc)
m->m_title = "display indirect dmoz categories in results";
m->m_desc = "If enabled, results in dmoz will display their "
"indirect categories on the results page.";
m->m_cgi = "didc";
m->m_off = (char *)&cr.m_displayIndirectDmozCategories - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m++;
// per-category "search this category" link next to each result (cgi: dscl)
m->m_title = "display Search Category link to query category of result";
m->m_desc = "If enabled, a link will appear next to each category "
"on each result allowing the user to perform their query "
"on that entire category.";
m->m_cgi = "dscl";
m->m_off = (char *)&cr.m_displaySearchCategoryLink - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m++;
// fall back to the DMOZ title for untitled pages (cgi: udfu)
m->m_title = "use dmoz for untitled";
m->m_desc = "Yes to use DMOZ given title when a page is untitled but "
"is in DMOZ.";
m->m_cgi = "udfu";
m->m_off = (char *)&cr.m_useDmozForUntitled - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
// always show the DMOZ summary for in-DMOZ results (cgi: udsm)
m->m_title = "show dmoz summaries";
m->m_desc = "Yes to always show DMOZ summaries with search results "
"that are in DMOZ.";
m->m_cgi = "udsm";
m->m_off = (char *)&cr.m_showDmozSummary - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
// include the Adult branch when listing the Top category (cgi: sacot)
m->m_title = "show adult category on top";
m->m_desc = "Yes to display the Adult category in the Top category";
m->m_cgi = "sacot";
m->m_off = (char *)&cr.m_showAdultCategoryOnTop - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m++;
// "sensitive xml feed info" parm disabled — kept for reference
/*
m->m_title = "show sensitive info in xml feed";
m->m_desc = "If enabled, we show certain tagb tags for each "
"search result, allow &inlinks=1 cgi parms, show "
", etc. in the xml feed. Created for buzzlogic.";
m->m_cgi = "sss";
m->m_off = (char *)&cr.m_showSensitiveStuff - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
*/
// ---- result date display parms ----
// show the date the page was indexed (cgi: didt)
m->m_title = "display indexed date";
m->m_desc = "Display the indexed date along with results.";
m->m_cgi = "didt";
m->m_off = (char *)&cr.m_displayIndexedDate - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m++;
// show the page's last-modified date (cgi: dlmdt)
m->m_title = "display last modified date";
m->m_desc = "Display the last modified date along with results.";
m->m_cgi = "dlmdt";
m->m_off = (char *)&cr.m_displayLastModDate - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;
// show the published date taken from datedb (cgi: dipt)
m->m_title = "display published date";
m->m_desc = "Display the published (datedb) date along with results.";
m->m_cgi = "dipt";
m->m_off = (char *)&cr.m_displayPublishDate - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
// [cached] link loads the click-n-scroll viewer (cgi: ecns)
m->m_title = "enable click 'n' scroll";
m->m_desc = "The [cached] link on results pages loads click n scroll.";
m->m_cgi = "ecns";
m->m_off = (char *)&cr.m_clickNScrollEnabled - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
// ---- data feed account-verification server parms ----
// enable remote account verification for data feed customers (cgi: dfuas)
m->m_title = "use data feed account server";
m->m_desc = "Enable/disable the use of a remote account verification "
"for Data Feed Customers.";
m->m_cgi = "dfuas";
m->m_off = (char *)&cr.m_useDFAcctServer - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
// account server ip; default 2130706433 == 127.0.0.1 (cgi: dfip)
m->m_title = "data feed server ip";
m->m_desc = "The ip address of the Gigablast data feed server to "
"retrieve customer account information from.";
m->m_cgi = "dfip";
m->m_off = (char *)&cr.m_dfAcctIp - x;
m->m_type = TYPE_IP;
m->m_def = "2130706433";
m->m_group = 0;
m++;
// account server port (cgi: dfport)
m->m_title = "data feed server port";
m->m_desc = "The port of the Gigablast data feed server to retrieve "
"customer account information from.";
m->m_cgi = "dfport";
m->m_off = (char *)&cr.m_dfAcctPort - x;
m->m_type = TYPE_LONG;
m->m_def = "8040";
m->m_group = 0;
m++;
// account server collection parm disabled — kept for reference
/*
m->m_title = "data feed server collection";
m->m_desc = "The collection on the Gigablast data feed server to "
"retrieve customer account information from.";
m->m_cgi = "dfcoll";
m->m_off = (char *)&cr.m_dfAcctColl - x;
m->m_type = TYPE_STRING;
m->m_size = MAX_COLL_LEN;
m->m_def = "customers";
m->m_group = 0;
m++;
*/
// ---- disabled UI parms (cols / screen width / collection hostnames):
// all commented out; kept verbatim for reference ----
//
// not sure cols=x goes here or not
//
/*
m->m_title = "Number Of Columns(1-6)";
m->m_desc = "How many columns results should be shown in. (1-6)";
m->m_cgi = "cols";
m->m_smin = 1;
m->m_smax = 6;
m->m_off = (char *)&cr.m_numCols - x;
m->m_soff = (char *)&si.m_numCols - y;
m->m_type = TYPE_LONG;
m->m_def = "1";
m->m_group = 0;
m->m_sparm = 1;
m++;
*/
//
// Gets the screen width
//
/*
m->m_title = "Screen Width";
m->m_desc = "screen size of browser window";
m->m_cgi = "ws";
m->m_smin = 600;
m->m_off = (char *)&cr.m_screenWidth - x;
m->m_soff = (char *)&si.m_screenWidth - y;
m->m_type = TYPE_LONG;
m->m_def = "1100";
m->m_group = 0;
m->m_sparm = 1;
m++;
*/
/*
m->m_title = "collection hostname";
m->m_desc = "Hostname that will default to this collection. Blank"
" for none or default collection.";
m->m_cgi = "chstn";
m->m_off = (char *)cr.m_collectionHostname - x;
m->m_type = TYPE_STRING;
m->m_size = MAX_URL_LEN;
m->m_def = "";
m++;
m->m_title = "collection hostname (1)";
m->m_desc = "Hostname that will default to this collection. Blank"
" for none or default collection.";
m->m_cgi = "chstna";
m->m_off = (char *)cr.m_collectionHostname1 - x;
m->m_type = TYPE_STRING;
m->m_size = MAX_URL_LEN;
m->m_def = "";
m->m_group = 0;
m++;
m->m_title = "collection hostname (2)";
m->m_desc = "Hostname that will default to this collection. Blank"
" for none or default collection.";
m->m_cgi = "chstnb";
m->m_off = (char *)cr.m_collectionHostname2 - x;
m->m_type = TYPE_STRING;
m->m_size = MAX_URL_LEN;
m->m_def = "";
m->m_group = 0;
m++;
*/
/*
m->m_title = "html head";
m->m_desc = "Html to display before the search results. Convenient "
"for changing colors and displaying logos. Use the variable, "
"%q, to represent the query to display in a text box. "
"Use %e to display it in a url. "
"Use %e to print the page encoding.Use %D to print a drop down "
"menu for the number of search results to return. Use %S "
"to print sort by date or relevance link. Use %L to "
"display the logo. Use %R to display radio buttons for site "
"search. Use %F to begin the form. and use %H to insert "
"hidden text "
"boxes of parameters, both %F and %H are necessary. "
"Use %f to display "
"the family filter radio buttons. "
"Directory: Use %s to display the directory "
"search type options. Use %l to specify the location of "
"dir=rtl in the body tag for RTL pages. "
"Use %where and %when to substitute the where and when of "
"the query. These values may be set based on the cookie if "
"none was explicitly given. "
"IMPORTANT: In the xml configuration file, this html "
"must be encoded (less thans mapped to <, etc.).";
m->m_cgi = "hh";
m->m_off = (char *)cr.m_htmlHead - x;
m->m_plen = (char *)&cr.m_htmlHeadLen - x; // length of string
m->m_type = TYPE_STRINGBOX;
m->m_size = MAX_HTML_LEN + 1;
m->m_def =
"\n"
"\n"
"\n"
"Gigablast Search Results \n"
" \n"
"\n"
"\n"
"\n"
//"