diff --git a/Blaster.cpp b/Blaster.cpp
index 685362eb..b94fcdab 100644
--- a/Blaster.cpp
+++ b/Blaster.cpp
@@ -651,7 +651,10 @@ void Blaster::gotDoc2 ( void *state, TcpSocket *s){
 		     false,
 		     0,
 		     false,
-		     TITLEREC_CURRENT_VERSION)){
+		      TITLEREC_CURRENT_VERSION ,
+		      true , // set parents
+		      0 , // niceness 
+		      CT_XML )){ // content type
 		log(LOG_WARN,"blaster: Couldn't set XML1 Class in gotDoc2");
 	}
 	Links links1;
@@ -679,7 +682,10 @@ void Blaster::gotDoc2 ( void *state, TcpSocket *s){
 		     false,
 		     0,
 		     false,
-		     TITLEREC_CURRENT_VERSION)){
+		      TITLEREC_CURRENT_VERSION,
+		      true , // setparents
+		      0 , // niceness
+		      CT_XML )){
 		log(LOG_WARN,"blaster: Couldn't set XML2 Class in gotDoc2");
 	}
 	Links links2;
@@ -1170,7 +1176,10 @@ void Blaster::gotDoc4 ( void *state, TcpSocket *s){
 		     false,
 		     0,
 		     false,
-		     TITLEREC_CURRENT_VERSION)){
+		     TITLEREC_CURRENT_VERSION,
+		     true, // setparents
+		     0, // niceness
+		     CT_XML )){
 		log(LOG_WARN,"blaster: Couldn't set XML Class in gotDoc4");
 	}
 	Links links;
diff --git a/Linkdb.cpp b/Linkdb.cpp
index 998f65c2..7d885384 100644
--- a/Linkdb.cpp
+++ b/Linkdb.cpp
@@ -3648,7 +3648,8 @@ bool Inlink::setXmlFromRSS ( Xml *xml , long niceness ) {
 			  true                     , // pure xml?
 			  TITLEREC_CURRENT_VERSION ,
 			  false                    , // no need to now
-			  niceness                 );
+			  niceness                 ,
+			  CT_XML );
 }
 
 // only Title.cpp uses this right now
diff --git a/PageGet.cpp b/PageGet.cpp
index b8cf5d01..0e9908a8 100644
--- a/PageGet.cpp
+++ b/PageGet.cpp
@@ -712,7 +712,7 @@ bool processLoop ( void *state ) {
 		//Words *ww = xd->getWords();
 		if ( ! xml.set ( content , contentLen , false ,
 				 0 , false , TITLEREC_CURRENT_VERSION ,
-				 false , 0 ) ) { // niceness is 0
+				 false , 0 , CT_HTML ) ) { // niceness is 0
 			//if ( buf ) mfree ( buf , bufMaxSize , "PageGet2" );
 			return sendErrorReply ( st , g_errno );
 		}			
diff --git a/Parms.cpp b/Parms.cpp
index d6d8724a..8b2d138b 100644
--- a/Parms.cpp
+++ b/Parms.cpp
@@ -4226,7 +4226,10 @@ bool Parms::setXmlFromFile(Xml *xml, char *filename, char *buf, long bufSize){
 			  false   , // ownData
 			  0       , // allocSize
 			  false   , // pureXml?
-			  0       );// version
+			  0       , // version
+			  true    , // setParents
+			  0       , // niceness
+			  CT_XML  );
 }
 
 #define MAX_CONF_SIZE 200000
diff --git a/Xml.cpp b/Xml.cpp
index f9153ccb..7f6f4920 100644
--- a/Xml.cpp
+++ b/Xml.cpp
@@ -196,6 +196,8 @@ void Xml::reset ( ) {
 	m_allocSize   = 0;
 }
 
+#include "HttpMime.h" // CT_JSON
+
 // "s" must be in utf8
 bool Xml::set ( char  *s             , 
 	        long   slen          , 
@@ -204,7 +206,8 @@ bool Xml::set ( char  *s             ,
 	        bool   pureXml       ,
 	        long   version       ,
 	        bool   setParentsArg ,
-	        long   niceness      ) {
+	        long   niceness      ,
+		char   contentType ) {
 
 	// just in case
 	reset();
@@ -234,6 +237,28 @@ bool Xml::set ( char  *s             ,
 		return false;
 	}
 
+	// json: wrap entire buffer in one text node. TODO: same for CT_TEXT etc.
+	if ( contentType == CT_JSON ) {
+		m_numNodes = 0;
+		// allocate an array holding just that one node
+		m_maxNumNodes = 1;
+		m_nodes =(XmlNode *)mmalloc(sizeof(XmlNode)*m_maxNumNodes,"x");
+		if ( ! m_nodes ) return false; // allocation failed
+		XmlNode *xd = &m_nodes[m_numNodes];
+		// fill the node by hand. NOTE(review): other XmlNode fields are not set here - confirm ok
+		xd->m_node       = s;
+		xd->m_nodeLen    = slen;
+		xd->m_isSelfLink = 0;
+		// . nodeId for text nodes is 0
+		xd->m_nodeId     = 0;
+		xd->m_hasBackTag = false;
+		xd->m_hash       = 0;
+		xd->m_pairTagNum = -1;
+		m_numNodes++; // node count is now 1
+		return true;
+	}
+
+
 	QUICKPOLL((niceness));
 	long i;
 
diff --git a/Xml.h b/Xml.h
index 30abac2a..795b74a7 100644
--- a/Xml.h
+++ b/Xml.h
@@ -30,8 +30,10 @@ class Xml {
 		 bool ownData , long allocSize, //=0, 
 		 bool pureXml, // =false );
 		 long version ,
-		 bool setParents = true,
-		 long niceness = 0);
+		 bool setParents , // = true,
+		 long niceness , // = 0
+		 char contentType );
+
 
 
 	void  reset ( );
diff --git a/XmlDoc.cpp b/XmlDoc.cpp
index 041322f9..960fb52a 100644
--- a/XmlDoc.cpp
+++ b/XmlDoc.cpp
@@ -4727,7 +4727,8 @@ void XmlDoc::gotWikiResults ( UdpSlot *slot ) {
 			 false                    ,
 			 TITLEREC_CURRENT_VERSION ,
 			 false                    , // setParents?
-			 m_niceness               ))
+			 m_niceness               ,
+			 CT_HTML                  ))
 		// return if g_errno got set
 		return;
 
@@ -5206,6 +5207,10 @@ Xml *XmlDoc::getXml ( ) {
 	char **u8 = getUtf8Content();
 	if ( ! u8 || u8 == (char **)-1 ) return (Xml *)u8;
 	long u8len = size_utf8Content - 1;
+	
+	uint8_t *ct = getContentType();
+	if ( ! ct || ct == (void *)-1 ) return (Xml *)ct;
+
 	// note it
 	setStatus ( "getting xml");
 	// set it
@@ -5216,7 +5221,8 @@ Xml *XmlDoc::getXml ( ) {
 			   false      ,  // pure xml?
 			   m_version  ,
 			   false      ,  // setParentsArg? 
-			   m_niceness ) )
+			   m_niceness ,
+			   *ct ) )
 		// return NULL on error with g_errno set
 		return NULL;
 	// set just once
@@ -5813,13 +5819,12 @@ Sections *XmlDoc::getExplicitSections ( ) {
 	if ( m_explicitSectionsValid ) return &m_sections;
 
 	// if json forget this it is only html
-	uint8_t *ct = getContentType();
-	if ( ! ct || ct == (void *)-1 ) return (Sections *)ct;
-	if ( *ct != CT_HTML && *ct != CT_TEXT && *ct != CT_XML ) {
-		m_sectionsValid = true;
-		return &m_sections;
-	}
-
+	//uint8_t *ct = getContentType();
+	//if ( ! ct || ct == (void *)-1 ) return (Sections *)ct;
+	//if ( *ct != CT_HTML && *ct != CT_TEXT && *ct != CT_XML ) {
+	//	m_sectionsValid = true;
+	//	return &m_sections;
+	//}
 
 	setStatus ( "getting explicit sections" );
 	// use the old title rec to make sure we parse consistently!
@@ -5856,8 +5861,8 @@ Sections *XmlDoc::getExplicitSections ( ) {
 	long long *d = getDocId();
 	if ( ! d || d == (long long *)-1 ) return (Sections *)d;
 	// get the content type
-	//uint8_t *ct = getContentType();
-	//if ( ! ct ) return NULL;
+	uint8_t *ct = getContentType(); // may be NULL or the -1 sentinel
+	if ( ! ct || ct == (void *)-1 ) return (Sections *)ct;
 
 	CollectionRec *cr = getCollRec();
 	if ( ! cr ) return NULL;
@@ -5873,6 +5878,8 @@ Sections *XmlDoc::getExplicitSections ( ) {
 	// this uses the sectionsReply to see which sections are "text", etc.
 	// rather than compute it expensively
 	if ( ! m_calledSections &&
+	     // we get malformed sections error for some diffbot replies
+	     //*ct != CT_JSON &&
 	     ! m_sections.set ( &m_words      ,
 				&m_phrases    ,
 				bits          ,
@@ -5918,14 +5925,6 @@ Sections *XmlDoc::getExplicitSections ( ) {
 Sections *XmlDoc::getImpliedSections ( ) {
 	if ( m_impliedSectionsValid ) return &m_sections;
 
-	// if json forget this it is only html
-	uint8_t *ct = getContentType();
-	if ( ! ct || ct == (void *)-1 ) return (Sections *)ct;
-	if ( *ct != CT_HTML && *ct != CT_TEXT && *ct != CT_XML ) {
-		m_sectionsValid = true;
-		return &m_sections;
-	}
-
 	// get the sections without implied sections
 	Sections *sections = getExplicitSections();
 	if ( ! sections || sections==(void *)-1) return (Sections *)sections;
@@ -5944,8 +5943,8 @@ Sections *XmlDoc::getImpliedSections ( ) {
 	// bail on error
 	if ( ! bits ) return NULL;
 	// get the content type
-	//uint8_t *ct = getContentType();
-	//if ( ! ct ) return NULL;
+	uint8_t *ct = getContentType(); // may be NULL or the -1 sentinel
+	if ( ! ct || ct == (void *)-1 ) return (Sections *)ct;
 
 	if ( ! m_firstUrlValid ) { char *xx=NULL;*xx=0; }
 
@@ -6006,14 +6005,6 @@ Sections *XmlDoc::getImpliedSections ( ) {
 // add in Section::m_sentFlags bits having to do with our voting tables
 Sections *XmlDoc::getSections ( ) {
 
-	// if json forget this it is only html
-	uint8_t *ct = getContentType();
-	if ( ! ct || ct == (void *)-1 ) return (Sections *)ct;
-	if ( *ct != CT_HTML && *ct != CT_TEXT && *ct != CT_XML ) {
-		m_sectionsValid = true;
-		return &m_sections;
-	}
-
 	// get the sections without implied sections
 	Sections *ss = getImpliedSections();
 	if ( ! ss || ss==(void *)-1) return (Sections *)ss;
@@ -17865,6 +17856,7 @@ bool XmlDoc::logIt ( ) {
 	// coll
 	//
 	sb.safePrintf("coll=%s ",coll);
+	sb.safePrintf("collnum=%li ",(long)m_collnum);
 
 	//
 	// print ip
@@ -25077,6 +25069,9 @@ bool XmlDoc::hashRSSInfo ( HashTableX *tt ) {
 
 	setStatus ( "hashing rss info" );
 
+	uint8_t *ct = getContentType();
+	if ( ! ct || ct == (void *)-1 ) { char *xx=NULL;*xx=0; }
+
 	// . finally hash in the linkText terms from the LinkInfo
 	// . the LinkInfo class has all the terms of hashed anchor text for us
 	// . if we're using an old TitleRec linkTermList is just a ptr to
@@ -25190,7 +25185,10 @@ bool XmlDoc::hashRSSInfo ( HashTableX *tt ) {
 				  false            , // own data?
 				  0                , // allocSize
 				  false            , // pure xml?
-				  m_version ) )
+				  m_version ,
+				  true , // set parents? (same as the old default)
+				  m_niceness , // old call relied on the default of 0
+				  *ct ) ) // NOTE(review): doc's own content type - confirm it fits this buffer
 			return false;
 		// set the words class from the xml, returns false and sets
 		// g_errno on error
diff --git a/fctypes.cpp b/fctypes.cpp
index 9aa88566..34ede819 100644
--- a/fctypes.cpp
+++ b/fctypes.cpp
@@ -2046,6 +2046,7 @@ char* getNextNum(char* input, char** numPtr) {
 	return nextspace;
 }
 
+#include "HttpMime.h" // CT_HTML
 
 // returns length of stripped content, but will set g_errno and return -1
 // on error
@@ -2066,7 +2067,7 @@ long stripHtml( char *content, long contentLen, long version, long strip ) {
 	// . parse as utf8 since all we are doing is messing with 
 	//   the tags...content manipulation comes later
 	if ( ! tmpXml.set ( content , contentLen,
-			    false, 0, false, version ) )
+			    false, 0, false, version , true , 0 , CT_HTML ) )
 		return -1;
 
 	//if( strip == 4 )
diff --git a/main.cpp b/main.cpp
index 346fe533..d12f71df 100644
--- a/main.cpp
+++ b/main.cpp
@@ -11464,7 +11464,10 @@ bool parseTest ( char *coll , long long docId , char *query ) {
 	t = gettimeofdayInMilliseconds();
 	for ( long i = 0 ; i < 100 ; i++ ) 
 		if ( ! xml.set ( content , contentLen , 
-				 false, 0, false, xd.m_version ) )
+				 false, 0, false, xd.m_version ,
+				 true , // setparents
+				 0 , // niceness 
+				 CT_HTML ) )
 			return log("build: speedtestxml: xml set: %s",
 				   mstrerror(g_errno));
 	// print time it took
@@ -11480,7 +11483,8 @@ bool parseTest ( char *coll , long long docId , char *query ) {
 	t = gettimeofdayInMilliseconds();
 	for ( long i = 0 ; i < 100 ; i++ ) 
 		if ( ! xml.set ( content , contentLen , 
-				 false, 0, false, xd.m_version , false ) )
+				 false, 0, false, xd.m_version , false ,
+				 0 , CT_HTML ) )
 			return log("build: xml(setparents=false): %s",
 				   mstrerror(g_errno));
 	// print time it took
@@ -11842,7 +11846,10 @@ bool summaryTest1   ( char *rec , long listSize, char *coll , long long docId ,
 		// now parse into xhtml (takes 15ms on lenny)
 		Xml xml;
 		xml.set ( content, contentLen , 
-			  false/*ownData?*/, 0, false, xd.m_version );
+			  false/*ownData?*/, 0, false, xd.m_version ,
+			  true , // setparents
+			  0 , // niceness
+			  CT_HTML );
 
 		xd.getSummary();