--- misc/raptor-1.4.18/src/raptor.h.old 2008-06-20 07:47:38.000000000 +0200 +++ misc/raptor-1.4.18/src/raptor.h 2012-02-15 16:54:21.000000000 +0100 @@ -376,6 +376,7 @@ typedef struct { * @RAPTOR_FEATURE_JSON_EXTRA_DATA: JSON serializer extra top-level data * @RAPTOR_FEATURE_RSS_TRIPLES: Atom/RSS serializer writes extra RDF triples it finds (none, rdf-xml, atom-triples) * @RAPTOR_FEATURE_ATOM_ENTRY_URI: Atom entry URI. If given, generate an Atom Entry Document with the item having the given URI, otherwise generate an Atom Feed Document with any items found. + * @RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: When reading XML, load external entities. * @RAPTOR_FEATURE_LAST: Internal * * Raptor parser, serializer or XML writer features. @@ -416,7 +417,8 @@ typedef enum { RAPTOR_FEATURE_JSON_EXTRA_DATA, RAPTOR_FEATURE_RSS_TRIPLES, RAPTOR_FEATURE_ATOM_ENTRY_URI, - RAPTOR_FEATURE_LAST=RAPTOR_FEATURE_ATOM_ENTRY_URI + RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES, + RAPTOR_FEATURE_LAST=RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES } raptor_feature; --- misc/raptor-1.4.18/src/raptor_feature.c.old 2008-06-05 08:54:16.000000000 +0200 +++ misc/raptor-1.4.18/src/raptor_feature.c 2012-02-15 16:55:09.000000000 +0100 @@ -89,7 +89,8 @@ static const struct { RAPTOR_FEATURE_JSON_CALLBACK , 6, "jsonCallback", "JSON serializer callback" }, { RAPTOR_FEATURE_JSON_EXTRA_DATA , 6, "jsonExtraData", "JSON serializer extra data" }, { RAPTOR_FEATURE_RSS_TRIPLES , 6, "rssTriples", "Atom/RSS serializer writes extra RDF triples" }, - { RAPTOR_FEATURE_ATOM_ENTRY_URI , 6, "atomEntryUri", "Atom serializer Entry URI" } + { RAPTOR_FEATURE_ATOM_ENTRY_URI , 6, "atomEntryUri", "Atom serializer Entry URI" }, + { RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES, 1, "loadExternalEntities", "Load external XML entities." } }; --- misc/raptor-1.4.18/src/raptor_internal.h.old 2008-06-03 07:04:09.000000000 +0200 +++ misc/raptor-1.4.18/src/raptor_internal.h 2012-02-15 16:52:08.000000000 +0100 @@ -983,6 +983,14 @@ struct raptor_sax2_s { /* base URI for resolving relative URIs or xml:base URIs */ raptor_uri* base_uri; + + /* call SAX2 handlers if non-0 */ + int enabled; + + /* FEATURE: + * non 0 if XML entities should be loaded + */ + int feature_load_external_entities; }; int raptor_sax2_init(void); --- misc/raptor-1.4.18/src/raptor_libxml.c.old 2008-06-14 05:35:27.000000000 +0200 +++ misc/raptor-1.4.18/src/raptor_libxml.c 2012-02-15 16:52:08.000000000 +0100 @@ -142,18 +142,126 @@ raptor_libxml_hasExternalSubset (void* u static xmlParserInputPtr raptor_libxml_resolveEntity(void* user_data, - const xmlChar *publicId, const xmlChar *systemId) { - raptor_sax2* sax2=(raptor_sax2*)user_data; - return libxml2_resolveEntity(sax2->xc, publicId, systemId); + const xmlChar *publicId, const xmlChar *systemId) +{ + raptor_sax2* sax2 = (raptor_sax2*)user_data; + xmlParserCtxtPtr ctxt = sax2->xc; + const unsigned char *uri_string = NULL; + xmlParserInputPtr entity_input; + int load_entity = 0; + + if(ctxt->input) + uri_string = (const unsigned char *)ctxt->input->filename; + + if(!uri_string) + uri_string = (const unsigned char *)ctxt->directory; + + load_entity = sax2->feature_load_external_entities; + + if(load_entity) { + entity_input = xmlLoadExternalEntity((const char*)uri_string, + (const char*)publicId, + ctxt); + } else { + RAPTOR_DEBUG4("Not loading entity URI %s by policy for publicId '%s' systemId '%s'\n", uri_string, publicId, systemId); + } + + return entity_input; } static xmlEntityPtr -raptor_libxml_getEntity(void* user_data, const xmlChar *name) { - raptor_sax2* sax2=(raptor_sax2*)user_data; - return libxml2_getEntity(sax2->xc, name); -} +raptor_libxml_getEntity(void* user_data, const xmlChar *name) +{ + raptor_sax2* sax2 = (raptor_sax2*)user_data; + xmlParserCtxtPtr xc = sax2->xc; + xmlEntityPtr ret = NULL; + + if(!xc) + return NULL; + + if(!xc->inSubset) { + /* looks for hardcoded set of entity names - lt, gt etc. */ + ret = xmlGetPredefinedEntity(name); + if(ret) { + RAPTOR_DEBUG2("Entity '%s' found in predefined set\n", name); + return ret; + } + } + /* This section uses xmlGetDocEntity which looks for entities in + * memory only, never from a file or URI + */ + if(xc->myDoc && (xc->myDoc->standalone == 1)) { + RAPTOR_DEBUG2("Entity '%s' document is standalone\n", name); + /* Document is standalone: no entities are required to interpret doc */ + if(xc->inSubset == 2) { + xc->myDoc->standalone = 0; + ret = xmlGetDocEntity(xc->myDoc, name); + xc->myDoc->standalone = 1; + } else { + ret = xmlGetDocEntity(xc->myDoc, name); + if(!ret) { + xc->myDoc->standalone = 0; + ret = xmlGetDocEntity(xc->myDoc, name); + xc->myDoc->standalone = 1; + } + } + } else { + ret = xmlGetDocEntity(xc->myDoc, name); + } + + if(ret && !ret->children && + (ret->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { + /* Entity is an external general parsed entity. It may be in a + * catalog file, user file or user URI + */ + int val = 0; + xmlNodePtr children; + int load_entity = 0; + + load_entity = sax2->feature_load_external_entities; + + if(!load_entity) { + RAPTOR_DEBUG2("Not getting entity URI %s by policy\n", ret->URI); + children = xmlNewText((const xmlChar*)""); + } else { + /* Disable SAX2 handlers so that the SAX2 events do not all get + * sent to callbacks during dealing with the entity parsing. + */ + sax2->enabled = 0; + val = xmlParseCtxtExternalEntity(xc, ret->URI, ret->ExternalID, &children); + sax2->enabled = 1; + } + + if(!val) { + xmlAddChildList((xmlNodePtr)ret, children); + } else { + xc->validate = 0; + return NULL; + } + + ret->owner = 1; + +/* ret->checked was added with commit a37a6ad91a61d168ecc4b29263def3363fff4da6 + in libxml2 before 2.6.27 it does not exist and ret->children != 0 will be + tested instead, which is true due to xmlAddChildList above */ +#if LIBXML_VERSION >= 20627 || !defined(__APPLE__) + /* Mark this entity as having been checked - never do this again */ + if(!ret->checked) + ret->checked = 1; +#else + if (atoi(xmlParserVersion) >= 20627) { + int *const pChecked = (&ret->owner) + 1; + if (!*pChecked) /* owner precedes checked and is also of type int */ + *pChecked = 1; + } +#endif + } + + return ret; +} + static xmlEntityPtr raptor_libxml_getParameterEntity(void* user_data, const xmlChar *name) { --- misc/raptor-1.4.18/src/raptor_parse.c.old 2008-06-15 09:18:50.000000000 +0200 +++ misc/raptor-1.4.18/src/raptor_parse.c 2012-02-15 16:52:08.000000000 +0100 @@ -1294,6 +1294,7 @@ raptor_set_feature(raptor_parser *parser case RAPTOR_FEATURE_MICROFORMATS: case RAPTOR_FEATURE_HTML_LINK: case RAPTOR_FEATURE_WWW_TIMEOUT: + case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: parser->features[(int)feature]=value; break; @@ -1414,6 +1415,7 @@ raptor_get_feature(raptor_parser *parser case RAPTOR_FEATURE_MICROFORMATS: case RAPTOR_FEATURE_HTML_LINK: case RAPTOR_FEATURE_WWW_TIMEOUT: + case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: result=(parser->features[(int)feature] != 0); break; --- misc/raptor-1.4.18/src/raptor_rdfxml.c.old 2008-06-15 10:12:06.000000000 +0200 +++ misc/raptor-1.4.18/src/raptor_rdfxml.c 2012-02-15 16:52:08.000000000 +0100 @@ -1124,6 +1124,9 @@ raptor_rdfxml_parse_start(raptor_parser* raptor_sax2_set_feature(rdf_xml_parser->sax2, RAPTOR_FEATURE_NO_NET, rdf_parser->features[RAPTOR_FEATURE_NO_NET]); + raptor_sax2_set_feature(rdf_xml_parser->sax2, + RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES, + rdf_parser->features[RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES]); raptor_sax2_parse_start(rdf_xml_parser->sax2, uri); --- misc/raptor-1.4.18/src/raptor_rss.c.old 2008-05-21 22:25:57.000000000 +0200 +++ misc/raptor-1.4.18/src/raptor_rss.c 2012-02-15 16:52:08.000000000 +0100 @@ -251,6 +251,9 @@ raptor_rss_parse_start(raptor_parser *rd raptor_sax2_set_feature(rss_parser->sax2, RAPTOR_FEATURE_NO_NET, rdf_parser->features[RAPTOR_FEATURE_NO_NET]); + raptor_sax2_set_feature(rss_parser->sax2, + RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES, + rdf_parser->features[RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES]); raptor_sax2_parse_start(rss_parser->sax2, uri); --- misc/raptor-1.4.18/src/raptor_sax2.c.old 2008-06-15 10:12:20.000000000 +0200 +++ misc/raptor-1.4.18/src/raptor_sax2.c 2012-02-15 16:52:08.000000000 +0100 @@ -96,6 +96,8 @@ raptor_new_sax2(void* user_data, raptor_ sax2->user_data=user_data; + sax2->enabled = 1; + sax2->locator=error_handlers->locator; sax2->error_handlers=error_handlers; @@ -687,6 +689,10 @@ raptor_sax2_set_feature(raptor_sax2 *sax sax2->feature_no_net=value; break; + case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: + sax2->feature_load_external_entities=value; + break; + case RAPTOR_FEATURE_SCANNING: case RAPTOR_FEATURE_ASSUME_IS_RDF: case RAPTOR_FEATURE_ALLOW_NON_NS_ATTRIBUTES: @@ -767,6 +773,9 @@ raptor_sax2_start_element(void* user_dat unsigned char *xml_language=NULL; raptor_uri *xml_base=NULL; + if(!sax2->enabled) + return; + #ifdef RAPTOR_XML_EXPAT #ifdef EXPAT_UTF8_BOM_CRASH sax2->tokens_count++; @@ -990,6 +999,9 @@ raptor_sax2_end_element(void* user_data, raptor_sax2* sax2=(raptor_sax2*)user_data; raptor_xml_element* xml_element; + if(!sax2->enabled) + return; + #ifdef RAPTOR_XML_EXPAT #ifdef EXPAT_UTF8_BOM_CRASH sax2->tokens_count++; @@ -1025,6 +1037,10 @@ void raptor_sax2_characters(void* user_data, const unsigned char *s, int len) { raptor_sax2* sax2=(raptor_sax2*)user_data; + + if(!sax2->enabled) + return; + if(sax2->characters_handler) sax2->characters_handler(sax2->user_data, sax2->current_element, s, len); } @@ -1035,6 +1051,10 @@ void raptor_sax2_cdata(void* user_data, const unsigned char *s, int len) { raptor_sax2* sax2=(raptor_sax2*)user_data; + + if(!sax2->enabled) + return; + #ifdef RAPTOR_XML_EXPAT #ifdef EXPAT_UTF8_BOM_CRASH sax2->tokens_count++; @@ -1051,6 +1071,10 @@ void raptor_sax2_comment(void* user_data, const unsigned char *s) { raptor_sax2* sax2=(raptor_sax2*)user_data; + + if(!sax2->enabled) + return; + if(sax2->comment_handler) sax2->comment_handler(sax2->user_data, sax2->current_element, s); } @@ -1066,6 +1090,10 @@ raptor_sax2_unparsed_entity_decl(void* u const unsigned char* notationName) { raptor_sax2* sax2=(raptor_sax2*)user_data; + + if(!sax2->enabled) + return; + if(sax2->unparsed_entity_decl_handler) sax2->unparsed_entity_decl_handler(sax2->user_data, entityName, base, systemId, @@ -1082,6 +1110,10 @@ raptor_sax2_external_entity_ref(void* us const unsigned char* publicId) { raptor_sax2* sax2=(raptor_sax2*)user_data; + + if(!sax2->enabled) + return 0; + if(sax2->external_entity_ref_handler) return sax2->external_entity_ref_handler(sax2->user_data, context, base, systemId, publicId); --- misc/raptor-1.4.18/src/raptor_serialize.c.old 2008-06-20 02:55:31.000000000 +0200 +++ misc/raptor-1.4.18/src/raptor_serialize.c 2012-02-15 16:52:08.000000000 +0100 @@ -859,6 +859,7 @@ raptor_serializer_set_feature(raptor_ser /* Shared */ case RAPTOR_FEATURE_NO_NET: + case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: /* XML writer features */ case RAPTOR_FEATURE_WRITER_AUTO_INDENT: @@ -965,6 +966,7 @@ raptor_serializer_set_feature_string(rap /* Shared */ case RAPTOR_FEATURE_NO_NET: + case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: /* XML writer features */ case RAPTOR_FEATURE_WRITER_AUTO_INDENT: @@ -1102,6 +1104,7 @@ raptor_serializer_get_feature(raptor_ser /* Shared */ case RAPTOR_FEATURE_NO_NET: + case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: /* XML writer features */ case RAPTOR_FEATURE_WRITER_AUTO_INDENT: @@ -1201,6 +1204,7 @@ raptor_serializer_get_feature_string(rap /* Shared */ case RAPTOR_FEATURE_NO_NET: + case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: /* XML writer features */ case RAPTOR_FEATURE_WRITER_AUTO_INDENT: --- misc/raptor-1.4.18/src/raptor_turtle_writer.c.old 2008-06-20 07:47:48.000000000 +0200 +++ misc/raptor-1.4.18/src/raptor_turtle_writer.c 2012-02-15 16:52:08.000000000 +0100 @@ -723,6 +723,7 @@ raptor_turtle_writer_set_feature(raptor_ /* Shared */ case RAPTOR_FEATURE_NO_NET: + case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: /* XML writer features */ case RAPTOR_FEATURE_RELATIVE_URIS: @@ -836,6 +837,7 @@ raptor_turtle_writer_get_feature(raptor_ /* Shared */ case RAPTOR_FEATURE_NO_NET: + case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: /* XML writer features */ case RAPTOR_FEATURE_RELATIVE_URIS: --- misc/raptor-1.4.18/src/raptor_xml_writer.c.old 2008-06-03 07:05:56.000000000 +0200 +++ misc/raptor-1.4.18/src/raptor_xml_writer.c 2012-02-15 16:52:08.000000000 +0100 @@ -906,6 +906,7 @@ raptor_xml_writer_set_feature(raptor_xml /* Shared */ case RAPTOR_FEATURE_NO_NET: + case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: /* XML writer features */ case RAPTOR_FEATURE_RELATIVE_URIS: @@ -1026,6 +1027,7 @@ raptor_xml_writer_get_feature(raptor_xml /* Shared */ case RAPTOR_FEATURE_NO_NET: + case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: /* XML writer features */ case RAPTOR_FEATURE_RELATIVE_URIS: