From 8bdd57bdeb61e2af87a6bd5d05352d03902b646a Mon Sep 17 00:00:00 2001
From: Michael Orlitzky
Date: Mon, 7 Aug 2023 18:48:06 -0400
Subject: [PATCH] io-svg.c: use libxml2 to implement the parser

---
 io-svg.c | 266 +++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 171 insertions(+), 95 deletions(-)

diff --git a/io-svg.c b/io-svg.c
index fde78ca..68f8b4d 100644
--- a/io-svg.c
+++ b/io-svg.c
@@ -4,6 +4,8 @@
 #include
 #include /* includes glib.h */
 #include
+#include
+#include
 #include
 
 /*
@@ -16,11 +18,6 @@
 #define VIEWPORT_WIDTH 512
 #define VIEWPORT_HEIGHT 512
 
-/* The start of an XInclude that was inserted by
- * gtk-encode-symbolic-svg */
-#define XI_SIGNATURE " elements in an SVG buffer.
+ * @brief Replace one GTK XInclude element by the data it embeds.
+ *
+ * Only elements whose "href" attribute starts with
+ * "data:text/xml;base64," are handled. The payload is base64-decoded
+ * and parsed as its own XML document, whose root must be an "svg"
+ * element. Deep copies of that root's element children are inserted
+ * immediately in front of @p node, in document order, and then
+ * @p node itself is unlinked and freed.
+ *
+ * @param node
+ *   A pointer to an element node. When TRUE is returned the node has
+ *   been freed and must not be used again.
+ *
+ * @return TRUE if we replaced the node, and FALSE otherwise. If
+ *   copying or inserting a child fails part-way through, FALSE is
+ *   returned and @p node is left in place (children inserted before
+ *   the failure are not removed again).
+ *
+ */
+static gboolean process_one_xinclude(xmlNode* node) {
+  /* We only process XIncludes with base64 data hrefs */
+  static const xmlChar data_prefix[] = "data:text/xml;base64,";
+  const int prefix_len = (int)(sizeof data_prefix) - 1;
+
+  gboolean replaced = FALSE;
+  guchar* decoded = NULL;
+  xmlDoc* xinc_doc = NULL;
+
+  /* xmlGetProp() returns a copy that we own; every exit below goes
+     through "cleanup" so that it is released with xmlFree(). */
+  xmlChar* href = xmlGetProp(node, BAD_CAST "href");
+  if (href == NULL) {
+    return FALSE;
+  }
+
+  if (xmlStrncmp(href, data_prefix, prefix_len)) {
+    goto cleanup;
+  }
+
+  const xmlChar* hrefdata = href + prefix_len;
+
+  /* Verify that hrefdata is base64-encoded (and that it's safe to
+     cast to a signed gchar pointer). I'm assuming that everyone is
+     using the RFC 4648 encoding? Walking to the terminator avoids
+     recomputing the string length on every iteration. */
+  for (const xmlChar* c = hrefdata; *c != 0; c++) {
+    if (*c > 'z') {
+      goto cleanup;
+    }
+    if (*c < '0' && *c != '+' && *c != '/') {
+      goto cleanup;
+    }
+  }
+
+  /* The decoded data is not necessarily NULL-terminated, so it is
+     only ever handed to xmlReadMemory() together with its length.
+     That length parameter is an int, so refuse anything larger. */
+  gsize decoded_size = 0;
+  decoded = g_base64_decode((const gchar*)hrefdata, &decoded_size);
+  if (decoded == NULL || decoded_size > (gsize)G_MAXINT) {
+    goto cleanup;
+  }
+
+  /* This cast is safe because signed and unsigned chars are the same
+     size, and xmlReadMemory is going to treat the data as binary
+     anyway. */
+  xinc_doc = xmlReadMemory((const char*)decoded,
+                           (int)decoded_size,
+                           "xinclude.xml",
+                           NULL,
+                           0);
+  if (xinc_doc == NULL) {
+    goto cleanup;
+  }
+
+  xmlNode* xinc_root = xmlDocGetRootElement(xinc_doc);
+  if (xinc_root == NULL || xmlStrcmp(xinc_root->name, BAD_CAST "svg")) {
+    goto cleanup;
+  }
+
+  /* Replace the original xinclude "node" with the children of this
+     "svg" node. Inserting each copy directly in front of "node"
+     keeps the children in their original order and puts them exactly
+     where the xinclude was, whether or not it has a previous
+     sibling. We insert copies because the originals belong to
+     xinc_doc, which is freed below. */
+  replaced = TRUE;
+  for (xmlNode* child = xmlFirstElementChild(xinc_root);
+       child != NULL;
+       child = xmlNextElementSibling(child)) {
+    xmlNode* copy = xmlCopyNode(child, 1);
+    if (copy == NULL || xmlAddPrevSibling(node, copy) == NULL) {
+      /* The copy was not linked into the tree, so it is still ours. */
+      xmlFreeNode(copy);
+      replaced = FALSE;
+      break;
+    }
+    xmlReconciliateNs(node->doc, copy);
+  }
+
+  if (replaced) {
+    xmlUnlinkNode(node);
+    xmlFreeNode(node);
+  }
+
+cleanup:
+  xmlFreeDoc(xinc_doc);
+  g_free(decoded);
+  xmlFree(href);
+
+  return replaced;
+}
+
+/**
+ * @brief Replace all GTK XInclude elements in a tree by their data.
+ *
+ * @param a_node
+ *   A node pointer, to the root of the tree.
+ *
+ * @return TRUE if we replaced any XInclude element nodes, and
+ *         FALSE otherwise.
+ *
+ */
+static gboolean process_child_xincludes(xmlNode* a_node) {
+  gboolean replaced_any = FALSE;
+
+  g_assert(a_node == NULL || a_node->type == XML_ELEMENT_NODE);
+
+  /* Walk a_node and its following element siblings. */
+  xmlNode* elem = a_node;
+  while (elem != NULL) {
+    xmlNode* upcoming;
+
+    if (xmlStrcmp(elem->name, BAD_CAST "include") == 0) {
+      /* process_one_xinclude() may free "elem", so its successor has
+         to be looked up before that call. */
+      upcoming = xmlNextElementSibling(elem);
+      replaced_any = process_one_xinclude(elem) || replaced_any;
+    }
+    else {
+      /* Any other element: descend into its element children. */
+      replaced_any =
+        process_child_xincludes(xmlFirstElementChild(elem)) || replaced_any;
+      upcoming = xmlNextElementSibling(elem);
+    }
+
+    elem = upcoming;
+  }
+
+  return replaced_any;
+}
+
+
+/**
+ * @brief Process GTK elements in an SVG buffer.
  *
  * GTK is very cute. Its gtk-encode-symbolic-svg tool wraps your SVG
  * in its own boilerplate, but then rather than including your SVG
@@ -427,104 +569,35 @@ static void emit_prepared(SvgTinyContext* context, GdkPixbuf* pixbuf) {
  */
 static gchar* process_gtk_symbolic_svg_xinclude(const gchar* buffer,
                                                 gsize buf_size,
-                                                gsize* new_size) {
-  gchar* xi_start;
-  gchar* xi;
-  gchar* xi_stop;
-  gchar* after_xi_element;
-
-  xi_start = g_strstr_len(buffer, buf_size, XI_SIGNATURE);
-  if (xi_start == NULL) {
-    return NULL;
-  }
-
-  xi = xi_start + strlen(XI_SIGNATURE);
-  xi_stop = g_strstr_len(xi, (buffer + buf_size) - xi, "\"");
+                                                gsize* new_size) {
 
-  if(xi_stop == NULL) {
-    /* We found the start of an XInclude, but not the end of its
-       base64-encoded data? Play it safe and do nothing. */
+  xmlDoc* doc = xmlReadMemory(buffer,buf_size,"symbolic.xml",NULL,0);
+  if (doc == NULL) {
     return NULL;
   }
 
-  /* g_base64_decode needs a NULL-terminated string, so let's make
-     "xi" into one */
-  *xi_stop = 0;
-  gsize decoded_size;
-
-  /* All files are ASCII, right? In GTK's gdkpixbufutils.c, our base64
-   * data is encoded from a binary file stream, i.e. bytes, without
-   * any regard for what the text inside represents.
Elsewhere we are
-   * pretending that gchar is a reasonable data type to use for the
-   * contents of an SVG file; here we are saying that again out loud.
-   */
-  gchar* decoded = (gchar*)g_base64_decode(xi, &decoded_size);
-  *xi_stop = '"';
-
-  /* We need another round of processing to strip the and
-   * elements out of "decoded", but it's simpler to just overwrite
-   * them with spaces before we proceed. We'll wind up with a document
-   * that has a conspicuous chunk of whitespace in the middle of it,
-   * but whatever. Note that we don't need to worry about the
-   * element so much, because if one exists, it has to come before the
-   * . As a result, we just need to strip everything up to the
-   * leading tag. */
-  gchar* svg_open_start = g_strstr_len(decoded, decoded_size, ""); if (svg_open_end == NULL) {
-    /* The decoded data is not what we were expecting. Give up. */
-    g_free(decoded);
-    return NULL;
+  gchar* result = NULL;
+  if (process_child_xincludes(root_element)) {
+    /* Serialize the modified document, then hand the caller a copy it
+       can g_free(); libxml2's own buffer must go back via xmlFree(). */
+    xmlChar *xmlbuf = NULL;
+    int xmlbuf_size = 0;
+    xmlDocDumpFormatMemory(doc, &xmlbuf, &xmlbuf_size, 1);
+    if (xmlbuf != NULL && xmlbuf_size > 0) {
+      *new_size = (gsize)xmlbuf_size;
+      result = g_malloc(*new_size);
+      memcpy(result, xmlbuf, *new_size);
+    }
+    if (xmlbuf != NULL) { xmlFree(xmlbuf); }
   }
 
-  /* Keep in mind that we want to wipe everything up to and
-     including the tag; we'll usually overwrite an tag
-     too. */
-  memset(decoded, ' ', (1 + (svg_open_end - decoded)));
-
-  gchar* svg_close_start = g_strstr_len(svg_open_end,
-                                        (decoded+decoded_size)-svg_open_end,
-                                        "");
-  if (svg_close_start == NULL ) {
-    /* The decoded data is not what we were expecting. Give up. */
-    g_free(decoded);
-    return NULL;
-  }
-  memset(svg_close_start, ' ', 6);
-
-  /* We're going to keep everything up to xi_start. If the */
-  if (after_xi_element >= buffer+buf_size) {
-    /* The document ends right after the XInclude (with no closing
-       tag or anything)? Bail. */
-    g_free(decoded);
-    return NULL;
-  }
-
-  gsize keep_after_size = (buffer+buf_size) - after_xi_element;
-
-  *new_size = keep_before_size + decoded_size + keep_after_size;
-  gchar* result = g_malloc(*new_size);
-  memcpy(result, buffer, keep_before_size);
-  memcpy(result+keep_before_size, decoded, decoded_size);
-  memcpy(result+keep_before_size+decoded_size,
-         after_xi_element,
-         keep_after_size);
-  g_free(decoded);
+  xmlFreeDoc(doc);
+  /* No xmlCleanupParser() here: other code in this process may use libxml2. */
   return result;
 }
 
@@ -619,6 +692,9 @@ void fill_info(GdkPixbufFormat* info) {
 }
 
 
+/**
+ * @brief Entry point of the svg2png test program.
+ */
 int main(int argc, char** argv) {
   char* svgpath;
   char* pngpath;
-- 
2.43.2