]> gitweb.michael.orlitzky.com - libsvgtiny.git/commitdiff
Rework poly point parsing
authorVincent Sanders <vince@kyllikki.org>
Thu, 13 Jun 2024 10:48:20 +0000 (11:48 +0100)
committerVincent Sanders <vince@kyllikki.org>
Mon, 24 Jun 2024 08:57:24 +0000 (09:57 +0100)
Change parsing of poly points to use a dedicated number parser instead of sscanf

src/Makefile
src/svgtiny.c
src/svgtiny_internal.h
src/svgtiny_parse.c [new file with mode: 0644]

index 751a12abe3cf79559ce6461763fec7506b92b461..15cdbe78572e4100c19bc928e9130f7b90540df2 100644 (file)
@@ -1,5 +1,5 @@
 # Sources
-DIR_SOURCES := svgtiny.c svgtiny_gradient.c svgtiny_list.c
+DIR_SOURCES := svgtiny.c svgtiny_gradient.c svgtiny_list.c svgtiny_parse.c
 
 SOURCES := $(SOURCES)
 
index 6859fbdbeaf88ad80c269c28df5e5b580774089e..a450985086ad3fe0a8f5624d6c04e551158e7614 100644 (file)
@@ -1576,6 +1576,7 @@ svgtiny_code svgtiny_parse_line(dom_element *line,
 }
 
 
+
 /**
  * Parse a <polyline> or <polygon> element node.
  *
@@ -1589,9 +1590,8 @@ svgtiny_code svgtiny_parse_poly(dom_element *poly,
        svgtiny_code err;
        dom_string *points_str;
        dom_exception exc;
-       char *s, *points;
-       float *p;
-       unsigned int i;
+       float *pointv;
+       unsigned int pointc;
 
        svgtiny_setup_state_local(&state);
 
@@ -1613,50 +1613,36 @@ svgtiny_code svgtiny_parse_poly(dom_element *poly,
                return svgtiny_SVG_ERROR;
        }
 
-       s = points = strndup(dom_string_data(points_str),
-                            dom_string_byte_length(points_str));
-       dom_string_unref(points_str);
-       /* read points attribute */
-       if (s == NULL) {
-               svgtiny_cleanup_state_local(&state);
-               return svgtiny_OUT_OF_MEMORY;
-       }
-       /* allocate space for path: it will never have more elements than s */
-       p = malloc(sizeof p[0] * strlen(s));
-       if (!p) {
-               free(points);
+       /* allocate space for path: it will never have more elements than bytes
+        * in the string.
+        */
+       pointc = dom_string_byte_length(points_str);
+       pointv = malloc(sizeof pointv[0] * pointc);
+       if (pointv == NULL) {
                svgtiny_cleanup_state_local(&state);
                return svgtiny_OUT_OF_MEMORY;
        }
 
-       /* parse s and build path */
-       for (i = 0; s[i]; i++)
-               if (s[i] == ',')
-                       s[i] = ' ';
-       i = 0;
-       while (*s) {
-               float x, y;
-               int n;
-
-               if (sscanf(s, "%f %f %n", &x, &y, &n) == 2) {
-                       if (i == 0)
-                               p[i++] = svgtiny_PATH_MOVE;
-                       else
-                               p[i++] = svgtiny_PATH_LINE;
-                       p[i++] = x;
-                       p[i++] = y;
-                       s += n;
-                } else {
-                       break;
-                }
-        }
-        if (polygon)
-               p[i++] = svgtiny_PATH_CLOSE;
-
-       free(points);
-
-       err = svgtiny_add_path(p, i, &state);
+       err = svgtiny_parse_poly_points(dom_string_data(points_str),
+                                       dom_string_byte_length(points_str),
+                                       pointv,
+                                       &pointc);
+       dom_string_unref(points_str);
+       if (err != svgtiny_OK) {
+               free(pointv);
+               state.diagram->error_line = -1; /* poly->line; */
+               state.diagram->error_message =
+                               "polyline/polygon: failed to parse points";
+       } else {
+               if (pointc > 0) {
+                       pointv[0] = svgtiny_PATH_MOVE;
+               }
+               if (polygon) {
+                       pointv[pointc++] = svgtiny_PATH_CLOSE;
+               }
 
+               err = svgtiny_add_path(pointv, pointc, &state);
+       }
        svgtiny_cleanup_state_local(&state);
 
        return err;
index 6bf5d64d99277f836fb9d59cad878977d8ed7571..c04ae66597aa3cf16ac99bc7acddab2f452e4477 100644 (file)
@@ -85,6 +85,10 @@ char *svgtiny_strndup(const char *s, size_t n);
 #define strndup svgtiny_strndup
 #endif
 
+/* svgtiny_parse.c */
+svgtiny_code svgtiny_parse_poly_points(const char *data, size_t datalen,
+               float *pointv, unsigned int *pointc);
+
 /* svgtiny_gradient.c */
 void svgtiny_find_gradient(const char *id,
                struct svgtiny_parse_state_gradient *grad,
diff --git a/src/svgtiny_parse.c b/src/svgtiny_parse.c
new file mode 100644 (file)
index 0000000..890ee1b
--- /dev/null
@@ -0,0 +1,345 @@
+/*
+ * This file is part of Libsvgtiny
+ * Licensed under the MIT License,
+ *                http://opensource.org/licenses/mit-license.php
+ * Copyright 2024 Vincent Sanders <vince@netsurf-browser.org>
+ */
+
+#include <stddef.h>
+#include <math.h>
+#include <float.h>
+
+#include "svgtiny.h"
+#include "svgtiny_internal.h"
+
+#define SIGNIFICAND_MAX 100000000
+#define EXPONENT_MAX 38
+#define EXPONENT_MIN -38
+
+/**
+ * parse text string into a float
+ *
+ * A number is preceded by zero or more spaces (0x20), tabs (0x09),
+ * carriage returns (0x0D) and newlines (0x0A) followed by a decimal number.
+ * A number is defined as https://www.w3.org/TR/css-syntax-3/#typedef-number-token
+ *
+ * This state machine parses number text into a sign, significand and exponent
+ * then builds a single precision float from those values.
+ *
+ * The significand stores the first nine decimal digits of the number (floats
+ * only have seven thus ensuring nothing is lost in conversion). Once it is
+ * full, further integer digits only raise the decimal exponent and further
+ * fractional digits are discarded.
+ *
+ * The result is range limited using the magnitude of the whole number
+ * (significand scaled by the exponent), not the exponent on its own:
+ *  - a zero significand always yields zero whatever the exponent is.
+ *  - a magnitude above FLT_MAX yields FLT_MAX (never infinity).
+ *  - a non-zero magnitude below FLT_MIN yields FLT_MIN.
+ * The sign of the number is applied to the limited value. Range limiting is
+ * not reported as an error, svgtiny_OK is still returned.
+ *
+ * This is not a strtof clone because it has an input length limit instead of
+ * needing null terminated input, is not locale dependent and only processes
+ * decimal numbers (not hex etc.). These limitations are necessary to process
+ * the input correctly.
+ *
+ * \param text The text to parse, it need not be null terminated.
+ * \param textlen The number of bytes available in text.
+ * \param textend Updated to point at the first byte that was not consumed,
+ *                this is written on both success and failure.
+ * \param value Updated with the parsed number, only written on success.
+ * \return svgtiny_OK if a number was parsed or svgtiny_SVG_ERROR if the text
+ *         contained only whitespace or a number without any digits.
+ */
+static svgtiny_code
+svgtiny_parse_number(const char *text,
+                    size_t textlen,
+                    const char **textend,
+                    float *value)
+{
+       enum b10sign {
+               SPOSITIVE,
+               SNEGATIVE,
+       };
+       const char *dataend;
+       const char *cur; /* text cursor */
+       enum {
+               STATE_WHITESPACE, /* processing whitespace */
+               STATE_NUMBER, /* processing whole number */
+               STATE_FRACT, /* processing fractional part */
+               STATE_SIGNEXPONENT, /* processing exponent part */
+               STATE_EXPONENT, /* processing exponent part have seen sign */
+       } state = STATE_WHITESPACE;
+       enum b10sign sign = SPOSITIVE; /* sign of number being constructed */
+       unsigned int significand = 0; /* significand of number being constructed */
+       int exponent = 0; /* exponent of the significand (distinct from exponent part) */
+       enum b10sign exp_sign = SPOSITIVE; /* sign of exponent part */
+       unsigned int exp_value = 0; /* value of the exponent part */
+       unsigned int digit_count = 0; /* has an actual digit been seen */
+       double scaled; /* significand scaled by the decimal exponent */
+       float magnitude; /* range limited absolute value of the result */
+
+       dataend = text + textlen;
+
+       for (cur=text; cur < dataend ; cur++) {
+               switch (state) {
+               case STATE_WHITESPACE:
+                       switch (*cur) {
+                       case 0x9: case 0xA: case 0xD: case 0x20:
+                               /* skip whitespace */
+                               continue;
+
+                       case '.':
+                               /* new number with fraction part */
+                               digit_count = 0;
+                               state = STATE_FRACT;
+                               continue;
+
+                       case '-':
+                               sign = SNEGATIVE;
+                               digit_count = 0;
+                               state = STATE_NUMBER;
+                               continue;
+
+                       case '+':
+                               digit_count = 0;
+                               state = STATE_NUMBER;
+                               continue;
+
+                       case '0': case '1': case '2': case '3': case '4':
+                       case '5': case '6': case '7': case '8': case '9':
+                               significand = (*cur - '0');
+                               digit_count = 1;
+                               state = STATE_NUMBER;
+                               continue;
+
+                       default:
+                               /* anything else completes conversion */
+                               goto svgtiny_parse_number_end;
+                       }
+                       break;
+
+               case STATE_NUMBER:
+                       switch(*cur) {
+                       case '.':
+                               state = STATE_FRACT;
+                               continue;
+
+                       case '0': case '1': case '2': case '3': case '4':
+                       case '5': case '6': case '7': case '8': case '9':
+                               digit_count += 1;
+                               if (significand < SIGNIFICAND_MAX) {
+                                       /* still space to accumulate digits in the significand */
+                                       significand = (significand * 10) + (*cur - '0');
+                               } else {
+                                       /* significand has accumulated all the
+                                        * digits it can so just extend the
+                                        * exponent */
+                                       exponent += 1;
+                               }
+                               continue;
+
+                       case 'e':
+                       case 'E':
+                               if (digit_count == 0) {
+                                       /* number has no digits before exponent which is a syntax error */
+                                       goto svgtiny_parse_number_end;
+
+                               }
+                               state = STATE_SIGNEXPONENT;
+                               continue;
+
+                       default:
+                               /* anything else completes conversion */
+                               goto svgtiny_parse_number_end;
+                       }
+
+                       break;
+
+               case STATE_FRACT:
+                       switch(*cur) {
+                       case '0': case '1': case '2': case '3': case '4':
+                       case '5': case '6': case '7': case '8': case '9':
+                               digit_count += 1;
+                               if (significand < SIGNIFICAND_MAX) {
+                                       /* still space to accumulate digits in the significand */
+                                       significand = (significand * 10) + (*cur - '0');
+                                       exponent -= 1;
+                               }
+                               /* fractional digits beyond the significand
+                                * capacity are below float precision and are
+                                * dropped */
+
+                               continue;
+
+                       case 'e':
+                       case 'E':
+                               if (digit_count == 0) {
+                                       /* number has no digits before exponent which is a syntax error */
+                                       goto svgtiny_parse_number_end;
+
+                               }
+                               state = STATE_SIGNEXPONENT;
+                               continue;
+
+                       default:
+                               /* anything else completes conversion */
+                               goto svgtiny_parse_number_end;
+
+                       }
+                       break;
+
+               case STATE_SIGNEXPONENT:
+                       switch(*cur) {
+                       case '-':
+                               exp_sign = SNEGATIVE;
+                               state = STATE_EXPONENT;
+                               continue;
+
+                       case '+':
+                               state = STATE_EXPONENT;
+                               continue;
+
+                       case '0': case '1': case '2': case '3': case '4':
+                       case '5': case '6': case '7': case '8': case '9':
+                               if (exp_value < 1000) {
+                                       /* still space to accumulate digits in the exponent value */
+                                       exp_value = (exp_value * 10) + (*cur - '0');
+                               }
+                               state = STATE_EXPONENT;
+                               continue;
+
+                       default:
+                               /* anything else completes conversion */
+                               goto svgtiny_parse_number_end;
+
+                       }
+                       break;
+
+               case STATE_EXPONENT:
+                       switch(*cur) {
+                       case '0': case '1': case '2': case '3': case '4':
+                       case '5': case '6': case '7': case '8': case '9':
+                               if (exp_value < 1000) {
+                                       /* still space to accumulate digits in the exponent value */
+                                       exp_value = (exp_value * 10) + (*cur - '0');
+                               }
+
+                               continue;
+
+                       default:
+                               /* anything else completes conversion */
+                               goto svgtiny_parse_number_end;
+
+                       }
+                       break;
+               }
+       }
+
+svgtiny_parse_number_end:
+       *textend = cur;
+
+       if (state == STATE_WHITESPACE) {
+               /* no characters except whitespace */
+               return svgtiny_SVG_ERROR;
+       }
+
+       if (digit_count==0) {
+               /* number had no digits (only +-.) which is a syntax error */
+               return svgtiny_SVG_ERROR;
+       }
+
+       /* deal with exponent value. The accumulation above stops once
+        * exp_value reaches 1000 so it never exceeds 9999 and the conversion
+        * to int is safe.
+        */
+       if (exp_sign == SNEGATIVE) {
+               exponent -= (int)exp_value;
+       } else {
+               exponent += (int)exp_value;
+       }
+
+       if (significand == 0) {
+               /* zero stays zero whatever exponent was supplied, it must not
+                * be mistaken for a value that is out of range */
+               magnitude = 0.0f;
+       } else {
+               /* Scale in double precision so the range decision is made on
+                * the complete value. A double holds the nine digit
+                * significand exactly and an exponent beyond the double range
+                * simply produces infinity or zero, both of which are caught
+                * by the limit checks below.
+                */
+               scaled = (double)significand * pow(10.0, (double)exponent);
+
+               if (scaled > (double)FLT_MAX) {
+                       /* deal with number too large to represent */
+                       magnitude = FLT_MAX;
+               } else if (scaled < (double)FLT_MIN) {
+                       /* deal with number too small to represent */
+                       magnitude = FLT_MIN;
+               } else {
+                       magnitude = (float)scaled;
+               }
+       }
+
+       if (sign == SPOSITIVE) {
+               *value = magnitude;
+       } else {
+               *value = -magnitude;
+       }
+
+       return svgtiny_OK;
+}
+
+
+/**
+ * parse text points into path points
+ *
+ * \param text Source text to parse, it need not be null terminated
+ * \param textlen Length of source text in bytes
+ * \param pointv output vector of path elements.
+ * \param pointc on input has number of path elements available in pointv,
+ *               on exit has the number of elements placed in the output
+ *               vector.
+ * \return svgtiny_OK, parse problems end the conversion early rather than
+ *         being reported as an error.
+ *
+ * parses a poly[line|gon] points text into a series of path elements.
+ * The syntax is defined in https://www.w3.org/TR/SVG11/shapes.html#PointsBNF or
+ * https://svgwg.org/svg2-draft/shapes.html#DataTypePoints
+ *
+ * This is a series of numbers separated by zero or more tabs (0x9),
+ * spaces (0x20), carriage returns (0xD), newlines (0xA) and commas (0x2C).
+ * A number is defined as https://www.w3.org/TR/css-syntax-3/#typedef-number-token
+ *
+ * Each complete coordinate pair is written as three elements:
+ * svgtiny_PATH_LINE, x, y. The first pair is emitted as a line like every
+ * other pair, changing it into a move is left to the caller.
+ *
+ * Conversion stops at the first text that is not a valid number or when the
+ * output vector has no room for another pair; the pairs converted up to that
+ * point are kept. A trailing unpaired coordinate is ignored.
+ */
+svgtiny_code
+svgtiny_parse_poly_points(const char *text,
+                         size_t textlen,
+                         float *pointv,
+                         unsigned int *pointc)
+{
+       const char *textend = text + textlen;
+       const char *numberend = NULL;
+       const char *cursor = text; /* text cursor */
+       const unsigned int capacity = *pointc; /* elements available in pointv */
+       unsigned int used = 0; /* elements written to pointv so far */
+       int have_x = 0; /* has the x coordinate of the current pair been read */
+       float x = 0; /* x coordinate held until its y coordinate is parsed */
+       float number = 0; /* most recently parsed number */
+
+       while (cursor < textend) {
+               if (svgtiny_parse_number(cursor,
+                                        textend - cursor,
+                                        &numberend,
+                                        &number) != svgtiny_OK) {
+                       /* not a number, keep what has been converted */
+                       break;
+               }
+               cursor = numberend;
+
+               if (have_x) {
+                       /* used never exceeds capacity so this cannot wrap */
+                       if ((capacity - used) < 3) {
+                               /* no room left for another path element */
+                               break;
+                       }
+                       have_x = 0;
+                       pointv[used++] = svgtiny_PATH_LINE;
+                       pointv[used++] = x;
+                       pointv[used++] = number;
+               } else {
+                       have_x = 1;
+                       x = number;
+               }
+
+               /* skip whitespace or comma */
+               while(cursor < textend) {
+                       if ((*cursor != 0x20) &&
+                           (*cursor != 0x09) &&
+                           (*cursor != 0x0A) &&
+                           (*cursor != 0x0D) &&
+                           (*cursor != 0x2C)) {
+                               break;
+                       }
+                       cursor++;
+               }
+       }
+
+       *pointc = used;
+
+       return svgtiny_OK;
+}