From 83c3a3ca94fd8169c41da1dd9e1d0e86f8f1a43f Mon Sep 17 00:00:00 2001
From: lushi <lushi@huawei.com>
Date: Thu, 24 Feb 2022 17:53:23 +0800
Subject: [PATCH 1/5] fix cve-2022-25236

Signed-off-by: lushi <lushi@huawei.com>
Change-Id: Ib238722c0075720c416d362874daf41bc19f6289
---
 lib/xmlparse.c   | 17 +++++++++++++----
 tests/runtests.c | 30 ++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+), 4 deletions(-)
diff --git a/lib/xmlparse.c b/lib/xmlparse.c
index f19e368..a286335 100644
--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
@@ -716,8 +716,7 @@ XML_ParserCreate(const XML_Char *encodingName) {
 
 XML_Parser XMLCALL
 XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
-  XML_Char tmp[2];
-  *tmp = nsSep;
+  XML_Char tmp[2] = {nsSep, 0};
   return XML_ParserCreate_MM(encodingName, NULL, tmp);
 }
 
@@ -1342,8 +1341,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
      would be otherwise.
   */
   if (parser->m_ns) {
-    XML_Char tmp[2];
-    *tmp = parser->m_namespaceSeparator;
+    XML_Char tmp[2] = {parser->m_namespaceSeparator, 0};
     parser = parserCreate(encodingName, &parser->m_mem, tmp, newDtd);
   } else {
     parser = parserCreate(encodingName, &parser->m_mem, NULL, newDtd);
@@ -3754,6 +3752,17 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
     if (! mustBeXML && isXMLNS
         && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
       isXMLNS = XML_FALSE;
+
+    // NOTE: While Expat does not validate namespace URIs against RFC 3986,
+    //       we have to at least make sure that the XML processor on top of
+    //       Expat (that is splitting tag names by namespace separator into
+    //       2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused
+    //       by an attacker putting additional namespace separator characters
+    //       into namespace declarations.  That would be ambiguous and not to
+    //       be expected.
+    if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) {
+      return XML_ERROR_SYNTAX;
+    }
   }
   isXML = isXML && len == xmlLen;
   isXMLNS = isXMLNS && len == xmlnsLen;
diff --git a/tests/runtests.c b/tests/runtests.c
index 45ba5d5..d64187e 100644
--- a/tests/runtests.c
+++ b/tests/runtests.c
@@ -7195,6 +7195,35 @@ START_TEST(test_ns_double_colon_doctype) {
 }
 END_TEST
 
+START_TEST(test_ns_separator_in_uri) {
+  struct test_case {
+    enum XML_Status expectedStatus;
+    const char *doc;
+  };
+  struct test_case cases[] = {
+      {XML_STATUS_OK, "<doc xmlns='one_two' />"},
+      {XML_STATUS_ERROR, "<doc xmlns='one&#x0A;two' />"},
+  };
+
+  size_t i = 0;
+  size_t failCount = 0;
+  for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
+    XML_Parser parser = XML_ParserCreateNS(NULL, '\n');
+    XML_SetElementHandler(parser, dummy_start_element, dummy_end_element);
+    if (XML_Parse(parser, cases[i].doc, (int)strlen(cases[i].doc),
+                  /*isFinal*/ XML_TRUE)
+        != cases[i].expectedStatus) {
+      failCount++;
+    }
+    XML_ParserFree(parser);
+  }
+
+  if (failCount) {
+    fail("Namespace separator handling is broken");
+  }
+}
+END_TEST
+
 /* Control variable; the number of times duff_allocator() will successfully
  * allocate */
 #define ALLOC_ALWAYS_SUCCEED (-1)
@@ -11877,6 +11906,7 @@ make_suite(void) {
   tcase_add_test(tc_namespace, test_ns_utf16_doctype);
   tcase_add_test(tc_namespace, test_ns_invalid_doctype);
   tcase_add_test(tc_namespace, test_ns_double_colon_doctype);
+  tcase_add_test(tc_namespace, test_ns_separator_in_uri);
 
   suite_add_tcase(s, tc_misc);
   tcase_add_checked_fixture(tc_misc, NULL, basic_teardown);
-- 
Gitee


From e09c2419ece153d39b5e0d430ba575ee13883a3d Mon Sep 17 00:00:00 2001
From: lushi <lushi@huawei.com>
Date: Thu, 24 Feb 2022 18:17:41 +0800
Subject: [PATCH 2/5] fix cve-2022-25235

Signed-off-by: lushi <lushi@huawei.com>
Change-Id: I4a8861ea5d593532d75d6fd97777b410fa7f2211
---
 lib/xmltok.c      |   5 ---
 lib/xmltok_impl.c |  18 +++++---
 tests/runtests.c  | 109 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 120 insertions(+), 12 deletions(-)

diff --git a/lib/xmltok.c b/lib/xmltok.c
index 5114016..53ec1d0 100644
--- a/lib/xmltok.c
+++ b/lib/xmltok.c
@@ -99,11 +99,6 @@
         + ((((byte)[1]) & 3) << 1) + ((((byte)[2]) >> 5) & 1)]                 \
    & (1u << (((byte)[2]) & 0x1F)))
 
-#define UTF8_GET_NAMING(pages, p, n)                                           \
-  ((n) == 2                                                                    \
-       ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p))                   \
-       : ((n) == 3 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) : 0))
-
 /* Detection of invalid UTF-8 sequences is based on Table 3.1B
    of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/
    with the additional restriction of not allowing the Unicode
diff --git a/lib/xmltok_impl.c b/lib/xmltok_impl.c
index 0430591..84ff35f 100644
--- a/lib/xmltok_impl.c
+++ b/lib/xmltok_impl.c
@@ -69,7 +69,7 @@
   case BT_LEAD##n:                                                             \
     if (end - ptr < n)                                                         \
       return XML_TOK_PARTIAL_CHAR;                                             \
-    if (! IS_NAME_CHAR(enc, ptr, n)) {                                         \
+    if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) {         \
       *nextTokPtr = ptr;                                                       \
       return XML_TOK_INVALID;                                                  \
     }                                                                          \
@@ -98,7 +98,7 @@
   case BT_LEAD##n:                                                             \
     if (end - ptr < n)                                                         \
       return XML_TOK_PARTIAL_CHAR;                                             \
-    if (! IS_NMSTRT_CHAR(enc, ptr, n)) {                                       \
+    if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) {       \
       *nextTokPtr = ptr;                                                       \
       return XML_TOK_INVALID;                                                  \
     }                                                                          \
@@ -1142,6 +1142,10 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
   case BT_LEAD##n:                                                             \
     if (end - ptr < n)                                                         \
       return XML_TOK_PARTIAL_CHAR;                                             \
+    if (IS_INVALID_CHAR(enc, ptr, n)) {                                        \
+      *nextTokPtr = ptr;                                                       \
+      return XML_TOK_INVALID;                                                  \
+    }                                                                          \
     if (IS_NMSTRT_CHAR(enc, ptr, n)) {                                         \
       ptr += n;                                                                \
       tok = XML_TOK_NAME;                                                      \
@@ -1270,7 +1274,7 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end,
     switch (BYTE_TYPE(enc, ptr)) {
 #  define LEAD_CASE(n)                                                         \
   case BT_LEAD##n:                                                             \
-    ptr += n;                                                                  \
+    ptr += n; /* NOTE: The encoding has already been validated. */             \
     break;
       LEAD_CASE(2)
       LEAD_CASE(3)
@@ -1339,7 +1343,7 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end,
     switch (BYTE_TYPE(enc, ptr)) {
 #  define LEAD_CASE(n)                                                         \
   case BT_LEAD##n:                                                             \
-    ptr += n;                                                                  \
+    ptr += n; /* NOTE: The encoding has already been validated. */             \
     break;
       LEAD_CASE(2)
       LEAD_CASE(3)
@@ -1518,7 +1522,7 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr, int attsMax,
       state = inName;                                                          \
     }
 #  define LEAD_CASE(n)                                                         \
-  case BT_LEAD##n:                                                             \
+  case BT_LEAD##n: /* NOTE: The encoding has already been validated. */        \
     START_NAME ptr += (n - MINBPC(enc));                                       \
     break;
       LEAD_CASE(2)
@@ -1730,7 +1734,7 @@ PREFIX(nameLength)(const ENCODING *enc, const char *ptr) {
     switch (BYTE_TYPE(enc, ptr)) {
 #  define LEAD_CASE(n)                                                         \
   case BT_LEAD##n:                                                             \
-    ptr += n;                                                                  \
+    ptr += n; /* NOTE: The encoding has already been validated. */             \
     break;
       LEAD_CASE(2)
       LEAD_CASE(3)
@@ -1775,7 +1779,7 @@ PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end,
     switch (BYTE_TYPE(enc, ptr)) {
 #  define LEAD_CASE(n)                                                         \
   case BT_LEAD##n:                                                             \
-    ptr += n;                                                                  \
+    ptr += n; /* NOTE: The encoding has already been validated. */             \
     pos->columnNumber++;                                                       \
     break;
       LEAD_CASE(2)
diff --git a/tests/runtests.c b/tests/runtests.c
index d64187e..7453038 100644
--- a/tests/runtests.c
+++ b/tests/runtests.c
@@ -5973,6 +5973,105 @@ START_TEST(test_utf8_in_cdata_section_2) {
 }
 END_TEST
 
+START_TEST(test_utf8_in_start_tags) {
+  struct test_case {
+    bool goodName;
+    bool goodNameStart;
+    const char *tagName;
+  };
+
+  // The idea with the tests below is this:
+  // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
+  // go to isNever and are hence not a concern.
+  //
+  // We start with a character that is a valid name character
+  // (or even name-start character, see XML 1.0r4 spec) and then we flip
+  // single bits at places where (1) the result leaves the UTF-8 encoding space
+  // and (2) we stay in the same n-byte sequence family.
+  //
+  // The flipped bits are highlighted in angle brackets in comments,
+  // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
+  // the most significant bit to 1 to leave UTF-8 encoding space.
+  struct test_case cases[] = {
+      // 1-byte UTF-8: [0xxx xxxx]
+      {true, true, "\x3A"},   // [0011 1010] = ASCII colon ':'
+      {false, false, "\xBA"}, // [<1>011 1010]
+      {true, false, "\x39"},  // [0011 1001] = ASCII nine '9'
+      {false, false, "\xB9"}, // [<1>011 1001]
+
+      // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
+      {true, true, "\xDB\xA5"},   // [1101 1011] [1010 0101] =
+                                  // Arabic small waw U+06E5
+      {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
+      {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
+      {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
+      {true, false, "\xCC\x81"},  // [1100 1100] [1000 0001] =
+                                  // combining char U+0301
+      {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
+      {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
+      {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
+
+      // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
+      {true, true, "\xE0\xA4\x85"},   // [1110 0000] [1010 0100] [1000 0101] =
+                                      // Devanagari Letter A U+0905
+      {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
+      {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
+      {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
+      {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
+      {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
+      {true, false, "\xE0\xA4\x81"},  // [1110 0000] [1010 0100] [1000 0001] =
+                                      // combining char U+0901
+      {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
+      {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
+      {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
+      {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
+      {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
+  };
+  const bool atNameStart[] = {true, false};
+
+  size_t i = 0;
+  char doc[1024];
+  size_t failCount = 0;
+
+  for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
+    size_t j = 0;
+    for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
+      const bool expectedSuccess
+          = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
+      sprintf(doc, "<%s%s><!--", atNameStart[j] ? "" : "a", cases[i].tagName);
+      XML_Parser parser = XML_ParserCreate(NULL);
+
+      const enum XML_Status status
+          = XML_Parse(parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
+
+      bool success = true;
+      if ((status == XML_STATUS_OK) != expectedSuccess) {
+        success = false;
+      }
+      if ((status == XML_STATUS_ERROR)
+          && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
+        success = false;
+      }
+
+      if (! success) {
+        fprintf(
+            stderr,
+            "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
+            (unsigned)i + 1u, atNameStart[j] ? "    " : "not ",
+            (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
+        failCount++;
+      }
+
+      XML_ParserFree(parser);
+    }
+  }
+
+  if (failCount > 0) {
+    fail("UTF-8 regression detected");
+  }
+}
+END_TEST
+
 /* Test trailing spaces in elements are accepted */
 static void XMLCALL
 record_element_end_handler(void *userData, const XML_Char *name) {
@@ -6150,6 +6249,14 @@ START_TEST(test_bad_doctype) {
 }
 END_TEST
 
+START_TEST(test_bad_doctype_utf8) {
+  const char *text = "<!DOCTYPE \xDB\x25"
+                     "doc><doc/>"; // [1101 1011] [<0>010 0101]
+  expect_failure(text, XML_ERROR_INVALID_TOKEN,
+                 "Invalid UTF-8 in DOCTYPE not faulted");
+}
+END_TEST
+
 START_TEST(test_bad_doctype_utf16) {
   const char text[] =
       /* <!DOCTYPE doc [ \x06f2 ]><doc/>
@@ -11842,6 +11949,7 @@ make_suite(void) {
   tcase_add_test(tc_basic, test_ext_entity_utf8_non_bom);
   tcase_add_test(tc_basic, test_utf8_in_cdata_section);
   tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
+  tcase_add_test(tc_basic, test_utf8_in_start_tags);
   tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
   tcase_add_test(tc_basic, test_utf16_attribute);
   tcase_add_test(tc_basic, test_utf16_second_attr);
@@ -11850,6 +11958,7 @@ make_suite(void) {
   tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
   tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
   tcase_add_test(tc_basic, test_bad_doctype);
+  tcase_add_test(tc_basic, test_bad_doctype_utf8);
   tcase_add_test(tc_basic, test_bad_doctype_utf16);
   tcase_add_test(tc_basic, test_bad_doctype_plus);
   tcase_add_test(tc_basic, test_bad_doctype_star);
-- 
Gitee


From 7b300f91edb722ec88b6e342f05b300022464eb8 Mon Sep 17 00:00:00 2001
From: lushi <lushi@huawei.com>
Date: Thu, 24 Feb 2022 19:51:34 +0800
Subject: [PATCH 3/5] fix CVE-2022-25314

Signed-off-by: lushi <lushi@huawei.com>
Change-Id: Id2e8c914794b87e2629972af35b89fe3ec740e6c
---
 lib/xmlparse.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/xmlparse.c b/lib/xmlparse.c
index a286335..dac3ffc 100644
--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
@@ -7419,7 +7419,7 @@ getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr,
 
 static XML_Char *
 copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) {
-  int charsRequired = 0;
+  size_t charsRequired = 0;
   XML_Char *result;
 
   /* First determine how long the string is */
-- 
Gitee


From d52ee044545ef25136acff7c9c6c4bd0ec91ae06 Mon Sep 17 00:00:00 2001
From: lushi <lushi@huawei.com>
Date: Thu, 24 Feb 2022 20:29:27 +0800
Subject: [PATCH 4/5] fix CVE-2022-25315

Signed-off-by: lushi <lushi@huawei.com>
Change-Id: I36275aca43b71385adfce732af8c11800e489c86
---
 lib/xmlparse.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/lib/xmlparse.c b/lib/xmlparse.c
index dac3ffc..5c31f75 100644
--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
@@ -2559,6 +2559,7 @@ storeRawNames(XML_Parser parser) {
   while (tag) {
     int bufSize;
     int nameLen = sizeof(XML_Char) * (tag->name.strLen + 1);
+    size_t rawNameLen;
     char *rawNameBuf = tag->buf + nameLen;
     /* Stop if already stored.  Since m_tagStack is a stack, we can stop
        at the first entry that has already been copied; everything
@@ -2570,7 +2571,11 @@ storeRawNames(XML_Parser parser) {
     /* For re-use purposes we need to ensure that the
        size of tag->buf is a multiple of sizeof(XML_Char).
     */
-    bufSize = nameLen + ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
+    rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
+    /* Detect and prevent integer overflow. */
+    if (rawNameLen > (size_t)INT_MAX - nameLen)
+      return XML_FALSE;
+    bufSize = nameLen + (int)rawNameLen;
     if (bufSize > tag->bufEnd - tag->buf) {
       char *temp = (char *)REALLOC(parser, tag->buf, bufSize);
       if (temp == NULL)
-- 
Gitee


From 82d2e63720b81cbda6b57ac34cc813c51c7f901f Mon Sep 17 00:00:00 2001
From: lushi <lushi@huawei.com>
Date: Thu, 24 Feb 2022 20:35:27 +0800
Subject: [PATCH 5/5] fix CVE-2022-25313

Signed-off-by: lushi <lushi@huawei.com>
Change-Id: Id42cc85aff67d4248b332e06deeb1e1f80dff699
---
 lib/xmlparse.c | 116 +++++++++++++++++++++++++++++++++----------------
 1 file changed, 79 insertions(+), 37 deletions(-)

diff --git a/lib/xmlparse.c b/lib/xmlparse.c
index 5c31f75..22186fa 100644
--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
@@ -7329,44 +7329,15 @@ nextScaffoldPart(XML_Parser parser) {
   return next;
 }
 
-static void
-build_node(XML_Parser parser, int src_node, XML_Content *dest,
-           XML_Content **contpos, XML_Char **strpos) {
-  DTD *const dtd = parser->m_dtd; /* save one level of indirection */
-  dest->type = dtd->scaffold[src_node].type;
-  dest->quant = dtd->scaffold[src_node].quant;
-  if (dest->type == XML_CTYPE_NAME) {
-    const XML_Char *src;
-    dest->name = *strpos;
-    src = dtd->scaffold[src_node].name;
-    for (;;) {
-      *(*strpos)++ = *src;
-      if (! *src)
-        break;
-      src++;
-    }
-    dest->numchildren = 0;
-    dest->children = NULL;
-  } else {
-    unsigned int i;
-    int cn;
-    dest->numchildren = dtd->scaffold[src_node].childcnt;
-    dest->children = *contpos;
-    *contpos += dest->numchildren;
-    for (i = 0, cn = dtd->scaffold[src_node].firstchild; i < dest->numchildren;
-         i++, cn = dtd->scaffold[cn].nextsib) {
-      build_node(parser, cn, &(dest->children[i]), contpos, strpos);
-    }
-    dest->name = NULL;
-  }
-}
-
 static XML_Content *
 build_model(XML_Parser parser) {
+  /* Function build_model transforms the existing parser->m_dtd->scaffold
+   * array of CONTENT_SCAFFOLD tree nodes into a new array of
+   * XML_Content tree nodes followed by a gapless list of zero-terminated
+   * strings. */
   DTD *const dtd = parser->m_dtd; /* save one level of indirection */
   XML_Content *ret;
-  XML_Content *cpos;
-  XML_Char *str;
+  XML_Char *str; /* the current string writing location */
 
   /* Detect and prevent integer overflow.
    * The preprocessor guard addresses the "always false" warning
@@ -7392,10 +7363,81 @@ build_model(XML_Parser parser) {
   if (! ret)
     return NULL;
 
-  str = (XML_Char *)(&ret[dtd->scaffCount]);
-  cpos = &ret[1];
+  /* What follows is an iterative implementation (of what was previously done
+   * recursively in a dedicated function called "build_node".  The old recursive
+   * build_node could be forced into stack exhaustion from input as small as a
+   * few megabyte, and so that was a security issue.  Hence, a function call
+   * stack is avoided now by resolving recursion.)
+   *
+   * The iterative approach works as follows:
+   *
+   * - We use space in the target array for building a temporary stack structure
+   *   while that space is still unused.
+   *   The stack grows from the array's end downwards and the "actual data"
+   *   grows from the start upwards, sequentially.
+   *   (Because stack grows downwards, pushing onto the stack is a decrement
+   *   while popping off the stack is an increment.)
+   *
+   * - A stack element appears as a regular XML_Content node on the outside,
+   *   but only uses a single field -- numchildren -- to store the source
+   *   tree node array index.  These are the breadcrumbs leading the way back
+   *   during pre-order (node first) depth-first traversal.
+   *
+   * - The reason we know the stack will never grow into (or overlap with)
+   *   the area with data of value at the start of the array is because
+   *   the overall number of elements to process matches the size of the array,
+   *   and the sum of fully processed nodes and yet-to-be processed nodes
+   *   on the stack, cannot be more than the total number of nodes.
+   *   It is possible for the top of the stack and the about-to-write node
+   *   to meet, but that is safe because we get the source index out
+   *   before doing any writes on that node.
+   */
+  XML_Content *dest = ret; /* tree node writing location, moves upwards */
+  XML_Content *const destLimit = &ret[dtd->scaffCount];
+  XML_Content *const stackBottom = &ret[dtd->scaffCount];
+  XML_Content *stackTop = stackBottom; /* i.e. stack is initially empty */
+  str = (XML_Char *)&ret[dtd->scaffCount];
+
+  /* Push source tree root node index onto the stack */
+  (--stackTop)->numchildren = 0;
+
+  for (; dest < destLimit; dest++) {
+    /* Pop source tree node index off the stack */
+    const int src_node = (int)(stackTop++)->numchildren;
+
+    /* Convert item */
+    dest->type = dtd->scaffold[src_node].type;
+    dest->quant = dtd->scaffold[src_node].quant;
+    if (dest->type == XML_CTYPE_NAME) {
+      const XML_Char *src;
+      dest->name = str;
+      src = dtd->scaffold[src_node].name;
+      for (;;) {
+        *str++ = *src;
+        if (! *src)
+          break;
+        src++;
+      }
+      dest->numchildren = 0;
+      dest->children = NULL;
+    } else {
+      unsigned int i;
+      int cn;
+      dest->name = NULL;
+      dest->numchildren = dtd->scaffold[src_node].childcnt;
+      dest->children = &dest[1];
+
+      /* Push children to the stack
+       * in a way where the first child ends up at the top of the
+       * (downwards growing) stack, in order to be processed first. */
+      stackTop -= dest->numchildren;
+      for (i = 0, cn = dtd->scaffold[src_node].firstchild;
+           i < dest->numchildren; i++, cn = dtd->scaffold[cn].nextsib) {
+        (stackTop + i)->numchildren = (unsigned int)cn;
+      }
+    }
+  }
 
-  build_node(parser, 0, ret, &cpos, &str);
   return ret;
 }
 
-- 
Gitee