From 3f3d13b724e685e39f4c843719c45cabe54d7c05 Mon Sep 17 00:00:00 2001 From: Silvan Jegen Date: Mon, 12 Sep 2016 19:25:40 +0200 Subject: Add sxmlc --- Makefile | 7 +- sxmlc.c | 20 + sxmlclib.c | 2282 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ sxmlclib.h | 827 ++++++++++++++++++++++ 4 files changed, 3135 insertions(+), 1 deletion(-) create mode 100644 sxmlc.c create mode 100644 sxmlclib.c create mode 100644 sxmlclib.h diff --git a/Makefile b/Makefile index 57b1738..e621e95 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ CC = gcc CFLAGS = -Wall -O2 -all: mxml ezxml yxml +all: mxml ezxml yxml sxmlc mxml: mxml.c $(CC) $(CFLAGS) -lmxml -pthread -o mxml mxml.c @@ -13,10 +13,15 @@ ezxml: ezxml.c ezxmllib.o yxml: yxml.c yxmllib.o $(CC) $(CFLAGS) -o yxml yxml.c yxmllib.o +sxmlc: sxmlc.c sxmlclib.o + $(CC) $(CFLAGS) -o sxmlc sxmlc.c sxmlclib.o + ezxmllib.o: ezxmllib.c yxmllib.o: yxmllib.c +sxmlclib.o: sxmlclib.c + debug: $(CC) $(CFLAGS) -g -lmxml -pthread -o mxml mxml.c $(CC) $(CFLAGS) -g -o ezxml ezxml.c ezxmllib.o diff --git a/sxmlc.c b/sxmlc.c new file mode 100644 index 0000000..1bbf13c --- /dev/null +++ b/sxmlc.c @@ -0,0 +1,20 @@ +#include +#include + +#include "sxmlclib.h" + +int process(char *fn) { + + printf("\n"); + return 0; +} + +int main(int argc, char *argv[]) { + + for (int i = 1; i < argc; i++) { + process(argv[i]); + } + + return 0; +} + diff --git a/sxmlclib.c b/sxmlclib.c new file mode 100644 index 0000000..4b30d3f --- /dev/null +++ b/sxmlclib.c @@ -0,0 +1,2282 @@ +/* + Copyright (c) 2010, Matthieu Labas + All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY + OF SUCH DAMAGE. + + The views and conclusions contained in the software and documentation are those of the + authors and should not be interpreted as representing official policies, either expressed + or implied, of the FreeBSD Project. +*/ +#if defined(WIN32) || defined(WIN64) +#pragma warning(disable : 4996) +#endif + +#include +#include +#include +#include +#include "sxmlclib.h" + +/* + Struct defining "special" tags such as "" or "". + These tags are considered having a start and an end with some data in between that will + be stored in the 'tag' member of an XMLNode. + The 'tag_type' member is a constant that is associated to such tag. + All 'len_*' members are basically the "sx_strlen()" of 'start' and 'end' members. + */ +typedef struct _Tag { + TagType tag_type; + SXML_CHAR* start; + int len_start; + SXML_CHAR* end; + int len_end; +} _TAG; + +typedef struct _SpecialTag { + _TAG *tags; + int n_tags; +} SPECIAL_TAG; + +/* + List of "special" tags handled by sxmlc. + NB the "' or ']>'). 
+ */ +static _TAG _spec[] = { + { TAG_INSTR, C2SX(""), 2 }, + { TAG_COMMENT, C2SX(""), 3 }, + { TAG_CDATA, C2SX(""), 3 } +}; +static int NB_SPECIAL_TAGS = (int)(sizeof(_spec) / sizeof(_TAG)); /* Auto computation of number of special tags */ + +/* + User-registered tags. + */ +static SPECIAL_TAG _user_tags = { NULL, 0 }; + +int XML_register_user_tag(TagType tag_type, SXML_CHAR* start, SXML_CHAR* end) +{ + _TAG* p; + int i, n, le; + + if (tag_type < TAG_USER) + return -1; + + if (start == NULL || end == NULL || *start != C2SX('<')) + return -1; + + le = sx_strlen(end); + if (end[le-1] != C2SX('>')) + return -1; + + i = _user_tags.n_tags; + n = i + 1; + p = (_TAG*)__realloc(_user_tags.tags, n * sizeof(_TAG)); + if (p == NULL) + return -1; + + p[i].tag_type = tag_type; + p[i].start = start; + p[i].end = end; + p[i].len_start = sx_strlen(start); + p[i].len_end = le; + _user_tags.tags = p; + _user_tags.n_tags = n; + + return i; +} + +int XML_unregister_user_tag(int i_tag) +{ + _TAG* pt; + + if (i_tag < 0 || i_tag >= _user_tags.n_tags) + return -1; + + if (_user_tags.n_tags == 1) + pt = NULL; + else { + pt = (_TAG*)__malloc((_user_tags.n_tags - 1) * sizeof(_TAG)); + if (pt == NULL) + return -1; + } + + if (pt != NULL) { + memcpy(pt, _user_tags.tags, i_tag * sizeof(_TAG)); + memcpy(&pt[i_tag], &_user_tags.tags[i_tag + 1], (_user_tags.n_tags - i_tag - 1) * sizeof(_TAG)); + } + if (_user_tags.tags != NULL) + __free(_user_tags.tags); + _user_tags.tags = pt; + _user_tags.n_tags--; + + return _user_tags.n_tags; +} + +int XML_get_nb_registered_user_tags(void) +{ + return _user_tags.n_tags; +} + +int XML_get_registered_user_tag(TagType tag_type) +{ + int i; + + for (i = 0; i < _user_tags.n_tags; i++) + if (_user_tags.tags[i].tag_type == tag_type) + return i; + + return -1; +} + +/* --- XMLNode methods --- */ + +/* + Add 'node' to given '*children_array' of '*len_array' elements. + '*len_array' is overwritten with the number of elements in '*children_array' after its reallocation. 
+ Return the index of the newly added 'node' in '*children_array', or '-1' for memory error. + */ +static int _add_node(XMLNode*** children_array, int* len_array, XMLNode* node) +{ + XMLNode** pt = (XMLNode**)__realloc(*children_array, (*len_array+1) * sizeof(XMLNode*)); + + if (pt == NULL) + return -1; + + pt[*len_array] = node; + *children_array = pt; + + return (*len_array)++; +} + +int XMLNode_init(XMLNode* node) +{ + if (node == NULL) + return false; + + if (node->init_value == XML_INIT_DONE) + return true; /*(void)XMLNode_free(node);*/ + + node->tag = NULL; + node->text = NULL; + + node->attributes = NULL; + node->n_attributes = 0; + + node->father = NULL; + node->children = NULL; + node->n_children = 0; + + node->tag_type = TAG_NONE; + node->active = true; + + node->init_value = XML_INIT_DONE; + + return true; +} + +XMLNode* XMLNode_allocN(int n) +{ + int i; + XMLNode* p; + + if (n <= 0) + return NULL; + + p = (XMLNode*)__calloc(n, sizeof(XMLNode)); + if (p == NULL) + return NULL; + + for (i = 0; i < n; i++) + (void)XMLNode_init(&p[i]); + + return p; +} + +XMLNode* XMLNode_dup(const XMLNode* node, int copy_children) +{ + XMLNode* n; + + if (node == NULL) + return NULL; + + n = (XMLNode*)__calloc(1, sizeof(XMLNode)); + if (n == NULL) + return NULL; + + XMLNode_init(n); + if (!XMLNode_copy(n, node, copy_children)) { + XMLNode_free(n); + + return NULL; + } + + return n; +} + +int XMLNode_free(XMLNode* node) +{ + if (node == NULL || node->init_value != XML_INIT_DONE) + return false; + + if (node->tag != NULL) { + __free(node->tag); + node->tag = NULL; + } + + XMLNode_remove_text(node); + XMLNode_remove_all_attributes(node); + XMLNode_remove_children(node); + + node->tag_type = TAG_NONE; + + return true; +} + +int XMLNode_copy(XMLNode* dst, const XMLNode* src, int copy_children) +{ + int i; + + if (dst == NULL || (src != NULL && src->init_value != XML_INIT_DONE)) + return false; + + (void)XMLNode_free(dst); /* 'dst' is freed first */ + + /* NULL 'src' resets 
'dst' */ + if (src == NULL) + return true; + + /* Tag */ + if (src->tag != NULL) { + dst->tag = sx_strdup(src->tag); + if (dst->tag == NULL) goto copy_err; + } + + /* Text */ + if (dst->text != NULL) { + dst->text = sx_strdup(src->text); + if (dst->text == NULL) goto copy_err; + } + + /* Attributes */ + if (src->n_attributes > 0) { + dst->attributes = (XMLAttribute*)__calloc(src->n_attributes, sizeof(XMLAttribute)); + if (dst->attributes== NULL) goto copy_err; + dst->n_attributes = src->n_attributes; + for (i = 0; i < src->n_attributes; i++) { + dst->attributes[i].name = sx_strdup(src->attributes[i].name); + dst->attributes[i].value = sx_strdup(src->attributes[i].value); + if (dst->attributes[i].name == NULL || dst->attributes[i].value == NULL) goto copy_err; + dst->attributes[i].active = src->attributes[i].active; + } + } + + dst->tag_type = src->tag_type; + dst->father = src->father; + dst->user = src->user; + dst->active = src->active; + + /* Copy children if required (and there are any) */ + if (copy_children && src->n_children > 0) { + dst->children = (XMLNode**)__calloc(src->n_children, sizeof(XMLNode*)); + if (dst->children == NULL) goto copy_err; + dst->n_children = src->n_children; + for (i = 0; i < src->n_children; i++) { + if (!XMLNode_copy(dst->children[i], src->children[i], true)) goto copy_err; + } + } + + return true; + +copy_err: + (void)XMLNode_free(dst); + + return false; +} + +int XMLNode_set_active(XMLNode* node, int active) +{ + if (node == NULL || node->init_value != XML_INIT_DONE) + return false; + + node->active = active; + + return true; +} + +int XMLNode_set_tag(XMLNode* node, const SXML_CHAR* tag) +{ + SXML_CHAR* newtag; + if (node == NULL || tag == NULL || node->init_value != XML_INIT_DONE) + return false; + + newtag = sx_strdup(tag); + if (newtag == NULL) + return false; + if (node->tag != NULL) __free(node->tag); + node->tag = newtag; + + return true; +} + +int XMLNode_set_type(XMLNode* node, const TagType tag_type) +{ + if (node == 
NULL || node->init_value != XML_INIT_DONE) + return false; + + switch (tag_type) { + case TAG_ERROR: + case TAG_END: + case TAG_PARTIAL: + case TAG_NONE: + return false; + + default: + node->tag_type = tag_type; + return true; + } +} + +int XMLNode_set_attribute(XMLNode* node, const SXML_CHAR* attr_name, const SXML_CHAR* attr_value) +{ + XMLAttribute* pt; + int i; + + if (node == NULL || attr_name == NULL || attr_name[0] == NULC || node->init_value != XML_INIT_DONE) + return -1; + + i = XMLNode_search_attribute(node, attr_name, 0); + if (i >= 0) { /* Attribute found: update it */ + SXML_CHAR* value = NULL; + if (attr_value != NULL && (value = sx_strdup(attr_value)) == NULL) + return -1; + pt = node->attributes; + if (pt[i].value != NULL) + __free(pt[i].value); + pt[i].value = value; + } else { /* Attribute not found: add it */ + SXML_CHAR* name = sx_strdup(attr_name); + SXML_CHAR* value = (attr_value == NULL ? NULL : sx_strdup(attr_value)); + if (name == NULL || (value == NULL && attr_value != NULL)) { + if (value != NULL) + __free(value); + if (name != NULL) + __free(name); + return -1; + } + i = node->n_attributes; + pt = (XMLAttribute*)__realloc(node->attributes, (i+1) * sizeof(XMLAttribute)); + if (pt == NULL) { + if (value != NULL) + __free(value); + __free(name); + return -1; + } + + pt[i].name = name; + pt[i].value = value; + pt[i].active = true; + node->attributes = pt; + node->n_attributes = i + 1; + } + + return node->n_attributes; +} + +int XMLNode_get_attribute_with_default(XMLNode* node, const SXML_CHAR* attr_name, const SXML_CHAR** attr_value, const SXML_CHAR* default_attr_value) +{ + XMLAttribute* pt; + int i; + + if (node == NULL || attr_name == NULL || attr_name[0] == NULC || attr_value == NULL || node->init_value != XML_INIT_DONE) + return false; + + i = XMLNode_search_attribute(node, attr_name, 0); + if (i >= 0) { + pt = node->attributes; + if (pt[i].value != NULL) { + *attr_value = sx_strdup(pt[i].value); + if (*attr_value == NULL) + return 
false; + } else + *attr_value = NULL; /* NULL but returns 'true' as 'NULL' is the actual attribute value */ + } else if (default_attr_value != NULL) { + *attr_value = sx_strdup(default_attr_value); + if (*attr_value == NULL) + return false; + } else + *attr_value = NULL; + + return true; +} + +int XMLNode_get_attribute_count(const XMLNode* node) +{ + int i, n; + + if (node == NULL || node->init_value != XML_INIT_DONE) + return -1; + + for (i = n = 0; i < node->n_attributes; i++) + if (node->attributes[i].active) n++; + + return n; +} + +int XMLNode_search_attribute(const XMLNode* node, const SXML_CHAR* attr_name, int i_search) +{ + int i; + + if (node == NULL || attr_name == NULL || attr_name[0] == NULC || i_search < 0 || i_search >= node->n_attributes) + return -1; + + for (i = i_search; i < node->n_attributes; i++) + if (node->attributes[i].active && !sx_strcmp(node->attributes[i].name, attr_name)) + return i; + + return -1; +} + +int XMLNode_remove_attribute(XMLNode* node, int i_attr) +{ + XMLAttribute* pt; + if (node == NULL || node->init_value != XML_INIT_DONE || i_attr < 0 || i_attr >= node->n_attributes) + return -1; + + /* Before modifying first see if we run out of memory */ + if (node->n_attributes == 1) + pt = NULL; + else { + pt = (XMLAttribute*)__malloc((node->n_attributes - 1) * sizeof(XMLAttribute)); + if (pt == NULL) + return -1; + } + + /* Can't fail anymore, free item */ + if (node->attributes[i_attr].name != NULL) __free(node->attributes[i_attr].name); + if (node->attributes[i_attr].value != NULL) __free(node->attributes[i_attr].value); + + if (pt != NULL) { + memcpy(pt, node->attributes, i_attr * sizeof(XMLAttribute)); + memcpy(&pt[i_attr], &node->attributes[i_attr + 1], (node->n_attributes - i_attr - 1) * sizeof(XMLAttribute)); + } + if (node->attributes != NULL) + __free(node->attributes); + node->attributes = pt; + node->n_attributes--; + + return node->n_attributes; +} + +int XMLNode_remove_all_attributes(XMLNode* node) +{ + int i; + + if 
(node == NULL || node->init_value != XML_INIT_DONE) + return false; + + if (node->attributes != NULL) { + for (i = 0; i < node->n_attributes; i++) { + if (node->attributes[i].name != NULL) + __free(node->attributes[i].name); + if (node->attributes[i].value != NULL) + __free(node->attributes[i].value); + } + __free(node->attributes); + node->attributes = NULL; + } + node->n_attributes = 0; + + return true; +} + +int XMLNode_set_text(XMLNode* node, const SXML_CHAR* text) +{ + SXML_CHAR* p; + if (node == NULL || node->init_value != XML_INIT_DONE) + return false; + + if (text == NULL) { /* We want to remove it => free node text */ + if (node->text != NULL) { + __free(node->text); + node->text = NULL; + } + + return true; + } + + p = (SXML_CHAR*)__realloc(node->text, (sx_strlen(text) + 1)*sizeof(SXML_CHAR)); /* +1 for '\0' */ + if (p == NULL) + return false; + node->text = p; + + sx_strcpy(node->text, text); + + return true; +} + +int XMLNode_add_child(XMLNode* node, XMLNode* child) +{ + if (node == NULL || child == NULL || node->init_value != XML_INIT_DONE || child->init_value != XML_INIT_DONE) + return false; + + if (_add_node(&node->children, &node->n_children, child) >= 0) { + node->tag_type = TAG_FATHER; + child->father = node; + return true; + } else + return false; +} + +int XMLNode_get_children_count(const XMLNode* node) +{ + int i, n; + + if (node == NULL || node->init_value != XML_INIT_DONE) + return -1; + + for (i = n = 0; i < node->n_children; i++) + if (node->children[i]->active) n++; + + return n; +} + +XMLNode* XMLNode_get_child(const XMLNode* node, int i_child) +{ + int i; + + if (node == NULL || node->init_value != XML_INIT_DONE || i_child < 0 || i_child >= node->n_children) + return NULL; + + for (i = 0; i < node->n_children; i++) { + if (!node->children[i]->active) + i_child++; + else if (i == i_child) + return node->children[i]; + } + + return NULL; +} + +int XMLNode_remove_child(XMLNode* node, int i_child, int free_child) +{ + int i; + XMLNode** pt; 
+ + if (node == NULL || node->init_value != XML_INIT_DONE || i_child < 0 || i_child >= node->n_children) + return -1; + + /* Lookup 'i_child'th active child */ + for (i = 0; i < node->n_children; i++) { + if (!node->children[i]->active) + i_child++; + else if (i == i_child) + break; + } + if (i >= node->n_children) + return -1; /* Children is not found */ + + /* Before modifying first see if we run out of memory */ + if (node->n_children == 1) + pt = NULL; + else { + pt = (XMLNode**)__malloc((node->n_children - 1) * sizeof(XMLNode*)); + if (pt == NULL) + return -1; + } + + /* Can't fail anymore, free item */ + (void)XMLNode_free(node->children[i_child]); + if (free_child) + __free(node->children[i_child]); + + if (pt != NULL) { + memcpy(pt, node->children, i_child * sizeof(XMLNode*)); + memcpy(&pt[i_child], &node->children[i_child + 1], (node->n_children - i_child - 1) * sizeof(XMLNode*)); + } + if (node->children != NULL) + __free(node->children); + node->children = pt; + node->n_children--; + if (node->n_children == 0) + node->tag_type = TAG_SELF; + + return node->n_children; +} + +int XMLNode_remove_children(XMLNode* node) +{ + int i; + + if (node == NULL || node->init_value != XML_INIT_DONE) + return false; + + if (node->children != NULL) { + for (i = 0; i < node->n_children; i++) + if (node->children[i] != NULL) { + (void)XMLNode_free(node->children[i]); + __free(node->children[i]); + } + __free(node->children); + node->children = NULL; + } + node->n_children = 0; + + return true; +} + +int XMLNode_equal(const XMLNode* node1, const XMLNode* node2) +{ + int i, j; + + if (node1 == node2) + return true; + + if (node1 == NULL || node2 == NULL || node1->init_value != XML_INIT_DONE || node2->init_value != XML_INIT_DONE) + return false; + + if (sx_strcmp(node1->tag, node2->tag)) + return false; + + /* Test all attributes from 'node1' */ + for (i = 0; i < node1->n_attributes; i++) { + if (!node1->attributes[i].active) + continue; + j = XMLNode_search_attribute(node2, 
node1->attributes[i].name, 0); + if (j < 0) + return false; + if (sx_strcmp(node1->attributes[i].value, node2->attributes[j].value)) + return false; + } + + /* Test other attributes from 'node2' that might not be in 'node1' */ + for (i = 0; i < node2->n_attributes; i++) { + if (!node2->attributes[i].active) + continue; + j = XMLNode_search_attribute(node1, node2->attributes[i].name, 0); + if (j < 0) + return false; + if (sx_strcmp(node2->attributes[i].name, node1->attributes[j].name)) + return false; + } + + return true; +} + +XMLNode* XMLNode_next_sibling(const XMLNode* node) +{ + int i; + XMLNode* father; + + if (node == NULL || node->init_value != XML_INIT_DONE || node->father == NULL) + return NULL; + + father = node->father; + for (i = 0; i < father->n_children && father->children[i] != node; i++) ; + i++; /* father->children[i] is now 'node' next sibling */ + + return i < father->n_children ? father->children[i] : NULL; +} + +static XMLNode* _XMLNode_next(const XMLNode* node, int in_children) +{ + XMLNode* node2; + + if (node == NULL || node->init_value != XML_INIT_DONE) + return NULL; + + /* Check first child */ + if (in_children && node->n_children > 0) + return node->children[0]; + + /* Check next sibling */ + if ((node2 = XMLNode_next_sibling(node)) != NULL) + return node2; + + /* Check next uncle */ + return _XMLNode_next(node->father, false); +} + +XMLNode* XMLNode_next(const XMLNode* node) +{ + return _XMLNode_next(node, true); +} + +/* --- XMLDoc methods --- */ + +int XMLDoc_init(XMLDoc* doc) +{ + if (doc == NULL) + return false; + + doc->filename[0] = NULC; +#ifdef SXMLC_UNICODE + memset(&doc->bom, 0, sizeof(doc->bom)); +#endif + doc->nodes = NULL; + doc->n_nodes = 0; + doc->i_root = -1; + doc->init_value = XML_INIT_DONE; + + return true; +} + +int XMLDoc_free(XMLDoc* doc) +{ + int i; + + if (doc == NULL || doc->init_value != XML_INIT_DONE) + return false; + + for (i = 0; i < doc->n_nodes; i++) { + (void)XMLNode_free(doc->nodes[i]); + 
__free(doc->nodes[i]); + } + __free(doc->nodes); + doc->nodes = NULL; + doc->n_nodes = 0; + doc->i_root = -1; + + return true; +} + +int XMLDoc_set_root(XMLDoc* doc, int i_root) +{ + if (doc == NULL || doc->init_value != XML_INIT_DONE || i_root < 0 || i_root >= doc->n_nodes) + return false; + + doc->i_root = i_root; + + return true; +} + +int XMLDoc_add_node(XMLDoc* doc, XMLNode* node) +{ + if (doc == NULL || node == NULL || doc->init_value != XML_INIT_DONE) + return -1; + + if (_add_node(&doc->nodes, &doc->n_nodes, node) < 0) + return -1; + + if (node->tag_type == TAG_FATHER) + doc->i_root = doc->n_nodes - 1; /* Main root node is the last father node */ + + return doc->n_nodes; +} + +int XMLDoc_remove_node(XMLDoc* doc, int i_node, int free_node) +{ + XMLNode** pt; + if (doc == NULL || doc->init_value != XML_INIT_DONE || i_node < 0 || i_node > doc->n_nodes) + return false; + + /* Before modifying first see if we run out of memory */ + if (doc->n_nodes == 1) + pt = NULL; + else { + pt = (XMLNode**)__malloc((doc->n_nodes - 1) * sizeof(XMLNode*)); + if (pt == NULL) + return false; + } + + /* Can't fail anymore, free item */ + (void)XMLNode_free(doc->nodes[i_node]); + if (free_node) __free(doc->nodes[i_node]); + + if (pt != NULL) { + memcpy(pt, &doc->nodes[i_node], i_node * sizeof(XMLNode*)); + memcpy(&pt[i_node], &doc->nodes[i_node + 1], (doc->n_nodes - i_node - 1) * sizeof(XMLNode*)); + } + + if (doc->nodes != NULL) + __free(doc->nodes); + doc->nodes = pt; + doc->n_nodes--; + + return true; +} + +/* + Helper functions to print formatting before a new tag. + Returns the new number of characters in the line. 
+ */ +static int _count_new_char_line(const SXML_CHAR* str, int nb_char_tab, int cur_sz_line) +{ + for (; *str; str++) { + if (*str == C2SX('\n')) + cur_sz_line = 0; + else if (*str == C2SX('\t')) + cur_sz_line += nb_char_tab; + else + cur_sz_line++; + } + + return cur_sz_line; +} +static int _print_formatting(const XMLNode* node, FILE* f, const SXML_CHAR* tag_sep, const SXML_CHAR* child_sep, int nb_char_tab, int cur_sz_line) +{ + if (tag_sep != NULL) { + sx_fprintf(f, tag_sep); + cur_sz_line = _count_new_char_line(tag_sep, nb_char_tab, cur_sz_line); + } + if (child_sep != NULL) { + for (node = node->father; node != NULL; node = node->father) { + sx_fprintf(f, child_sep); + cur_sz_line = _count_new_char_line(child_sep, nb_char_tab, cur_sz_line); + } + } + + return cur_sz_line; +} + +static int _XMLNode_print_header(const XMLNode* node, FILE* f, const SXML_CHAR* tag_sep, const SXML_CHAR* child_sep, const SXML_CHAR* attr_sep, int sz_line, int cur_sz_line, int nb_char_tab) +{ + int i; + SXML_CHAR* p; + + if (node == NULL || f == NULL || !node->active || node->tag == NULL || node->tag[0] == NULC) + return -1; + + /* Special handling of DOCTYPE */ + if (node->tag_type == TAG_DOCTYPE) { + /* Search for an unescaped '[' in the DOCTYPE definition, in which case the end delimiter should be ']>' instead of '>' */ + for (p = sx_strchr(node->tag, C2SX('[')); p != NULL && *(p-1) == C2SX('\\'); p = sx_strchr(p+1, C2SX('['))) ; + cur_sz_line += sx_fprintf(f, C2SX(""), node->tag, p != NULL ? 
C2SX("]") : C2SX("")); + return cur_sz_line; + } + + /* Check for special tags first */ + for (i = 0; i < NB_SPECIAL_TAGS; i++) { + if (node->tag_type == _spec[i].tag_type) { + sx_fprintf(f, C2SX("%s%s%s"), _spec[i].start, node->tag, _spec[i].end); + cur_sz_line += sx_strlen(_spec[i].start) + sx_strlen(node->tag) + sx_strlen(_spec[i].end); + return cur_sz_line; + } + } + + /* Check for user tags */ + for (i = 0; i < _user_tags.n_tags; i++) { + if (node->tag_type == _user_tags.tags[i].tag_type) { + sx_fprintf(f, C2SX("%s%s%s"), _user_tags.tags[i].start, node->tag, _user_tags.tags[i].end); + cur_sz_line += sx_strlen(_user_tags.tags[i].start) + sx_strlen(node->tag) + sx_strlen(_user_tags.tags[i].end); + return cur_sz_line; + } + } + + /* Print tag name */ + cur_sz_line += sx_fprintf(f, C2SX("<%s"), node->tag); + + /* Print attributes */ + for (i = 0; i < node->n_attributes; i++) { + if (!node->attributes[i].active) + continue; + cur_sz_line += sx_strlen(node->attributes[i].name) + sx_strlen(node->attributes[i].value) + 3; + if (sz_line > 0 && cur_sz_line > sz_line) { + cur_sz_line = _print_formatting(node, f, tag_sep, child_sep, nb_char_tab, cur_sz_line); + /* Add extra separator, as if new line was a child of the previous one */ + if (child_sep != NULL) { + sx_fprintf(f, child_sep); + cur_sz_line = _count_new_char_line(child_sep, nb_char_tab, cur_sz_line); + } + } + /* Attribute name */ + if (attr_sep != NULL) { + sx_fprintf(f, attr_sep); + cur_sz_line = _count_new_char_line(attr_sep, nb_char_tab, cur_sz_line); + sx_fprintf(f, C2SX("%s="), node->attributes[i].name); + } else + sx_fprintf(f, C2SX(" %s="), node->attributes[i].name); + + /* Attribute value */ + (void)sx_fputc(XML_DEFAULT_QUOTE, f); + cur_sz_line += fprintHTML(f, node->attributes[i].value) + 2; + (void)sx_fputc(XML_DEFAULT_QUOTE, f); + } + + /* End the tag if there are no children and no text */ + if (node->n_children == 0 && (node->text == NULL || node->text[0] == NULC)) { + cur_sz_line += sx_fprintf(f, 
C2SX("/>")); + } else { + (void)sx_fputc(C2SX('>'), f); + cur_sz_line++; + } + + return cur_sz_line; +} + +int XMLNode_print_header(const XMLNode* node, FILE* f, int sz_line, int nb_char_tab) +{ + return _XMLNode_print_header(node, f, NULL, NULL, NULL, sz_line, 0, nb_char_tab) < 0 ? false : true; +} + +static int _XMLNode_print(const XMLNode* node, FILE* f, const SXML_CHAR* tag_sep, const SXML_CHAR* child_sep, const SXML_CHAR* attr_sep, int keep_text_spaces, int sz_line, int cur_sz_line, int nb_char_tab, int depth) +{ + int i; + SXML_CHAR* p; + + if (node != NULL && node->tag_type==TAG_TEXT) { /* Text has to be printed: check if it is only spaces */ + if (!keep_text_spaces) { + for (p = node->text; *p != NULC && sx_isspace(*p); p++) ; /* 'p' points to first non-space character, or to '\0' if only spaces */ + } else + p = node->text; /* '*p' won't be '\0' */ + if (*p != NULC) + cur_sz_line += fprintHTML(f, node->text); + return cur_sz_line; + } + + if (node == NULL || f == NULL || !node->active || node->tag == NULL || node->tag[0] == NULC) + return -1; + + if (nb_char_tab <= 0) + nb_char_tab = 1; + + /* Print formatting */ + if (depth < 0) /* UGLY HACK: 'depth' forced negative on very first line so we don't print an extra 'tag_sep' (usually "\n" when pretty-printing) */ + depth = 0; + else + cur_sz_line = _print_formatting(node, f, tag_sep, child_sep, nb_char_tab, cur_sz_line); + + _XMLNode_print_header(node, f, tag_sep, child_sep, attr_sep, sz_line, cur_sz_line, nb_char_tab); + + if (node->text != NULL && node->text[0] != NULC) { + /* Text has to be printed: check if it is only spaces */ + if (!keep_text_spaces) { + for (p = node->text; *p != NULC && sx_isspace(*p); p++) ; /* 'p' points to first non-space character, or to '\0' if only spaces */ + } else + p = node->text; /* '*p' won't be '\0' */ + if (*p != NULC) cur_sz_line += fprintHTML(f, node->text); + } else if (node->n_children <= 0) /* Everything has already been printed */ + return cur_sz_line; + + /* 
Recursively print children */ + for (i = 0; i < node->n_children; i++) + (void)_XMLNode_print(node->children[i], f, tag_sep, child_sep, attr_sep, keep_text_spaces, sz_line, cur_sz_line, nb_char_tab, depth+1); + + /* Print tag end after children */ + /* Print formatting */ + if (node->n_children > 0) + cur_sz_line = _print_formatting(node, f, tag_sep, child_sep, nb_char_tab, cur_sz_line); + cur_sz_line += sx_fprintf(f, C2SX(""), node->tag); + + return cur_sz_line; +} + +int XMLNode_print_attr_sep(const XMLNode* node, FILE* f, const SXML_CHAR* tag_sep, const SXML_CHAR* child_sep, const SXML_CHAR* attr_sep, int keep_text_spaces, int sz_line, int nb_char_tab) +{ + return _XMLNode_print(node, f, tag_sep, child_sep, attr_sep, keep_text_spaces, sz_line, 0, nb_char_tab, 0); +} + +int XMLDoc_print_attr_sep(const XMLDoc* doc, FILE* f, const SXML_CHAR* tag_sep, const SXML_CHAR* child_sep, const SXML_CHAR* attr_sep, int keep_text_spaces, int sz_line, int nb_char_tab) +{ + int i, depth, cur_sz_line; + + if (doc == NULL || f == NULL || doc->init_value != XML_INIT_DONE) + return false; + +#ifdef SXMLC_UNICODE + /* Write BOM if it exist */ + if (doc->sz_bom > 0) fwrite(doc->bom, sizeof(unsigned char), doc->sz_bom, f); +#endif + + depth = -1; /* UGLY HACK: 'depth' forced negative on very first line so we don't print an extra 'tag_sep' (usually "\n") */ + for (i = 0, cur_sz_line = 0; i < doc->n_nodes; i++) { + cur_sz_line = _XMLNode_print(doc->nodes[i], f, tag_sep, child_sep, attr_sep, keep_text_spaces, sz_line, cur_sz_line, nb_char_tab, depth); + depth = 0; + } + /* TODO: Find something more graceful than 'depth=-1', even though everyone knows I probably never will ;) */ + + return true; +} + +/* --- */ + +int XML_parse_attribute_to(const SXML_CHAR* str, int to, XMLAttribute* xmlattr) +{ + const SXML_CHAR *p; + int i, n0, n1, remQ = 0; + int ret = 1; + SXML_CHAR quote; + + if (str == NULL || xmlattr == NULL) + return 0; + + if (to < 0) + to = sx_strlen(str) - 1; + + /* Search for 
the '=' */ + /* 'n0' is where the attribute name stops, 'n1' is where the attribute value starts */ + for (n0 = 0; n0 != to && str[n0] != C2SX('=') && !sx_isspace(str[n0]); n0++) ; /* Search for '=' or a space */ + for (n1 = n0; n1 != to && sx_isspace(str[n1]); n1++) ; /* Search for something not a space */ + if (str[n1] != C2SX('=')) + return 0; /* '=' not found: malformed string */ + for (n1++; n1 != to && sx_isspace(str[n1]); n1++) ; /* Search for something not a space */ + if (isquote(str[n1])) { /* Remove quotes */ + quote = str[n1]; + remQ = 1; + } + + xmlattr->name = (SXML_CHAR*)__malloc((n0+1)*sizeof(SXML_CHAR)); + xmlattr->value = (SXML_CHAR*)__malloc((to+1 - n1 - remQ + 1) * sizeof(SXML_CHAR)); + xmlattr->active = true; + if (xmlattr->name != NULL && xmlattr->value != NULL) { + /* Copy name */ + sx_strncpy(xmlattr->name, str, n0); + xmlattr->name[n0] = NULC; + /* (void)str_unescape(xmlattr->name); do not unescape the name */ + /* Copy value (p starts after the quote (if any) and stops at the end of 'str' + (skipping the quote if any, hence the '*(p+remQ)') */ + for (i = 0, p = str + n1 + remQ; i + n1 + remQ < to && *(p+remQ) != NULC; i++, p++) + xmlattr->value[i] = *p; + xmlattr->value[i] = NULC; + (void)html2str(xmlattr->value, NULL); /* Convert HTML escape sequences, do not str_unescape(xmlattr->value) */ + if (remQ && *p != quote) + ret = 2; /* Quote at the beginning but not at the end: probable presence of '>' inside attribute value, so we need to read more data! 
*/ + } else + ret = 0; + + if (ret == 0) { + if (xmlattr->name != NULL) { + __free(xmlattr->name); + xmlattr->name = NULL; + } + if (xmlattr->value != NULL) { + __free(xmlattr->value); + xmlattr->value = NULL; + } + } + + return ret; +} + +static TagType _parse_special_tag(const SXML_CHAR* str, int len, _TAG* tag, XMLNode* node) +{ + if (sx_strncmp(str, tag->start, tag->len_start)) + return TAG_NONE; + + if (sx_strncmp(str + len - tag->len_end, tag->end, tag->len_end)) /* There probably is a '>' inside the tag */ + return TAG_PARTIAL; + + node->tag = (SXML_CHAR*)__malloc((len - tag->len_start - tag->len_end + 1)*sizeof(SXML_CHAR)); + if (node->tag == NULL) + return TAG_NONE; + sx_strncpy(node->tag, str + tag->len_start, len - tag->len_start - tag->len_end); + node->tag[len - tag->len_start - tag->len_end] = NULC; + node->tag_type = tag->tag_type; + + return node->tag_type; +} + +/* + Reads a string that is supposed to be an xml tag like '' or ''. + Fills the 'xmlnode' structure with the tag name and its attributes. + Returns 'TAG_ERROR' if an error occurred (malformed 'str' or memory). 'TAG_*' when string is recognized. 
+ */ +TagType XML_parse_1string(const SXML_CHAR* str, XMLNode* xmlnode) +{ + SXML_CHAR *p; + XMLAttribute* pt; + int n, nn, len, rc, tag_end = 0; + + if (str == NULL || xmlnode == NULL) + return TAG_ERROR; + len = sx_strlen(str); + + /* Check for malformed string */ + if (str[0] != C2SX('<') || str[len-1] != C2SX('>')) + return TAG_ERROR; + + for (nn = 0; nn < NB_SPECIAL_TAGS; nn++) { + n = (int)_parse_special_tag(str, len, &_spec[nn], xmlnode); + switch (n) { + case TAG_NONE: break; /* Nothing found => do nothing */ + default: return (TagType)n; /* Tag found => return it */ + } + } + + /* "" instead of ">" if a '[' is found inside */ + if (str[1] == C2SX('!')) { + /* DOCTYPE */ + if (!sx_strncmp(str, C2SX("" tag end */ + nn = 0; + if (str[n]) { /* '[' was found */ + if (sx_strncmp(str+len-2, C2SX("]>"), 2)) /* There probably is a '>' inside the DOCTYPE */ + return TAG_PARTIAL; + nn = 1; + } + xmlnode->tag = (SXML_CHAR*)__malloc((len - 9 - nn)*sizeof(SXML_CHAR)); /* 'len' - "" + '\0' */ + if (xmlnode->tag == NULL) + return TAG_ERROR; + sx_strncpy(xmlnode->tag, &str[9], len - 10 - nn); + xmlnode->tag[len - 10 - nn] = NULC; + xmlnode->tag_type = TAG_DOCTYPE; + + return TAG_DOCTYPE; + } + } + + /* Test user tags */ + for (nn = 0; nn < _user_tags.n_tags; nn++) { + n = _parse_special_tag(str, len, &_user_tags.tags[nn], xmlnode); + switch (n) { + case TAG_ERROR: return TAG_NONE; /* Error => exit */ + case TAG_NONE: break; /* Nothing found => do nothing */ + default: return (TagType)n; /* Tag found => return it */ + } + } + + if (str[1] == C2SX('/')) + tag_end = 1; + + /* tag starts at index 1 (or 2 if tag end) and ends at the first space or '/>' */ + for (n = 1 + tag_end; str[n] != NULC && str[n] != C2SX('>') && str[n] != C2SX('/') && !sx_isspace(str[n]); n++) ; + xmlnode->tag = (SXML_CHAR*)__malloc((n - tag_end)*sizeof(SXML_CHAR)); + if (xmlnode->tag == NULL) + return TAG_ERROR; + sx_strncpy(xmlnode->tag, &str[1 + tag_end], n - 1 - tag_end); + xmlnode->tag[n - 1 - 
tag_end] = NULC; + if (tag_end) { + xmlnode->tag_type = TAG_END; + return TAG_END; + } + + /* Here, 'n' is the position of the first space after tag name */ + while (n < len) { + /* Skips spaces */ + while (sx_isspace(str[n])) n++; + + /* Check for XML end ('>' or '/>') */ + if (str[n] == C2SX('>')) { /* Tag with children */ + int type = (str[n-1] == '/' ? TAG_SELF : TAG_FATHER); // TODO: Find something better to cope with + xmlnode->tag_type = type; + return type; + } + if (!sx_strcmp(str+n, C2SX("/>"))) { /* Tag without children */ + xmlnode->tag_type = TAG_SELF; + return TAG_SELF; + } + + /* New attribute found */ + p = sx_strchr(str+n, C2SX('=')); + if (p == NULL) goto parse_err; + pt = (XMLAttribute*)__realloc(xmlnode->attributes, (xmlnode->n_attributes + 1) * sizeof(XMLAttribute)); + if (pt == NULL) goto parse_err; + + pt[xmlnode->n_attributes].name = NULL; + pt[xmlnode->n_attributes].value = NULL; + pt[xmlnode->n_attributes].active = false; + xmlnode->n_attributes++; + xmlnode->attributes = pt; + while (*p != NULC && sx_isspace(*++p)) ; /* Skip spaces */ + if (isquote(*p)) { /* Attribute value starts with a quote, look for next one, ignoring protected ones with '\' */ + for (nn = p-str+1; str[nn] && str[nn] != *p; nn++) { // CHECK UNICODE "nn = p-str+1" + /* if (str[nn] == C2SX('\\')) nn++; [bugs:#7]: '\' is valid in values */ + } + } else { /* Attribute value stops at first space or end of XML string */ + for (nn = p-str+1; str[nn] != NULC && !sx_isspace(str[nn]) && str[nn] != C2SX('/') && str[nn] != C2SX('>'); nn++) ; /* Go to the end of the attribute value */ // CHECK UNICODE + } + + /* Here 'str[nn]' is the character after value */ + /* the attribute definition ('attrName="attrVal"') is between 'str[n]' and 'str[nn]' */ + rc = XML_parse_attribute_to(&str[n], nn - n, &xmlnode->attributes[xmlnode->n_attributes - 1]); + if (!rc) goto parse_err; + if (rc == 2) { /* Probable presence of '>' inside attribute value, which is legal XML. 
Remove attribute to re-parse it later */ + XMLNode_remove_attribute(xmlnode, xmlnode->n_attributes - 1); + return TAG_PARTIAL; + } + + n = nn + 1; + } + + sx_fprintf(stderr, C2SX("\nWE SHOULD NOT BE HERE!\n[%s]\n\n"), str); + +parse_err: + (void)XMLNode_free(xmlnode); + + return TAG_ERROR; +} + +static int _parse_data_SAX(void* in, const DataSourceType in_type, const SAX_Callbacks* sax, SAX_Data* sd) +{ + SXML_CHAR *line, *txt_end, *p; + XMLNode node; + int ret, exit, sz, n0, ncr; + TagType tag_type; + int (*meos)(void* ds) = (in_type == DATA_SOURCE_BUFFER ? (int(*)(void*))_beob : (int(*)(void*))feof); + + if (sax->start_doc != NULL && !sax->start_doc(sd)) + return true; + if (sax->all_event != NULL && !sax->all_event(XML_EVENT_START_DOC, NULL, (SXML_CHAR*)sd->name, 0, sd)) + return true; + + ret = true; + exit = false; + sd->line_num = 1; /* Line counter, starts at 1 */ + sz = 0; /* 'line' buffer size */ + (void)XMLNode_init(&node); + while ((n0 = read_line_alloc(in, in_type, &line, &sz, 0, NULC, C2SX('>'), true, C2SX('\n'), &ncr)) != 0) { + (void)XMLNode_free(&node); + for (p = line; *p != NULC && sx_isspace(*p); p++) ; /* Checks if text is only spaces */ + if (*p == NULC) + break; + sd->line_num += ncr; + + /* Get text for 'father' (i.e. 
what is before '<') */ + while ((txt_end = sx_strchr(line, C2SX('<'))) == NULL) { /* '<' was not found, indicating a probable '>' inside text (should have been escaped with '>' but we'll handle that ;) */ + n0 = read_line_alloc(in, in_type, &line, &sz, n0, 0, C2SX('>'), true, C2SX('\n'), &ncr); /* Go on reading the file from current position until next '>' */ + sd->line_num += ncr; + if (!n0) { + ret = false; + if (sax->on_error == NULL && sax->all_event == NULL) + sx_fprintf(stderr, C2SX("%s:%d: MEMORY ERROR.\n"), sd->name, sd->line_num); + else { + if (sax->on_error != NULL && !sax->on_error(PARSE_ERR_MEMORY, sd->line_num, sd)) + break; + if (sax->all_event != NULL && !sax->all_event(XML_EVENT_ERROR, NULL, (SXML_CHAR*)sd->name, PARSE_ERR_MEMORY, sd)) + break; + } + break; /* 'txt_end' is still NULL here so we'll display the syntax error below */ + } + } + if (txt_end == NULL) { /* Missing tag start */ + ret = false; + if (sax->on_error == NULL && sax->all_event == NULL) + sx_fprintf(stderr, C2SX("%s:%d: ERROR: Unexpected end character '>', without matching '<'!\n"), sd->name, sd->line_num); + else { + if (sax->on_error != NULL && !sax->on_error(PARSE_ERR_UNEXPECTED_TAG_END, sd->line_num, sd)) + break; + if (sax->all_event != NULL && !sax->all_event(XML_EVENT_ERROR, NULL, (SXML_CHAR*)sd->name, PARSE_ERR_UNEXPECTED_TAG_END, sd)) + break; + } + break; + } + /* First part of 'line' (before '<') is to be added to 'father->text' */ + *txt_end = NULC; /* Have 'line' be the text for 'father' */ + if (*line != NULC && (sax->new_text != NULL || sax->all_event != NULL)) { + if (sax->new_text != NULL && (exit = !sax->new_text(line, sd))) /* no str_unescape(line) */ + break; + if (sax->all_event != NULL && (exit = !sax->all_event(XML_EVENT_TEXT, NULL, line, sd->line_num, sd))) + break; + } + *txt_end = '<'; /* Restores tag start */ + + switch (tag_type = XML_parse_1string(txt_end, &node)) { + case TAG_ERROR: /* Memory error */ + ret = false; + if (sax->on_error == NULL && 
sax->all_event == NULL) + sx_fprintf(stderr, C2SX("%s:%d: MEMORY ERROR.\n"), sd->name, sd->line_num); + else { + if (sax->on_error != NULL && (exit = !sax->on_error(PARSE_ERR_MEMORY, sd->line_num, sd))) + break; + if (sax->all_event != NULL && (exit = !sax->all_event(XML_EVENT_ERROR, NULL, (SXML_CHAR*)sd->name, PARSE_ERR_MEMORY, sd))) + break; + } + break; + + case TAG_NONE: /* Syntax error */ + ret = false; + p = sx_strchr(txt_end, C2SX('\n')); + if (p != NULL) + *p = NULC; + if (sax->on_error == NULL && sax->all_event == NULL) { + sx_fprintf(stderr, C2SX("%s:%d: SYNTAX ERROR (%s%s).\n"), sd->name, sd->line_num, txt_end, p == NULL ? C2SX("") : C2SX("...")); + if (p != NULL) + *p = C2SX('\n'); + } else { + if (sax->on_error != NULL && (exit = !sax->on_error(PARSE_ERR_SYNTAX, sd->line_num, sd))) + break; + if (sax->all_event != NULL && (exit = !sax->all_event(XML_EVENT_ERROR, NULL, (SXML_CHAR*)sd->name, PARSE_ERR_SYNTAX, sd))) + break; + } + break; + + case TAG_END: + if (sax->end_node != NULL || sax->all_event != NULL) { + if (sax->end_node != NULL && (exit = !sax->end_node(&node, sd))) + break; + if (sax->all_event != NULL && (exit = !sax->all_event(XML_EVENT_END_NODE, &node, NULL, sd->line_num, sd))) + break; + } + break; + + default: /* Add 'node' to 'father' children */ + /* If the line looks like a comment (or CDATA) but is not properly finished, loop until we find the end. */ + while (tag_type == TAG_PARTIAL) { + n0 = read_line_alloc(in, in_type, &line, &sz, n0, NULC, C2SX('>'), true, C2SX('\n'), &ncr); /* Go on reading the file from current position until next '>' */ + sd->line_num += ncr; + if (n0 == 0) { + ret = false; + if (sax->on_error == NULL && sax->all_event == NULL) + sx_fprintf(stderr, C2SX("%s:%d: SYNTAX ERROR.\n"), sd->name, sd->line_num); + else { + if (sax->on_error != NULL && (exit = !sax->on_error(meos(in) ? 
PARSE_ERR_EOF : PARSE_ERR_MEMORY, sd->line_num, sd))) + break; + if (sax->all_event != NULL && (exit = !sax->all_event(XML_EVENT_ERROR, NULL, (SXML_CHAR*)sd->name, meos(in) ? PARSE_ERR_EOF : PARSE_ERR_MEMORY, sd))) + break; + } + break; + } + txt_end = sx_strchr(line, C2SX('<')); /* In case 'line' has been moved by the '__realloc' in 'read_line_alloc' */ + tag_type = XML_parse_1string(txt_end, &node); + if (tag_type == TAG_ERROR) { + ret = false; + if (sax->on_error == NULL && sax->all_event == NULL) + sx_fprintf(stderr, C2SX("%s:%d: PARSE ERROR.\n"), sd->name, sd->line_num); + else { + if (sax->on_error != NULL && (exit = !sax->on_error(meos(in) ? PARSE_ERR_EOF : PARSE_ERR_SYNTAX, sd->line_num, sd))) + break; + if (sax->all_event != NULL && (exit = !sax->all_event(XML_EVENT_ERROR, NULL, (SXML_CHAR*)sd->name, meos(in) ? PARSE_ERR_EOF : PARSE_ERR_SYNTAX, sd))) + break; + } + break; + } + } + if (ret == false) + break; + if (sax->start_node != NULL && (exit = !sax->start_node(&node, sd))) + break; + if (sax->all_event != NULL && (exit = !sax->all_event(XML_EVENT_START_NODE, &node, NULL, sd->line_num, sd))) + break; + if (node.tag_type != TAG_FATHER && (sax->end_node != NULL || sax->all_event != NULL)) { + if (sax->end_node != NULL && (exit = !sax->end_node(&node, sd))) + break; + if (sax->all_event != NULL && (exit = !sax->all_event(XML_EVENT_END_NODE, &node, NULL, sd->line_num, sd))) + break; + } + break; + } + if (exit == true || ret == false || meos(in)) + break; + } + __free(line); + (void)XMLNode_free(&node); + + if (sax->end_doc != NULL && !sax->end_doc(sd)) + return ret; + if (sax->all_event != NULL) + (void)sax->all_event(XML_EVENT_END_DOC, NULL, (SXML_CHAR*)sd->name, sd->line_num, sd); + + return ret; +} + +int SAX_Callbacks_init(SAX_Callbacks* sax) +{ + if (sax == NULL) + return false; + + sax->start_doc = NULL; + sax->start_node = NULL; + sax->end_node = NULL; + sax->new_text = NULL; + sax->on_error = NULL; + sax->end_doc = NULL; + sax->all_event = NULL; + 
+ return true; +} + +int DOMXMLDoc_doc_start(SAX_Data* sd) +{ + DOM_through_SAX* dom = (DOM_through_SAX*)sd->user; + + dom->current = NULL; + dom->error = PARSE_ERR_NONE; + dom->line_error = 0; + + return true; +} + +int DOMXMLDoc_node_start(const XMLNode* node, SAX_Data* sd) +{ + DOM_through_SAX* dom = (DOM_through_SAX*)sd->user; + XMLNode* new_node; + int i; + + if ((new_node = XMLNode_dup(node, true)) == NULL) goto node_start_err; /* No real need to put 'true' for 'XMLNode_dup', but cleaner */ + + if (dom->current == NULL) { + if ((i = _add_node(&dom->doc->nodes, &dom->doc->n_nodes, new_node)) < 0) goto node_start_err; + + if (dom->doc->i_root < 0 && (node->tag_type == TAG_FATHER || node->tag_type == TAG_SELF)) + dom->doc->i_root = i; + } else { + if (_add_node(&dom->current->children, &dom->current->n_children, new_node) < 0) goto node_start_err; + } + + new_node->father = dom->current; + dom->current = new_node; + + return true; + +node_start_err: + dom->error = PARSE_ERR_MEMORY; + dom->line_error = sd->line_num; + (void)XMLNode_free(new_node); + __free(new_node); + + return false; +} + +int DOMXMLDoc_node_end(const XMLNode* node, SAX_Data* sd) +{ + DOM_through_SAX* dom = (DOM_through_SAX*)sd->user; + + if (dom->current == NULL || sx_strcmp(dom->current->tag, node->tag)) { + sx_fprintf(stderr, C2SX("%s:%d: ERROR - End tag was unexpected"), sd->name, sd->line_num, node->tag); + if (dom->current != NULL) + sx_fprintf(stderr, C2SX(" ( was expected)\n"), dom->current->tag); + else + sx_fprintf(stderr, C2SX(" (no node to end)\n")); + + dom->error = PARSE_ERR_UNEXPECTED_NODE_END; + dom->line_error = sd->line_num; + + return false; + } + + dom->current = dom->current->father; + + return true; +} + +int DOMXMLDoc_node_text(SXML_CHAR* text, SAX_Data* sd) +{ + SXML_CHAR* p = text; + DOM_through_SAX* dom = (DOM_through_SAX*)sd->user; + + /* Keep text, even if it is only spaces */ +#if 0 + while(*p != NULC && sx_isspace(*p++)) ; + if (*p == NULC) return true; /* Only 
spaces */ +#endif + + /* If there is no current node to add text to, raise an error, except if text is only spaces, in which case it is probably just formatting */ + if (dom->current == NULL) { + while(*p != NULC && sx_isspace(*p++)) ; + if (*p == NULC) /* Only spaces => probably pretty-printing */ + return true; + dom->error = PARSE_ERR_TEXT_OUTSIDE_NODE; + dom->line_error = sd->line_num; + return false; /* There is some "real" text => raise an error */ + } + + if (dom->text_as_nodes) { + XMLNode* new_node = XMLNode_allocN(1); + if (new_node == NULL || (new_node->text = sx_strdup(text)) == NULL + || _add_node(&dom->current->children, &dom->current->n_children, new_node) < 0) { + dom->error = PARSE_ERR_MEMORY; + dom->line_error = sd->line_num; + (void)XMLNode_free(new_node); + __free(new_node); + return false; + } + new_node->tag_type = TAG_TEXT; + new_node->father = dom->current; + //dom->current->tag_type = TAG_FATHER; // OS: should parent field be forced to be TAG_FATHER? now it has at least one TAG_TEXT child. 
I decided not to enforce this to enforce backward-compatibility related to tag_types + return true; + } else { /* Old behaviour: concatenate text to the previous one */ + /* 'p' will point at the new text */ + if (dom->current->text == NULL) { + p = sx_strdup(text); + } else { + p = (SXML_CHAR*)__realloc(dom->current->text, (sx_strlen(dom->current->text) + sx_strlen(text) + 1)*sizeof(SXML_CHAR)); + if (p != NULL) + sx_strcat(p, text); + } + if (p == NULL) { + dom->error = PARSE_ERR_MEMORY; + dom->line_error = sd->line_num; + return false; + } + + dom->current->text = p; + } + + return true; +} + +int DOMXMLDoc_parse_error(ParseError error_num, int line_number, SAX_Data* sd) +{ + DOM_through_SAX* dom = (DOM_through_SAX*)sd->user; + + dom->error = error_num; + dom->line_error = line_number; + + /* Complete error message will be displayed in 'DOMXMLDoc_doc_end' callback */ + + return false; /* Stop on error */ +} + +int DOMXMLDoc_doc_end(SAX_Data* sd) +{ + DOM_through_SAX* dom = (DOM_through_SAX*)sd->user; + + if (dom->error != PARSE_ERR_NONE) { + SXML_CHAR* msg; + + switch (dom->error) { + case PARSE_ERR_MEMORY: msg = C2SX("MEMORY"); break; + case PARSE_ERR_UNEXPECTED_TAG_END: msg = C2SX("UNEXPECTED_TAG_END"); break; + case PARSE_ERR_SYNTAX: msg = C2SX("SYNTAX"); break; + case PARSE_ERR_EOF: msg = C2SX("UNEXPECTED_END_OF_FILE"); break; + case PARSE_ERR_TEXT_OUTSIDE_NODE: msg = C2SX("TEXT_OUTSIDE_NODE"); break; + case PARSE_ERR_UNEXPECTED_NODE_END: msg = C2SX("UNEXPECTED_NODE_END"); break; + default: msg = C2SX("UNKNOWN"); break; + } + sx_fprintf(stderr, C2SX("%s:%d: An error was found (%s), loading aborted...\n"), sd->name, dom->line_error, msg); + dom->current = NULL; + (void)XMLDoc_free(dom->doc); + dom->doc = NULL; + } + + return true; +} + +int SAX_Callbacks_init_DOM(SAX_Callbacks* sax) +{ + if (sax == NULL) + return false; + + sax->start_doc = DOMXMLDoc_doc_start; + sax->start_node = DOMXMLDoc_node_start; + sax->end_node = DOMXMLDoc_node_end; + sax->new_text = 
DOMXMLDoc_node_text; + sax->on_error = DOMXMLDoc_parse_error; + sax->end_doc = DOMXMLDoc_doc_end; + sax->all_event = NULL; + + return true; +} + +int XMLDoc_parse_file_SAX(const SXML_CHAR* filename, const SAX_Callbacks* sax, void* user) +{ + FILE* f; + int ret; + SAX_Data sd; + SXML_CHAR* fmode = +#ifndef SXMLC_UNICODE + C2SX("rt"); +#else + C2SX("rb"); /* In Unicode, open the file as binary so that further 'fgetwc' read all bytes */ + BOM_TYPE bom; +#endif + + + if (sax == NULL || filename == NULL || filename[0] == NULC) + return false; + + f = sx_fopen(filename, fmode); + if (f == NULL) + return false; + /* Microsoft' 'ftell' returns invalid position for Unicode text files + (see http://connect.microsoft.com/VisualStudio/feedback/details/369265/ftell-ftell-nolock-incorrectly-handling-unicode-text-translation) + However, we're opening the file as binary in Unicode so we don't fall into that case... + */ + #if defined(SXMLC_UNICODE) && (defined(WIN32) || defined(WIN64)) + //setvbuf(f, NULL, _IONBF, 0); + #endif + + sd.name = (SXML_CHAR*)filename; + sd.user = user; +#ifdef SXMLC_UNICODE + bom = freadBOM(f, NULL, NULL); /* Skip BOM, if any */ + /* In Unicode, re-open the file in text-mode if there is no BOM (or UTF-8) as we assume that + the file is "plain" text (i.e. 1 byte = 1 character). If opened in binary mode, 'fgetwc' + would read 2 bytes for 1 character, which would not work on "plain" files. 
*/ + if (bom == BOM_NONE || bom == BOM_UTF_8) { + fclose(f); + f = sx_fopen(filename, C2SX("rt")); + if (f == NULL) + return false; + if (bom == BOM_UTF_8) + freadBOM(f, NULL, NULL); /* Skip the UTF-8 BOM that was found */ + } +#endif + ret = _parse_data_SAX((void*)f, DATA_SOURCE_FILE, sax, &sd); + (void)fclose(f); + + return ret; +} + +int XMLDoc_parse_buffer_SAX(const SXML_CHAR* buffer, const SXML_CHAR* name, const SAX_Callbacks* sax, void* user) +{ + DataSourceBuffer dsb = { buffer, 0 }; + SAX_Data sd; + + if (sax == NULL || buffer == NULL) + return false; + + sd.name = name; + sd.user = user; + return _parse_data_SAX((void*)&dsb, DATA_SOURCE_BUFFER, sax, &sd); +} + +int XMLDoc_parse_file_DOM_text_as_nodes(const SXML_CHAR* filename, XMLDoc* doc, int text_as_nodes) +{ + DOM_through_SAX dom; + SAX_Callbacks sax; + + if (doc == NULL || filename == NULL || filename[0] == NULC || doc->init_value != XML_INIT_DONE) + return false; + + sx_strncpy(doc->filename, filename, SXMLC_MAX_PATH - 1); + doc->filename[SXMLC_MAX_PATH - 1] = NULC; + + /* Read potential BOM on file, only when unicode is defined */ +#ifdef SXMLC_UNICODE + { + /* In Unicode, open the file as binary so that further 'fgetwc' read all bytes */ + FILE* f = sx_fopen(filename, C2SX("rb")); + if (f != NULL) { + #if defined(SXMLC_UNICODE) && (defined(WIN32) || defined(WIN64)) + //setvbuf(f, NULL, _IONBF, 0); + #endif + doc->bom_type = freadBOM(f, doc->bom, &doc->sz_bom); + fclose(f); + } + } +#endif + + dom.doc = doc; + dom.current = NULL; + dom.text_as_nodes = text_as_nodes; + SAX_Callbacks_init_DOM(&sax); + + if (!XMLDoc_parse_file_SAX(filename, &sax, &dom)) { + (void)XMLDoc_free(doc); + dom.doc = NULL; + return false; + } + + return true; +} + +int XMLDoc_parse_buffer_DOM_text_as_nodes(const SXML_CHAR* buffer, const SXML_CHAR* name, XMLDoc* doc, int text_as_nodes) +{ + DOM_through_SAX dom; + SAX_Callbacks sax; + + if (doc == NULL || buffer == NULL || doc->init_value != XML_INIT_DONE) + return false; + + 
dom.doc = doc; + dom.current = NULL; + dom.text_as_nodes = text_as_nodes; + SAX_Callbacks_init_DOM(&sax); + + return XMLDoc_parse_buffer_SAX(buffer, name, &sax, &dom) ? true : XMLDoc_free(doc); +} + + + +/* --- Utility functions (ex sxmlutils.c) --- */ + +#ifdef DBG_MEM +static int nb_alloc = 0, nb_free = 0; + +void* __malloc(size_t sz) +{ + void* p = malloc(sz); + if (p != NULL) + nb_alloc++; + printf("0x%x: MALLOC (%d) - NA %d - NF %d = %d\n", p, sz, nb_alloc, nb_free, nb_alloc - nb_free); + return p; +} + +void* __calloc(size_t count, size_t sz) +{ + void* p = calloc(count, sz); + if (p != NULL) + nb_alloc++; + printf("0x%x: CALLOC (%d, %d) - NA %d - NF %d = %d\n", p, count, sz, nb_alloc, nb_free, nb_alloc - nb_free); + return p; +} + +void* __realloc(void* mem, size_t sz) +{ + void* p = realloc(mem, sz); + if (mem == NULL && p != NULL) + nb_alloc++; + else if (mem != NULL && sz == 0) + nb_free++; + printf("0x%x: REALLOC 0x%x (%d)", p, mem, sz); + if (mem == NULL) + printf(" - NA %d - NF %d = %d", nb_alloc, nb_free, nb_alloc - nb_free); + printf("\n"); + return p; +} + +void __free(void* mem) +{ + nb_free++; + printf("0x%x: FREE - NA %d - NF %d = %d\n", mem, nb_alloc, nb_free, nb_alloc - nb_free); + free(mem); +} + +char* __strdup(const char* s) +{ +/* Mimic the behavior of sx_strdup(), as we can't use it directly here: DBG_MEM is defined + and sx_strdup is this function! 
(bug #5) */ +#ifdef SXMLC_UNICODE + char* p = wcsdup(s); +#else + char* p = strdup(s); +#endif + if (p != NULL) + nb_alloc++; + printf("0x%x: STRDUP (%d) - NA %d - NF %d = %d\n", p, sx_strlen(s), nb_alloc, nb_free, nb_alloc - nb_free); + return p; +} +#endif + +/* Dictionary of special characters and their HTML equivalent */ +static struct _html_special_dict { + SXML_CHAR chr; /* Original character */ + SXML_CHAR* html; /* Equivalent HTML string */ + int html_len; /* 'sx_strlen(html)' */ +} HTML_SPECIAL_DICT[] = { + { C2SX('<'), C2SX("<"), 4 }, + { C2SX('>'), C2SX(">"), 4 }, + { C2SX('"'), C2SX("""), 6 }, + { C2SX('\''), C2SX("'"), 6 }, + { C2SX('&'), C2SX("&"), 5 }, + { NULC, NULL, 0 }, /* Terminator */ +}; + +int _bgetc(DataSourceBuffer* ds) +{ + if (ds == NULL || ds->buf[ds->cur_pos] == NULC) + return EOF; + + return (int)(ds->buf[ds->cur_pos++]); +} + +int _beob(DataSourceBuffer* ds) +{ + + if (ds == NULL || ds->buf[ds->cur_pos] == NULC) + return true; + + return false; +} + +int read_line_alloc(void* in, DataSourceType in_type, SXML_CHAR** line, int* sz_line, int i0, SXML_CHAR from, SXML_CHAR to, int keep_fromto, SXML_CHAR interest, int* interest_count) +{ + int init_sz = 0; + SXML_CHAR ch, *pt; + int c; + int n, ret; + int (*mgetc)(void* ds) = (in_type == DATA_SOURCE_BUFFER ? (int(*)(void*))_bgetc : (int(*)(void*))sx_fgetc); + int (*meos)(void* ds) = (in_type == DATA_SOURCE_BUFFER ? 
(int(*)(void*))_beob : (int(*)(void*))feof); + + if (in == NULL || line == NULL) + return 0; + + if (to == NULC) + to = C2SX('\n'); + /* Search for character 'from' */ + if (interest_count != NULL) + *interest_count = 0; + while (true) { + /* Reaching EOF before 'to' char is not an error but should trigger 'line' alloc and init to '' */ + if ((c = mgetc(in)) == EOF) + break; + ch = (SXML_CHAR)c; + if (interest_count != NULL && ch == interest) + (*interest_count)++; + /* If 'from' is '\0', we stop here */ + if (ch == from || from == NULC) + break; + } + + if (sz_line == NULL) + sz_line = &init_sz; + + if (*line == NULL || *sz_line == 0) { + if (*sz_line == 0) *sz_line = MEM_INCR_RLA; + *line = (SXML_CHAR*)__malloc(*sz_line*sizeof(SXML_CHAR)); + if (*line == NULL) + return 0; + } + if (i0 < 0) i0 = 0; + if (i0 > *sz_line) + return 0; + + n = i0; + if (c == CEOF) { /* EOF reached before 'to' char => return the empty string */ + (*line)[n] = NULC; + return meos(in) ? n : 0; /* Error if not EOF */ + } + if (ch != from || keep_fromto) + (*line)[n++] = ch; + (*line)[n] = NULC; + ret = 0; + while (true) { + if ((c = mgetc(in)) == CEOF) { /* EOF or error */ + (*line)[n] = NULC; + ret = meos(in) ? 
n : 0; + break; + } + ch = (SXML_CHAR)c; + if (interest_count != NULL && ch == interest) + (*interest_count)++; + (*line)[n] = ch; + if (ch != to || (keep_fromto && to != NULC && ch == to)) /* If we reached the 'to' character and we keep it, we still need to add the extra '\0' */ + n++; + if (n >= *sz_line) { /* Too many characters for our line => realloc some more */ + *sz_line += MEM_INCR_RLA; + pt = (SXML_CHAR*)__realloc(*line, *sz_line*sizeof(SXML_CHAR)); + if (pt == NULL) { + ret = 0; + break; + } else + *line = pt; + } + (*line)[n] = NULC; /* If we reached the 'to' character and we want to strip it, 'n' hasn't changed and 'line[n]' (which is 'to') will be replaced by '\0' */ + if (ch == to) { + ret = n; + break; + } + } + +#if 0 /* Automatic buffer resize is deactivated */ + /* Resize line to the exact size */ + pt = (SXML_CHAR*)__realloc(*line, (n+1)*sizeof(SXML_CHAR)); + if (pt != NULL) + *line = pt; +#endif + + return ret; +} + +/* --- */ + +SXML_CHAR* strcat_alloc(SXML_CHAR** src1, const SXML_CHAR* src2) +{ + SXML_CHAR* cat; + int n; + + /* Do not concatenate '*src1' with itself */ + if (src1 == NULL || *src1 == src2) + return NULL; + + /* Concatenate a NULL or empty string */ + if (src2 == NULL || *src2 == NULC) + return *src1; + + n = (*src1 == NULL ? 
0 : sx_strlen(*src1)) + sx_strlen(src2) + 1; + cat = (SXML_CHAR*)__realloc(*src1, n*sizeof(SXML_CHAR)); + if (cat == NULL) + return NULL; + if (*src1 == NULL) + *cat = NULC; + *src1 = cat; + sx_strcat(*src1, src2); + + return *src1; +} + +SXML_CHAR* strip_spaces(SXML_CHAR* str, SXML_CHAR repl_sq) +{ + SXML_CHAR* p; + int i, len; + + /* 'p' to the first non-space */ + for (p = str; *p != NULC && sx_isspace(*p); p++) ; /* No need to search for 'protect' as it is not a space */ + len = sx_strlen(str); + for (i = len-1; sx_isspace(str[i]); i--) ; + if (str[i] == C2SX('\\')) /* If last non-space is the protection, keep the last space */ + i++; + str[i+1] = NULC; /* New end of string to last non-space */ + + if (repl_sq == NULC) { + if (p == str && i == len) + return str; /* Nothing to do */ + for (i = 0; (str[i] = *p) != NULC; i++, p++) ; /* Copy 'p' to 'str' */ + return str; + } + + /* Squeeze all spaces with 'repl_sq' */ + i = 0; + while (*p != NULC) { + if (sx_isspace(*p)) { + str[i++] = repl_sq; + while (sx_isspace(*++p)) ; /* Skips all next spaces */ + } else { + if (*p == C2SX('\\')) + p++; + str[i++] = *p++; + } + } + str[i] = NULC; + + return str; +} + +SXML_CHAR* str_unescape(SXML_CHAR* str) +{ + int i, j; + + if (str == NULL) + return NULL; + + for (i = j = 0; str[j]; j++) { + if (str[j] == C2SX('\\')) + j++; + str[i++] = str[j]; + } + + return str; +} + +int split_left_right(SXML_CHAR* str, SXML_CHAR sep, int* l0, int* l1, int* i_sep, int* r0, int* r1, int ignore_spaces, int ignore_quotes) +{ + int n0, n1, is; + SXML_CHAR quote = '\0'; + + if (str == NULL) + return false; + + if (i_sep != NULL) + *i_sep = -1; + + if (!ignore_spaces) /* No sense of ignore quotes if spaces are to be kept */ + ignore_quotes = false; + + /* Parse left part */ + + if (ignore_spaces) { + for (n0 = 0; str[n0] != NULC && sx_isspace(str[n0]); n0++) ; /* Skip head spaces, n0 points to first non-space */ + if (ignore_quotes && isquote(str[n0])) { /* If quote is found, look for next one 
*/ + quote = str[n0++]; /* Quote can be '\'' or '"' */ + for (n1 = n0; str[n1] != NULC && str[n1] != quote; n1++) { + if (str[n1] == C2SX('\\') && str[++n1] == NULC) + break; /* Escape character (can be the last) */ + } + for (is = n1 + 1; str[is] != NULC && sx_isspace(str[is]); is++) ; /* '--' not to take quote into account */ + } else { + for (n1 = n0; str[n1] != NULC && str[n1] != sep && !sx_isspace(str[n1]); n1++) ; /* Search for separator or a space */ + for (is = n1; str[is] != NULC && sx_isspace(str[is]); is++) ; + } + } else { + n0 = 0; + for (n1 = 0; str[n1] != NULC && str[n1] != sep; n1++) ; /* Search for separator only */ + if (str[n1] != sep) /* Separator not found: malformed string */ + return false; + is = n1; + } + + /* Here 'n0' is the start of left member, 'n1' is the character after the end of left member */ + + if (l0 != NULL) + *l0 = n0; + if (l1 != NULL) + *l1 = n1 - 1; + if (i_sep != NULL) + *i_sep = is; + if (str[is] == NULC || str[is+1] == NULC) { /* No separator => empty right member */ + if (r0 != NULL) + *r0 = is; + if (r1 != NULL) + *r1 = is-1; + if (i_sep != NULL) + *i_sep = (str[is] == NULC ? 
-1 : is); + return true; + } + + /* Parse right part */ + + n0 = is + 1; + if (ignore_spaces) { + for (; str[n0] != NULC && sx_isspace(str[n0]); n0++) ; + if (ignore_quotes && isquote(str[n0])) + quote = str[n0]; + } + + for (n1 = ++n0; str[n1]; n1++) { + if (ignore_quotes && str[n1] == quote) /* Quote was reached */ + break; + if (str[n1] == C2SX('\\') && str[++n1] == NULC) /* Escape character (can be the last) */ + break; + } + if (ignore_quotes && str[n1--] != quote) /* Quote is not the same than earlier, '--' is not to take it into account */ + return false; + if (!ignore_spaces) + while (str[++n1]) ; /* Jump down the end of the string */ + + if (r0 != NULL) + *r0 = n0; + if (r1 != NULL) + *r1 = n1; + + return true; +} + +BOM_TYPE freadBOM(FILE* f, unsigned char* bom, int* sz_bom) +{ + unsigned char c1, c2; + long pos; + + if (f == NULL) + return BOM_NONE; + + /* Save position and try to read and skip BOM if found. If not, go back to save position. */ + pos = ftell(f); + if (pos < 0) + return BOM_NONE; + if (fread(&c1, sizeof(char), 1, f) != 1 || fread(&c2, sizeof(char), 1, f) != 1) { + fseek(f, pos, SEEK_SET); + return BOM_NONE; + } + if (bom != NULL) { + bom[0] = c1; + bom[1] = c2; + bom[2] = '\0'; + if (sz_bom != NULL) + *sz_bom = 2; + } + switch ((unsigned short)(c1 << 8) | c2) { + case (unsigned short)0xfeff: + return BOM_UTF_16BE; + + case (unsigned short)0xfffe: + pos = ftell(f); /* Save current position to get it back if BOM is not UTF-32LE */ + if (pos < 0) + return BOM_UTF_16LE; + if (fread(&c1, sizeof(char), 1, f) != 1 || fread(&c2, sizeof(char), 1, f) != 1) { + fseek(f, pos, SEEK_SET); + return BOM_UTF_16LE; + } + if (c1 == 0x00 && c2 == 0x00) { + if (bom != NULL) + bom[2] = bom[3] = bom[4] = '\0'; + if (sz_bom != NULL) + *sz_bom = 4; + return BOM_UTF_32LE; + } + fseek(f, pos, SEEK_SET); /* fseek(f, -2, SEEK_CUR) is not garanteed on Windows (and actually fail in Unicode...) 
*/ + return BOM_UTF_16LE; + + case (unsigned short)0x0000: + if (fread(&c1, sizeof(char), 1, f) == 1 && fread(&c2, sizeof(char), 1, f) == 1 + && c1 == 0xfe && c2 == 0xff) { + bom[2] = c1; + bom[3] = c2; + bom[4] = '\0'; + if (sz_bom != NULL) + *sz_bom = 4; + return BOM_UTF_32BE; + } + fseek(f, pos, SEEK_SET); + return BOM_NONE; + + case (unsigned short)0xefbb: /* UTF-8? */ + if (fread(&c1, sizeof(char), 1, f) != 1 || c1 != 0xbf) { /* Not UTF-8 */ + fseek(f, pos, SEEK_SET); + if (bom != NULL) + bom[0] = '\0'; + if (sz_bom != NULL) + *sz_bom = 0; + return BOM_NONE; + } + if (bom != NULL) { + bom[2] = c1; + bom[3] = '\0'; + } + if (sz_bom != NULL) + *sz_bom = 3; + return BOM_UTF_8; + + default: /* No BOM, go back */ + fseek(f, pos, SEEK_SET); + if (bom != NULL) + bom[0] = '\0'; + if (sz_bom != NULL) + *sz_bom = 0; + return BOM_NONE; + } +} + +/* --- */ + +SXML_CHAR* html2str(SXML_CHAR* html, SXML_CHAR* str) +{ + SXML_CHAR *ps, *pd; + int i; + + if (html == NULL) return NULL; + + if (str == NULL) str = html; + + /* Look for '&' and matches it to any of the recognized HTML pattern. */ + /* If found, replaces the '&' by the corresponding char. */ + /* 'p2' is the char to analyze, 'p1' is where to insert it */ + for (pd = str, ps = html; *ps; ps++, pd++) { + if (*ps != C2SX('&')) { + if (pd != ps) + *pd = *ps; + continue; + } + + for (i = 0; HTML_SPECIAL_DICT[i].chr; i++) { + if (sx_strncmp(ps, HTML_SPECIAL_DICT[i].html, HTML_SPECIAL_DICT[i].html_len)) + continue; + + *pd = HTML_SPECIAL_DICT[i].chr; + ps += HTML_SPECIAL_DICT[i].html_len-1; + break; + } + /* If no string was found, simply copy the character */ + if (HTML_SPECIAL_DICT[i].chr == NULC && pd != ps) + *pd = *ps; + } + *pd = NULC; + + return str; +} + +/* TODO: Allocate 'html'? 
*/ +SXML_CHAR* str2html(SXML_CHAR* str, SXML_CHAR* html) +{ + SXML_CHAR *ps, *pd; + int i; + + if (str == NULL) + return NULL; + + if (html == str) /* Not handled (yet) */ + return NULL; + + if (html == NULL) { /* Allocate 'html' to the correct size */ + html = __malloc(strlen_html(str) * sizeof(SXML_CHAR)); + if (html == NULL) + return NULL; + } + + for (ps = str, pd = html; *ps; ps++, pd++) { + for (i = 0; HTML_SPECIAL_DICT[i].chr; i++) { + if (*ps == HTML_SPECIAL_DICT[i].chr) { + sx_strcpy(pd, HTML_SPECIAL_DICT[i].html); + pd += HTML_SPECIAL_DICT[i].html_len - 1; + break; + } + } + if (HTML_SPECIAL_DICT[i].chr == NULC && pd != ps) + *pd = *ps; + } + *pd = NULC; + + return html; +} + +int strlen_html(SXML_CHAR* str) +{ + int i, j, n; + + if (str == NULL) + return 0; + + n = 0; + for (i = 0; str[i] != NULC; i++) { + for (j = 0; HTML_SPECIAL_DICT[j].chr; j++) { + if (str[i] == HTML_SPECIAL_DICT[j].chr) { + n += HTML_SPECIAL_DICT[j].html_len; + break; + } + } + if (HTML_SPECIAL_DICT[j].chr == NULC) + n++; + } + + return n; +} + +int fprintHTML(FILE* f, SXML_CHAR* str) +{ + SXML_CHAR* p; + int i, n; + + for (p = str, n = 0; *p != NULC; p++) { + for (i = 0; HTML_SPECIAL_DICT[i].chr; i++) { + if (*p != HTML_SPECIAL_DICT[i].chr) + continue; + sx_fprintf(f, HTML_SPECIAL_DICT[i].html); + n += HTML_SPECIAL_DICT[i].html_len; + break; + } + if (HTML_SPECIAL_DICT[i].chr == NULC) { + (void)sx_fputc(*p, f); + n++; + } + } + + return n; +} + +int regstrcmp(SXML_CHAR* str, SXML_CHAR* pattern) +{ + SXML_CHAR *p, *s; + + if (str == NULL && pattern == NULL) + return true; + + if (str == NULL || pattern == NULL) + return false; + + p = pattern; + s = str; + while (true) { + switch (*p) { + /* Any character matches, go to next one */ + case C2SX('?'): + p++; + s++; + break; + + /* Go to next character in pattern and wait until it is found in 'str' */ + case C2SX('*'): + for (; *p != NULC; p++) { /* Squeeze '**?*??**' to '*' */ + if (*p != C2SX('*') && *p != C2SX('?')) + break; + } + 
for (; *s != NULC; s++) { + if (*s == *p) + break; + } + break; + + /* NULL character on pattern has to be matched by 'str' */ + case 0: + return *s ? false : true; + + default: + if (*p == C2SX('\\')) /* Escape character */ + p++; + if (*p++ != *s++) /* Characters do not match */ + return false; + break; + } + } + + return false; +} diff --git a/sxmlclib.h b/sxmlclib.h new file mode 100644 index 0000000..c423043 --- /dev/null +++ b/sxmlclib.h @@ -0,0 +1,827 @@ +/* + Copyright (c) 2010, Matthieu Labas + All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY + OF SUCH DAMAGE. 
+ + The views and conclusions contained in the software and documentation are those of the + authors and should not be interpreted as representing official policies, either expressed + or implied, of the FreeBSD Project. +*/ +#ifndef _SXML_H_ +#define _SXML_H_ + +#define SXMLC_VERSION "4.2.4" + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +#ifdef SXMLC_UNICODE + typedef wchar_t SXML_CHAR; + #define C2SX(c) L ## c + #define CEOF WEOF + #define sx_strcmp wcscmp + #define sx_strncmp wcsncmp + #define sx_strlen wcslen + #define sx_strdup wcsdup + #define sx_strchr wcschr + #define sx_strrchr wcsrchr + #define sx_strcpy wcscpy + #define sx_strncpy wcsncpy + #define sx_strcat wcscat + #define sx_printf wprintf + #define sx_fprintf fwprintf + #define sx_sprintf swprintf + #define sx_fgetc fgetwc + #define sx_fputc fputwc + #define sx_isspace iswspace + #if defined(WIN32) || defined(WIN64) + #define sx_fopen _wfopen + #else + #define sx_fopen fopen + #endif + #define sx_fclose fclose +#else + typedef char SXML_CHAR; + #define C2SX(c) c + #define CEOF EOF + #define sx_strcmp strcmp + #define sx_strncmp strncmp + #define sx_strlen strlen + #define sx_strdup __strdup + #define sx_strchr strchr + #define sx_strrchr strrchr + #define sx_strcpy strcpy + #define sx_strncpy strncpy + #define sx_strcat strcat + #define sx_printf printf + #define sx_fprintf fprintf + #define sx_sprintf sprintf + #define sx_fgetc fgetc + #define sx_fputc fputc + #define sx_isspace(ch) isspace((int)ch) + #define sx_fopen fopen + #define sx_fclose fclose +#endif + +#ifdef DBG_MEM + void* __malloc(size_t sz); + void* __calloc(size_t count, size_t sz); + void* __realloc(void* mem, size_t sz); + void __free(void* mem); + char* __strdup(const char* s); +#else + #define __malloc malloc + #define __calloc calloc + #define __realloc realloc + #define __free free + #define __strdup strdup +#endif + +#ifndef MEM_INCR_RLA +#define MEM_INCR_RLA (256*sizeof(SXML_CHAR)) /* Initial buffer size and 
increment for memory reallocations */ +#endif + +#ifndef false +#define false 0 +#endif + +#ifndef true +#define true 1 +#endif + +/* String terminator in the configured character type */ +#define NULC ((SXML_CHAR)C2SX('\0')) +/* True when 'c' is a double or a single quote */ +#define isquote(c) (((c) == C2SX('"')) || ((c) == C2SX('\''))) + +/* + Buffer data source used by 'read_line_alloc' when required. + 'buf' should be 0-terminated. + */ +typedef struct _DataSourceBuffer { + const SXML_CHAR* buf; /* Text to read from (0-terminated) */ + int cur_pos; /* presumably the index in 'buf' of the next character to read - TODO confirm */ +} DataSourceBuffer; + +/* File data source: a plain 'FILE*' */ +typedef FILE* DataSourceFile; + +/* Kind of data source, as given to 'read_line_alloc' through its 'in_type' parameter */ +typedef enum _DataSourceType { + DATA_SOURCE_FILE = 0, + DATA_SOURCE_BUFFER, + DATA_SOURCE_MAX +} DataSourceType; + +/* NOTE: 'false' and 'true' are already guarded a few lines above; this second guard is redundant but harmless */ +#ifndef false +#define false 0 +#endif + +#ifndef true +#define true 1 +#endif + +/* Node types */ +typedef enum _TagType { + TAG_ERROR = -1, + TAG_NONE = 0, + TAG_PARTIAL, /* Node containing a legal '>' that stopped file reading */ + TAG_FATHER, /* <nodeName ...> - Next nodes will be children of this one. */ + TAG_SELF, /* <nodeName ... /> - Standalone node. */ + TAG_INSTR, /* <?prolog ... ?> - Processing instructions, or prolog node. */ + TAG_COMMENT, /* <!-- comment --> - Comment node */ + TAG_CDATA, /* <![CDATA[ ... ]]> - CDATA node */ + TAG_DOCTYPE, /* <!DOCTYPE ... > - DOCTYPE node */ + TAG_END, /* </nodeName> - End of father node. */ + TAG_TEXT, /* text node*/ + + TAG_USER = 100 /* User-defined tag start */ +} TagType; + +/* TODO: Performance improvement with some fixed-sized strings ??? (e.g. XMLAttribute.name[64], XMLNode.tag[64]) */ + +/* A single 'name="value"' attribute of a node */ +typedef struct _XMLAttribute { + SXML_CHAR* name; + SXML_CHAR* value; + int active; /* presumably 'true' when the attribute is taken into account, as for 'XMLNode.active' - TODO confirm */ +} XMLAttribute; + +/* Constant to know whether a struct has been initialized (XMLNode or XMLDoc) */ +#define XML_INIT_DONE 0x19770522 /* Happy Birthday ;) */ + +/* + An XML node.
+ */ +typedef struct _XMLNode { + SXML_CHAR* tag; /* Tag name */ + SXML_CHAR* text; /* Text inside the node */ + XMLAttribute* attributes; /* Attribute array ('n_attributes' entries) */ + int n_attributes; + + struct _XMLNode* father; /* NULL if root */ + struct _XMLNode** children; /* Array of pointers to child nodes ('n_children' entries) */ + int n_children; + + TagType tag_type; /* Node type ('TAG_FATHER', 'TAG_SELF' or 'TAG_END') */ + int active; /* 'true' to tell that node is active and should be displayed by 'XMLDoc_print' */ + + void* user; /* Pointer for user data associated to the node */ + + /* Keep 'init_value' as the last member */ + int init_value; /* Initialized to 'XML_INIT_DONE' to indicate that node has been initialized properly */ +} XMLNode; + +/* + An XML document. + */ +/* Size, in SXML_CHAR, of 'XMLDoc.filename'; can be overridden before including this file */ +#ifndef SXMLC_MAX_PATH +#define SXMLC_MAX_PATH 256 +#endif +typedef struct _XMLDoc { + SXML_CHAR filename[SXMLC_MAX_PATH]; +#ifdef SXMLC_UNICODE + /* NOTE(review): 'BOM_TYPE' is only typedef'd further down in this header (next to 'freadBOM'), i.e. after this use - confirm that SXMLC_UNICODE builds compile */ + BOM_TYPE bom_type; + unsigned char bom[5]; /* First characters read that might be a BOM when unicode is used */ + int sz_bom; /* Number of bytes in BOM */ +#endif + XMLNode** nodes; /* Nodes of the document, including prolog, comments and root nodes */ + int n_nodes; /* Number of nodes in 'nodes' */ + int i_root; /* Index of first root node in 'nodes', -1 if document is empty */ + + /* Keep 'init_value' as the last member */ + int init_value; /* Initialized to 'XML_INIT_DONE' to indicate that document has been initialized properly */ +} XMLDoc; + +/* + Register an XML tag, giving its 'start' and 'end' string, which should include '<' and '>'. + The 'tag_type' is user-given and has to be less than or equal to 'TAG_USER'. It will be + returned as the 'tag_type' member of the XMLNode struct. Note that no test is performed + to check for an already-existing tag_type. + Return tag index in user tags table when successful, or '-1' if the 'tag_type' is invalid or + the new tag could not be registered (e.g. when 'start' does not start with '<' or 'end' does not end with '>').
+ */ +int XML_register_user_tag(TagType tag_type, SXML_CHAR* start, SXML_CHAR* end); /* NOTE(review): the comment above says 'tag_type' must be "less than or equal to" TAG_USER, yet TAG_USER is documented as the start of user-defined tags - presumably "greater than or equal to" is meant; confirm against the implementation */ + +/* + Remove a registered user tag. + Return the new number of registered user tags or '-1' if 'i_tag' is invalid. + */ +int XML_unregister_user_tag(int i_tag); + +/* + Return the number of registered tags. + */ +int XML_get_nb_registered_user_tags(void); + +/* + Return the index of first occurrence of 'tag_type' in registered user tags, or '-1' if not found. + */ +int XML_get_registered_user_tag(TagType tag_type); + + +/* Error codes given to the 'on_error' SAX callback ('PARSE_ERR_NONE' means no error) */ +typedef enum _ParseError { + PARSE_ERR_NONE = 0, + PARSE_ERR_MEMORY = -1, + PARSE_ERR_UNEXPECTED_TAG_END = -2, + PARSE_ERR_SYNTAX = -3, + PARSE_ERR_EOF = -4, + PARSE_ERR_TEXT_OUTSIDE_NODE = -5, /* During DOM loading */ + PARSE_ERR_UNEXPECTED_NODE_END = -6 /* During DOM loading */ +} ParseError; + +/* + Events that can happen when loading an XML document. + These will be passed to the 'all_event' callback of the SAX parser. + */ +typedef enum _XMLEvent { + XML_EVENT_START_DOC, + XML_EVENT_START_NODE, + XML_EVENT_END_NODE, + XML_EVENT_TEXT, + XML_EVENT_ERROR, + XML_EVENT_END_DOC +} XMLEvent; + +/* + Structure given as an argument for SAX callbacks to retrieve information about + parsing status + */ +typedef struct _SAX_Data { + const SXML_CHAR* name; /* presumably the name of the file or buffer being parsed - TODO confirm */ + int line_num; /* presumably the current line number in the stream - TODO confirm */ + void* user; /* presumably the 'user' pointer given to 'XMLDoc_parse_*_SAX' - TODO confirm */ +} SAX_Data; + +/* + User callbacks used for SAX parsing. Return values of these callbacks should be 0 to stop parsing. + Members can be set to NULL to disable handling of some events. + All parameters are pointers to structures that will no longer be available after callback returns. + It is recommended that the callback uses the information and stores it in its own data structure. + WARNING! SAX PARSING DOES NOT CHECK FOR XML INTEGRITY! e.g. a tag end without a matching tag start + will not be detected by the parser and should be detected by the callbacks instead. + */ +typedef struct _SAX_Callbacks { + /* + Callback called when parsing starts, before parsing the first node.
+ */ + int (*start_doc)(SAX_Data* sd); + + /* + Callback called when a new node starts (e.g. '<tag>' or '<tag/>'). + If any, attributes can be read from 'node->attributes'. + N.B. '<tag/>' will trigger an immediate call to the 'end_node' callback + after the 'start_node' callback. + */ + int (*start_node)(const XMLNode* node, SAX_Data* sd); + + /* + Callback called when a node ends (e.g. '</tag>' or '<tag/>'). + */ + int (*end_node)(const XMLNode* node, SAX_Data* sd); + + /* + Callback called when text has been found in the last node. + */ + int (*new_text)(SXML_CHAR* text, SAX_Data* sd); + + /* + Callback called when parsing is finished. + No other callbacks will be called after it. + */ + int (*end_doc)(SAX_Data* sd); + + /* + Callback called when an error occurs during parsing. + 'error_num' is the error number and 'line_number' is the line number in the stream + being read (file or buffer). + */ + int (*on_error)(ParseError error_num, int line_number, SAX_Data* sd); + + /* + Generic callback that receives every kind of event (see 'XMLEvent'). + 'event' is the type of event for which the callback was called: + XML_EVENT_START_DOC: + 'node' is NULL. + 'text' is the file name if a file is being parsed, NULL if a buffer is being parsed. + 'n' is 0. + XML_EVENT_START_NODE: + 'node' is the node starting, with tag and all attributes initialized. + 'text' is NULL. + 'n' is the number of lines parsed. + XML_EVENT_END_NODE: + 'node' is the node ending, with tag, attributes and text initialized. + 'text' is NULL. + 'n' is the number of lines parsed. + XML_EVENT_TEXT: + 'node' is NULL. + 'text' is the text to be added to last node started and not finished. + 'n' is the number of lines parsed. + XML_EVENT_ERROR: + Everything is NULL. + 'n' is one of the 'PARSE_ERR_*'. + XML_EVENT_END_DOC: + 'node' is NULL. + 'text' is the file name if a file is being parsed, NULL if a buffer is being parsed. + 'n' is the number of lines parsed.
+ */ + int (*all_event)(XMLEvent event, const XMLNode* node, SXML_CHAR* text, const int n, SAX_Data* sd); +} SAX_Callbacks; + +/* + Helper function to initialize all 'sax' members to NULL. + Return 'false' if 'sax' is NULL. + */ +int SAX_Callbacks_init(SAX_Callbacks* sax); + +/* + Set of SAX callbacks used by 'XMLDoc_parse_file_DOM'. + These are made available to be able to load an XML document using DOM implementation + with user-defined code at some point (e.g. counting nodes, running search, ...). + In this case, the 'XMLDoc_parse_file_SAX' has to be called instead of the 'XMLDoc_parse_file_DOM', + providing either these callbacks directly, or functions calling these callbacks. + To do that, you should initialize the 'doc' member of the 'DOM_through_SAX' struct and call the + 'XMLDoc_parse_file_SAX' giving this struct as the 'user' data pointer. + */ + +typedef struct _DOM_through_SAX { + XMLDoc* doc; /* Document to fill up */ + XMLNode* current; /* For internal use (current father node) */ + ParseError error; /* For internal use (parse status) */ + int line_error; /* For internal use (line number when error occurred) */ + int text_as_nodes; /* For internal use (store text inside nodes as sequential TAG_TEXT nodes) */ +} DOM_through_SAX; + +int DOMXMLDoc_doc_start(SAX_Data* dom); +int DOMXMLDoc_node_start(const XMLNode* node, SAX_Data* dom); +int DOMXMLDoc_node_text(SXML_CHAR* text, SAX_Data* dom); +int DOMXMLDoc_node_end(const XMLNode* node, SAX_Data* dom); +int DOMXMLDoc_parse_error(ParseError error_num, int line_number, SAX_Data* sd); +int DOMXMLDoc_doc_end(SAX_Data* dom); + +/* + Initialize 'sax' with the "official" DOM callbacks. + */ +int SAX_Callbacks_init_DOM(SAX_Callbacks* sax); + +/* --- XMLNode methods --- */ + +/* + Fills 'xmlattr' with 'xmlattr->name' to 'attrName' and 'xmlattr->value' to 'attr Value'. + 'str' is supposed to be like 'attrName[ ]=[ ]["]attr Value["]'. + Return 0 if not enough memory or bad parameters (NULL 'str' or 'xmlattr').
+ 2 if last quote is missing in the attribute value. + 1 if 'xmlattr' was filled correctly. + */ +int XML_parse_attribute_to(const SXML_CHAR* str, int to, XMLAttribute* xmlattr); /* NOTE(review): 'to' is not described above; 'XML_parse_attribute' passes -1 for it - presumably an end index in 'str', -1 meaning "up to the end"; confirm against the implementation */ + +/* Parse a whole attribute string (calls 'XML_parse_attribute_to' with 'to' set to -1) */ +#define XML_parse_attribute(str, xmlattr) XML_parse_attribute_to(str, -1, xmlattr) + +/* + Reads a string that is supposed to be an xml tag like '<tag attrib="value" ... [/]>' or '</tag>'. + Fills the 'xmlnode' structure with the tag name and its attributes. + Returns 0 if an error occurred (malformed 'str' or memory). 'TAG_*' when string is recognized. + */ +TagType XML_parse_1string(const SXML_CHAR* str, XMLNode* xmlnode); + +/* + Allocate and initialize XML nodes. + 'n' is the number of contiguous elements to allocate (to create an array). + Return 'NULL' if not enough memory, or the pointer to the elements otherwise. + */ +XMLNode* XMLNode_allocN(int n); + +/* + Shortcut to allocate one node only. + */ +#define XMLNode_alloc() XMLNode_allocN(1) + +/* + Initialize an already-allocated XMLNode. + */ +int XMLNode_init(XMLNode* node); + +/* + Free a node and all its children. + */ +int XMLNode_free(XMLNode* node); + +/* + Free XMLNode 'dst' and copy 'src' to 'dst', along with its children if specified. + If 'src' is NULL, 'dst' is freed and initialized. + */ +int XMLNode_copy(XMLNode* dst, const XMLNode* src, int copy_children); + +/* + Allocate a node and copy 'node' into it. + If 'copy_children' is 'true', all children of 'node' will be copied to the new node. + Return 'NULL' if not enough memory, or a pointer to the new node otherwise. + */ +XMLNode* XMLNode_dup(const XMLNode* node, int copy_children); + +/* + Set the active/inactive state of 'node'. + Set 'active' to 'true' to activate 'node' and all its children, and enable its use + in other functions (e.g. 'XMLDoc_print', 'XMLNode_search_child'). + */ +int XMLNode_set_active(XMLNode* node, int active); + +/* + Set 'node' tag. + Return 'false' for memory error, 'true' otherwise.
+ */ +int XMLNode_set_tag(XMLNode* node, const SXML_CHAR* tag); + +/* + Set the node type among one of the valid ones (TAG_FATHER, TAG_SELF, TAG_INSTR, + TAG_COMMENT, TAG_CDATA, TAG_DOCTYPE) or any user-registered tag. + Return 'false' when the node or the 'tag_type' is invalid. + */ +int XMLNode_set_type(XMLNode* node, const TagType tag_type); + +/* + Add an attribute to 'node' or update an existing one. + The attribute is given by its name 'attr_name' and its value 'attr_value'. + Return the new number of attributes, or -1 for memory problem. + */ +int XMLNode_set_attribute(XMLNode* node, const SXML_CHAR* attr_name, const SXML_CHAR* attr_value); + +/* + Retrieve an attribute value, based on its name, allocating 'attr_value'. + If the attribute name does not exist, set 'attr_value' to the given default value. + Return 'false' when the node is invalid, 'attr_name' is NULL or empty, or 'attr_value' is NULL. + NOTE(review): who owns (and frees) '*attr_value' is not stated here - confirm against the implementation. + */ +int XMLNode_get_attribute_with_default(XMLNode* node, const SXML_CHAR* attr_name, const SXML_CHAR** attr_value, const SXML_CHAR* default_attr_value); + +/* + Helper macro that retrieves an attribute value, or an empty string if the attribute does + not exist. + */ +#define XMLNode_get_attribute(node, attr_name, attr_value) XMLNode_get_attribute_with_default(node, attr_name, attr_value, C2SX("")) + +/* + Return the number of active attributes of 'node', or '-1' if 'node' is invalid. +*/ +int XMLNode_get_attribute_count(const XMLNode* node); + +/* + Search for the active attribute 'attr_name' in 'node', starting from index 'isearch' + and return its index, or -1 if not found or error. + */ +int XMLNode_search_attribute(const XMLNode* node, const SXML_CHAR* attr_name, int isearch); + +/* + Remove attribute index 'i_attr'. + Return the new number of attributes or -1 on invalid arguments. + */ +int XMLNode_remove_attribute(XMLNode* node, int i_attr); + +/* + Remove all attributes from 'node'. + */ +int XMLNode_remove_all_attributes(XMLNode* node); + +/* + Set node text.
+ Return 'true' when successful, 'false' on error. + */ +int XMLNode_set_text(XMLNode* node, const SXML_CHAR* text); + +/* + Helper macro to remove text from 'node'. + */ +#define XMLNode_remove_text(node) XMLNode_set_text(node, NULL); + +/* + Add a child to a node. + Return 'false' for memory problem, 'true' otherwise. + */ +int XMLNode_add_child(XMLNode* node, XMLNode* child); + +/* + Return the number of active children nodes of 'node', or '-1' if 'node' is invalid. + */ +int XMLNode_get_children_count(const XMLNode* node); + +/* + Return a reference to the 'i_child'th active node. + */ +XMLNode* XMLNode_get_child(const XMLNode* node, int i_child); + +/* + Remove the 'i_child'th active child of 'node'. + If 'free_child' is 'true', free the child node itself. This parameter is usually 'true' + but should be 'false' when child nodes are pointers to local or global variables instead of + user-allocated memory. + Return the new number of children or -1 on invalid arguments. + */ +int XMLNode_remove_child(XMLNode* node, int i_child, int free_child); + +/* + Remove all children from 'node'. + */ +int XMLNode_remove_children(XMLNode* node); + +/* + Return 'true' if 'node1' is the same as 'node2' (i.e. same tag, same active attributes). + */ +int XMLNode_equal(const XMLNode* node1, const XMLNode* node2); + +/* + Return the next sibling of node 'node', or NULL if 'node' is invalid or the last child + or if its father could not be determined (i.e. 'node' is a root node). + */ +XMLNode* XMLNode_next_sibling(const XMLNode* node); + +/* + Return the next node in XML order i.e. first child or next sibling, or NULL + if 'node' is invalid or the end of its root node is reached. + */ +XMLNode* XMLNode_next(const XMLNode* node); + + +/* --- XMLDoc methods --- */ + + +/* + Initializes an already-allocated XML document. + */ +int XMLDoc_init(XMLDoc* doc); + +/* + Free an XML document. + Return 'false' if 'doc' was not initialized. 
+ */ +int XMLDoc_free(XMLDoc* doc); + +/* + Set the new 'doc' root node among all existing nodes in 'doc'. + Return 'false' if bad arguments, 'true' otherwise. + */ +int XMLDoc_set_root(XMLDoc* doc, int i_root); + +/* + Add a node to the document, specifying the type. + If its type is TAG_FATHER, it also sets the document root node if previously undefined. + Return the node index, or -1 if bad arguments or memory error. + */ +int XMLDoc_add_node(XMLDoc* doc, XMLNode* node); + +/* + Remove a node from 'doc' root nodes, base on its index. + If 'free_node' is 'true', free the node itself. This parameter is usually 'true' + but should be 'false' when the node is a pointer to local or global variable instead of + user-allocated memory. + Return 'true' if node was removed or 'false' if 'doc' or 'i_node' is invalid. + */ +int XMLDoc_remove_node(XMLDoc* doc, int i_node, int free_node); + +/* + Shortcut macro to retrieve root node from a document. + Equivalent to + doc->nodes[doc->i_root] + */ +#define XMLDoc_root(doc) ((doc)->nodes[(doc)->i_root]) + +/* + Shortcut macro to add a node to 'doc' root node. + Equivalent to + XMLDoc_add_child_root(XMLDoc* doc, XMLNode* child); + */ +#define XMLDoc_add_child_root(doc, child) XMLNode_add_child((doc)->nodes[(doc)->i_root], (child)) + +/* + Default quote to use to print attribute value. + User can redefine it with its own character by adding a #define XML_DEFAULT_QUOTE before including + this file. + */ +#ifndef XML_DEFAULT_QUOTE +#define XML_DEFAULT_QUOTE C2SX('"') +#endif + +/* + Print the node and its children to a file (that can be stdout). + - 'tag_sep' is the string to use to separate nodes from each other (usually "\n"). + - 'child_sep' is the additional string to put for each child level (usually "\t"). + - 'keep_text_spaces' indicates that text should not be printed if it is composed of + spaces, tabs or new lines only (e.g. when XML document spans on several lines due to + pretty-printing). 
+ - 'sz_line' is the maximum number of characters that can be put on a single line. The + node remainder will be output to extra lines. + - 'nb_char_tab' is how many characters should be counted for a tab when counting characters + in the line. It usually is 8 or 4, but at least 1. + - 'depth' is an internal parameter that is used to determine recursively how deep we are in + the tree. It should be initialized to 0 at first call. + Return 'false' on invalid arguments (NULL 'node' or 'f'), 'true' otherwise. + */ +int XMLNode_print_attr_sep(const XMLNode* node, FILE* f, const SXML_CHAR* tag_sep, const SXML_CHAR* child_sep, const SXML_CHAR* attr_sep, int keep_text_spaces, int sz_line, int nb_char_tab); + +/* For backward compatibility */ +#define XMLNode_print(node, f, tag_sep, child_sep, keep_text_spaces, sz_line, nb_char_tab) XMLNode_print_attr_sep(node, f, tag_sep, child_sep, C2SX(" "), keep_text_spaces, sz_line, nb_char_tab) + +/* + Print the node "header": , spanning it on several lines if needed. + Return 'false' on invalid arguments (NULL 'node' or 'f'), 'true' otherwise. + */ +int XMLNode_print_header(const XMLNode* node, FILE* f, int sz_line, int nb_char_tab); + +/* + Prints the XML document using 'XMLNode_print' on all document root nodes. + */ +int XMLDoc_print_attr_sep(const XMLDoc* doc, FILE* f, const SXML_CHAR* tag_sep, const SXML_CHAR* child_sep, const SXML_CHAR* attr_sep, int keep_text_spaces, int sz_line, int nb_char_tab); + +/* For backward compatibility */ +#define XMLDoc_print(doc, f, tag_sep, child_sep, keep_text_spaces, sz_line, nb_char_tab) XMLDoc_print_attr_sep(doc, f, tag_sep, child_sep, C2SX(" "), keep_text_spaces, sz_line, nb_char_tab) + +/* + Create a new XML document from a given 'filename' and load it to 'doc'. + 'text_as_nodes' should be non-zero to put text into separate TAG_TEXT nodes. + Return 'false' in case of error (memory or unavailable filename, malformed document), 'true' otherwise. 
+ */ +int XMLDoc_parse_file_DOM_text_as_nodes(const SXML_CHAR* filename, XMLDoc* doc, int text_as_nodes); + +/* For backward compatibility */ +#define XMLDoc_parse_file_DOM(filename, doc) XMLDoc_parse_file_DOM_text_as_nodes(filename, doc, 0) + +/* + Create a new XML document from a memory buffer 'buffer' that can be given a name 'name', and load + it into 'doc'. + 'text_as_nodes' should be non-zero to put text into separate TAG_TEXT nodes. + Return 'false' in case of error (memory or unavailable filename, malformed document), 'true' otherwise. + */ +int XMLDoc_parse_buffer_DOM_text_as_nodes(const SXML_CHAR* buffer, const SXML_CHAR* name, XMLDoc* doc, int text_as_nodes); + +/* For backward compatibility */ +#define XMLDoc_parse_buffer_DOM(buffer, name, doc) XMLDoc_parse_buffer_DOM_text_as_nodes(buffer, name, doc, 0) + +/* + Parse an XML document from a given 'filename', calling SAX callbacks given in the 'sax' structure. + 'user' is a user-given pointer that will be given back to all callbacks. + Return 'false' in case of error (memory or unavailable filename, malformed document), 'true' otherwise. + */ +int XMLDoc_parse_file_SAX(const SXML_CHAR* filename, const SAX_Callbacks* sax, void* user); + +/* + Parse an XML document from a memory buffer 'buffer' that can be given a name 'name', + calling SAX callbacks given in the 'sax' structure. + 'user' is a user-given pointer that will be given back to all callbacks. + Return 'false' in case of error (memory or unavailable filename, malformed document), 'true' otherwise. + */ +int XMLDoc_parse_buffer_SAX(const SXML_CHAR* buffer, const SXML_CHAR* name, const SAX_Callbacks* sax, void* user); + +/* + Parse an XML file using the DOM implementation. + */ +#define XMLDoc_parse_file XMLDOC_parse_file_DOM + + + +/* --- Utility functions --- */ + +/* + Functions to get next byte from buffer data source and know if the end has been reached. + Return as 'fgetc' and 'feof' would for 'FILE*'. 
+ */ +int _bgetc(DataSourceBuffer* ds); +int _beob(DataSourceBuffer* ds); +/* + Reads a line from data source 'in', (re-)allocating a given buffer 'line' if necessary. + Characters read will be stored in 'line' starting at 'i0' (this allows multiple calls to + 'read_line_alloc' on the same 'line' buffer without overwriting it at each call). + 'in_type' specifies the type of data source to be read: 'in' is 'FILE*' if 'in_type' + is 'DATA_SOURCE_FILE' and, presumably, 'DataSourceBuffer*' if it is 'DATA_SOURCE_BUFFER' + (the original sentence was cut short here - TODO confirm against the implementation). + 'sz_line' is the size of the buffer 'line' if previously allocated. 'line' can point + to NULL, in which case it will be allocated '*sz_line' bytes. After the function returns, + '*sz_line' is the actual buffer size. This allows multiple calls to this function using the + same buffer (without re-allocating/freeing). + If 'sz_line' is non NULL and non 0, it means that '*line' is a VALID pointer to a location + of '*sz_line' SXML_CHAR (not bytes! Multiply by sizeof(SXML_CHAR) to get number of bytes). + Searches for character 'from' until character 'to'. If 'from' is 0, starts from + current position. If 'to' is 0, it is replaced by '\n'. + If 'keep_fromto' is 0, removes characters 'from' and 'to' from the line. + If 'interest_count' is not NULL, will receive the count of 'interest' characters while searching + for 'to' (e.g. use 'interest'='\n' to count lines in file). + Returns the number of characters in the line or 0 if an error occurred. + 'read_line_alloc' uses constant 'MEM_INCR_RLA' to reallocate memory when needed. It is possible + to override this definition to use another value. + */ +int read_line_alloc(void* in, DataSourceType in_type, SXML_CHAR** line, int* sz_line, int i0, SXML_CHAR from, SXML_CHAR to, int keep_fromto, SXML_CHAR interest, int* interest_count); + +/* + Concatenates the string pointed at by 'src1' with 'src2' into '*src1' and + return it ('*src1'). + Return NULL when out of memory.
+ */ +SXML_CHAR* strcat_alloc(SXML_CHAR** src1, const SXML_CHAR* src2); + +/* + Strip spaces at the beginning and end of 'str', modifying 'str'. + If 'repl_sq' is not '\0', squeezes spaces to a single character ('repl_sq'). + If not '\0', 'protect' is used to protect spaces from being deleted (usually a backslash). + Returns the string or NULL if 'protect' is a space (which would not make sense). + NOTE(review): the prototype below has no 'protect' parameter, so the two sentences about it + look stale - confirm against the implementation. + */ +SXML_CHAR* strip_spaces(SXML_CHAR* str, SXML_CHAR repl_sq); + +/* + Remove '\' characters from 'str', modifying it. + Return 'str'. + */ +SXML_CHAR* str_unescape(SXML_CHAR* str); + +/* + Split 'str' into a left and right part around a separator 'sep'. + The left part is located between indexes 'l0' and 'l1' while the right part is + between 'r0' and 'r1' and the separator position is at 'i_sep' (whenever these are + not NULL). + If 'ignore_spaces' is 'true', computed indexes will not take into account potential + spaces around the separator as well as before left part and after right part. + If 'ignore_quotes' is 'true', " or ' will not be taken into account when parsing left + and right members. + Whenever the right member is empty (e.g. "attrib" or "attrib="), '*r0' is initialized + to 'str' size and '*r1' to '*r0-1' (crossed). + If the separator was not found (i.e. left member only), '*i_sep' is '-1'. + Return 'false' when 'str' is malformed, 'true' when splitting was successful. + */ +int split_left_right(SXML_CHAR* str, SXML_CHAR sep, int* l0, int* l1, int* i_sep, int* r0, int* r1, int ignore_spaces, int ignore_quotes); + +/* + Byte Order Marks recognized by 'freadBOM'; each value is the BOM byte sequence read as a number. + NOTE(review): 'BOM_UTF_16BE' (0xfeff) and 'BOM_UTF_32BE' (0x0000feff) are the same value, so the two + cannot be told apart by comparing enumerators; 'BOM_UTF_32LE' (0xfffe0000) does not fit in a 32-bit 'int'. + */ +typedef enum _BOM_TYPE { + BOM_NONE = 0x00, + BOM_UTF_8 = 0xefbbbf, + BOM_UTF_16BE = 0xfeff, + BOM_UTF_16LE = 0xfffe, + BOM_UTF_32BE = 0x0000feff, + BOM_UTF_32LE = 0xfffe0000 +} BOM_TYPE; +/* + Detect a potential BOM at the current file position and read it into 'bom' (if not NULL, + 'bom' should be at least 5 bytes). It also moves the 'f' beyond the BOM so it's possible to + skip it by calling 'freadBOM(f, NULL, NULL)'.
If no BOM is found, the 'f' file pointer + is reset to its original location. + If not null, 'sz_bom' is filled with how many bytes are stored in 'bom'. + Return the BOM type or BOM_NONE if none found (empty 'bom' in this case). + */ +BOM_TYPE freadBOM(FILE* f, unsigned char* bom, int* sz_bom); + +/* + Replace occurrences of special HTML characters escape sequences (e.g. '&amp;') found in 'html' + by its character equivalent (e.g. '&') into 'str'. + If 'html' and 'str' are the same pointer replacement is made in 'str' itself, overwriting it. + If 'str' is NULL, replacement is made into 'html', overwriting it. + Returns 'str' (or 'html' if 'str' was NULL). + */ +SXML_CHAR* html2str(SXML_CHAR* html, SXML_CHAR* str); + +/* + Replace occurrences of special characters (e.g. '&') found in 'str' into their XML escaped + equivalent (e.g. '&amp;') into 'xml'. + 'xml' is supposed allocated to the correct size, terminator included (e.g. using + 'malloc((strlen_html(str)+1)*sizeof(SXML_CHAR))') and + different from 'str' (unlike 'html2str'), as string will expand. If it is NULL, 'str' will be + analyzed and a string will be allocated to the exact size, before being returned. In that case, + it is the responsibility of the caller to free() the result! + Return 'xml' (or the newly allocated string), or NULL if 'str' is NULL, when 'xml' is 'str' + or when the allocation fails. +*/ +SXML_CHAR* str2html(SXML_CHAR* str, SXML_CHAR* xml); + +/* + Return the length of 'str' as if all its special characters were replaced by their HTML + equivalent (the string terminator is not counted). + Return 0 if 'str' is NULL. + */ +int strlen_html(SXML_CHAR* str); + +/* + Print 'str' to 'f', transforming special characters into their HTML equivalent. + Returns the number of output characters. + */ +int fprintHTML(FILE* f, SXML_CHAR* str); + +/* + Checks whether 'str' corresponds to 'pattern'. + 'pattern' can use wildcards such as '*' (any potentially empty string) or + '?' (any character) and use '\' as an escape character. + Returns 'true' when 'str' matches 'pattern', 'false' otherwise.
+ */ +int regstrcmp(SXML_CHAR* str, SXML_CHAR* pattern); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* _SXML_H_ */ -- cgit v1.2.3