1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
|
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sxmlclib.h"
/* Forward declaration: process() calls print_article_title() before its
 * definition appears further down in this file. */
void print_article_title(XMLNode *root);
/**
 * Parse one XML file and print its article title.
 *
 * Builds a DOM for the file, looks up the document root and passes it to
 * print_article_title(). Every problem is reported on stderr.
 *
 * @param fn  Path of the XML file to parse.
 * @return 0 if the file was parsed and a root node was found, 1 otherwise.
 */
int process(char *fn) {
    /* The document lives on the stack, so there is no allocation to check
     * and no struct to free; only the memory owned by the document has to be
     * released, which the single XMLDoc_free() call below does on every path. */
    XMLDoc doc;
    XMLNode *root = NULL;
    int status = 1;

    XMLDoc_init(&doc);

    if (!XMLDoc_parse_file_DOM(fn, &doc)) {
        fprintf(stderr, "Error when parsing file '%s'.\n", fn);
        goto cleanup;
    }

    // Apparently ill-formed XML is not an error but the library
    // can't find the root node afterwards...
    if (doc.i_root < 0) {
        fprintf(stderr, "i_root was negative. Skipping file '%s'.\n", fn);
        goto cleanup;
    }

    root = doc.nodes[doc.i_root];
    if (!root) {
        /* The caller keeps going with the next file, so say "skipping"
         * rather than "exiting". */
        fprintf(stderr, "Root was NULL. Skipping file '%s'.\n", fn);
        goto cleanup;
    }

    print_article_title(root);
    status = 0;

cleanup:
    /* NOTE(review): relies on XMLDoc_free() being safe on an initialized
     * document whose parse failed - confirm against the sxmlc version in use. */
    XMLDoc_free(&doc);
    return status;
}
/**
 * Find the first direct child of a node whose tag equals the given name.
 *
 * Only the immediate children are examined; the search does not descend
 * into grandchildren.
 *
 * @param node     Node whose children are searched. May be NULL.
 * @param tagname  Tag name to compare against (exact, case-sensitive match).
 *                 May be NULL.
 * @return The first matching child, or NULL if there is no match or if
 *         `node` or `tagname` is NULL.
 */
XMLNode* find_child_node(XMLNode *node, const char *tagname) {
    if (!node || !tagname) {
        return NULL;
    }

    for (int i = 0; i < node->n_children; i++) {
        XMLNode *child = node->children[i];

        /* strcmp() on a NULL pointer is undefined behavior, so skip any
         * child that is missing or carries no tag. */
        if (child && child->tag && strcmp(child->tag, tagname) == 0) {
            return child;
        }
    }

    return NULL;
}
/**
 * Print the article title found below the given root node.
 *
 * Walks the fixed element path front -> article-meta -> title-group ->
 * article-title, one child lookup per step, and prints the text of the
 * final node to stdout as "article-title: <text>".
 *
 * If any element on the path is missing, or the article-title node has no
 * text, a message is written to stderr and nothing is printed to stdout.
 *
 * @param root  Node to start the walk from.
 */
void print_article_title(XMLNode *root) {
    /* Element names to descend through, outermost first. Declared as
     * plain char pointers so they can be handed to find_child_node()
     * whether or not its tag-name parameter is const-qualified. */
    static char *const path[] = {
        "front",
        "article-meta",
        "title-group",
        "article-title",
    };
    XMLNode *node = root;

    for (size_t i = 0; i < sizeof path / sizeof path[0]; i++) {
        node = find_child_node(node, path[i]);
        if (!node) {
            fprintf(stderr, "Could not find %s tag.\n", path[i]);
            return;
        }
    }

    /* Passing a NULL pointer for %s is undefined behavior, so handle a
     * title node without text explicitly. */
    if (!node->text) {
        fprintf(stderr, "article-title tag has no text.\n");
        return;
    }

    printf("article-title: %s\n", node->text);
}
/**
 * Program entry point: treats every command-line argument as the path of
 * an XML file and runs process() on each, in order.
 *
 * A failure on one file does not stop the remaining files from being
 * handled, and the exit status is 0 regardless of individual results.
 */
int main(int argc, char *argv[]) {
    int idx = 1; /* argv[0] is the program name, so start at 1 */

    while (idx < argc) {
        /* Result intentionally ignored: process() reports its own errors. */
        (void)process(argv[idx]);
        idx++;
    }

    return 0;
}
|