summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Silvan Jegen <s.jegen@gmail.com> 2016-12-01 22:27:15 +0100
committer: Silvan Jegen <s.jegen@gmail.com> 2016-12-01 22:28:45 +0100
commit: e0d2ede90d34fc35440c414228a536c1c869dbfc (patch)
tree: 5293627e4b255f0a679a4eebb42569c0cf930404
parent: e75b77c73a89f3bcc1790ceb05fd1dc0051f15c2 (diff)
Make sure to get text in sub-elements (HEAD, master)
We use the simpler API offered by the Go standard library to extract the article titles. I could not find another way to make sure I get the nested sub-elements.
-rw-r--r-- goencxml.go 51
1 files changed, 42 insertions, 9 deletions
diff --git a/goencxml.go b/goencxml.go
index b4fb7cf..35ddf5c 100644
--- a/goencxml.go
+++ b/goencxml.go
@@ -2,23 +2,56 @@ package main
import (
"bufio"
+ "bytes"
"encoding/xml"
"fmt"
+ "io"
"os"
)
-type article struct {
- Title string `xml:"front>article-meta>title-group>article-title"`
-}
-
func process(r *bufio.Reader) {
- var a article
+ var (
+ intitle bool
+ inarticlemeta bool
+ buffer bytes.Buffer
+ )
- err := xml.NewDecoder(r).Decode(&a)
- if err != nil {
- fmt.Fprintf(os.Stderr, "Error when decoding XML file %q\n", err)
+ dec := xml.NewDecoder(r)
+ for {
+ token, err := dec.Token()
+ if err == io.EOF {
+ break
+ }
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "Error when decoding XML file %q\n", err)
+ os.Exit(1)
+ }
+ switch t := token.(type) {
+ case xml.StartElement:
+ if t.Name.Local == "article-meta" {
+ inarticlemeta = true
+ continue
+ }
+ if t.Name.Local == "article-title" && inarticlemeta {
+ intitle = true
+ }
+ case xml.CharData:
+ if !intitle || !inarticlemeta {
+ continue
+ }
+ buffer.Write(t)
+ case xml.EndElement:
+ if t.Name.Local == "article-meta" {
+ inarticlemeta = false
+ continue
+ }
+ if t.Name.Local == "article-title" && inarticlemeta {
+ intitle = false
+ fmt.Printf("article-title: %s\n", buffer.String())
+ buffer.Reset()
+ }
+ }
}
- fmt.Printf("article-title: %s\n", a.Title)
}
func main() {