diff options
author | Silvan Jegen <s.jegen@gmail.com> | 2016-12-01 22:27:15 +0100 |
---|---|---|
committer | Silvan Jegen <s.jegen@gmail.com> | 2016-12-01 22:28:45 +0100 |
commit | e0d2ede90d34fc35440c414228a536c1c869dbfc (patch) | |
tree | 5293627e4b255f0a679a4eebb42569c0cf930404 | |
parent | e75b77c73a89f3bcc1790ceb05fd1dc0051f15c2 (diff) |
We use the simpler, token-based API (xml.Decoder.Token) offered by the Go
standard library to extract the article titles, instead of decoding into a
struct. I could not find another way to make sure I get the text of the
nested sub-elements.
-rw-r--r-- | goencxml.go | 51 |
1 file changed, 42 insertions, 9 deletions
diff --git a/goencxml.go b/goencxml.go index b4fb7cf..35ddf5c 100644 --- a/goencxml.go +++ b/goencxml.go @@ -2,23 +2,56 @@ package main import ( "bufio" + "bytes" "encoding/xml" "fmt" + "io" "os" ) -type article struct { - Title string `xml:"front>article-meta>title-group>article-title"` -} - func process(r *bufio.Reader) { - var a article + var ( + intitle bool + inarticlemeta bool + buffer bytes.Buffer + ) - err := xml.NewDecoder(r).Decode(&a) - if err != nil { - fmt.Fprintf(os.Stderr, "Error when decoding XML file %q\n", err) + dec := xml.NewDecoder(r) + for { + token, err := dec.Token() + if err == io.EOF { + break + } + if err != nil { + fmt.Fprintf(os.Stderr, "Error when decoding XML file %q\n", err) + os.Exit(1) + } + switch t := token.(type) { + case xml.StartElement: + if t.Name.Local == "article-meta" { + inarticlemeta = true + continue + } + if t.Name.Local == "article-title" && inarticlemeta { + intitle = true + } + case xml.CharData: + if !intitle || !inarticlemeta { + continue + } + buffer.Write(t) + case xml.EndElement: + if t.Name.Local == "article-meta" { + inarticlemeta = false + continue + } + if t.Name.Local == "article-title" && inarticlemeta { + intitle = false + fmt.Printf("article-title: %s\n", buffer.String()) + buffer.Reset() + } + } } - fmt.Printf("article-title: %s\n", a.Title) } func main() { |