From 3018cef11b595dde582f22a48685f15756d18b9b Mon Sep 17 00:00:00 2001 From: Silvan Jegen Date: Sun, 14 Jun 2015 15:57:00 +0200 Subject: The intag-state has to be preserved through sequential calls of Read --- unxml.go | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/unxml.go b/unxml.go index f3909dc..6478995 100644 --- a/unxml.go +++ b/unxml.go @@ -21,6 +21,7 @@ type Reader struct { type ElementReader struct { xr Reader tagsinstack map[string]int + intagtokeep bool } //type stack []string @@ -127,7 +128,7 @@ func (r *Reader) Read(out []byte) (int, error) { func (r *ElementReader) Read(out []byte) (int, error) { //fmt.Fprintf(os.Stderr, "Read has been called.\n") var err error - intagtokeep := true + //r.intagtokeep = true r.xr.count = 0 n := 0 @@ -159,7 +160,7 @@ func (r *ElementReader) Read(out []byte) (int, error) { return r.xr.count, io.EOF case html.TextToken: - if !intagtokeep { + if !r.intagtokeep { continue } text := r.xr.tokenizer.Text() @@ -178,14 +179,11 @@ func (r *ElementReader) Read(out []byte) (int, error) { } case html.StartTagToken: - if !intagtokeep { - continue - } tn, _ := r.xr.tokenizer.TagName() //fmt.Printf("TagNameStart: %s\n", tn) if _, ok := r.xr.tagmap[string(tn)]; ok { r.tagsinstack[string(tn)]++ - intagtokeep = true + r.intagtokeep = true raw := r.xr.tokenizer.Raw() //fmt.Printf("TokenRaw: %s\n", raw) if len(raw) < lenout { @@ -207,7 +205,7 @@ func (r *ElementReader) Read(out []byte) (int, error) { //fmt.Printf("TagEndNameInStack: %s, %d\n", tn, count) if count == 1 { delete(r.tagsinstack, string(tn)) - intagtokeep = false + r.intagtokeep = false } else { r.tagsinstack[string(tn)]-- } -- cgit v1.2.1-18-gbd029