diff options
author | Silvan Jegen <s.jegen@gmail.com> | 2015-06-05 19:30:29 +0200 |
---|---|---|
committer | Silvan Jegen <s.jegen@gmail.com> | 2015-06-05 19:30:29 +0200 |
commit | 6a1cb2df856bdaa67f6a41c0e58bfceb068efbf1 (patch) | |
tree | 40a498f9c9fc5206b128983dbca671161a7c1859 /unxml.go | |
parent | 7c2edc757508a77f71ea7cdf24eda955cdaa9925 (diff) |
Add the KeepTags Read implementation
Diffstat (limited to 'unxml.go')
-rw-r--r-- | unxml.go | 52 |
1 file changed, 52 insertions, 0 deletions
@@ -51,6 +51,58 @@ func NewReaderKeepTags(r io.Reader, tagstokeep []string) *Reader { } } +func (r *Reader) Read(out []byte) (int, error) { + var err error + + r.count = 0 + n := 0 + lenout := len(out) + if lenout == 0 { + return r.count, nil + } + + lenlr := len(r.lastread) + if lenlr > 0 { + n = copy(out[0:], r.lastread) + r.count += n + r.lastread = make([]byte, len(out)) + lenout -= n + } + + for { + tt := r.tokenizer.Next() + + switch tt { + case html.ErrorToken: + return r.count, io.EOF + + case html.TextToken: + text := r.tokenizer.Text() + lentext := len(text) + if lentext <= lenout { + n = copy(out[r.count:], text) + r.count += n + lenout -= n + } else { + n = copy(out[r.count:], text[:lenout-1]) + r.count += n + r.lastread = text[lenout-1:] + return r.count, err + } + + case html.StartTagToken: + tn, _ := r.tokenizer.TagName() + if _, ok := r.tagmap[string(tn)]; ok { + } + //fmt.Printf("TagName: %s\n", tn) + + case html.EndTagToken: + _, _ = r.tokenizer.TagName() + //fmt.Printf("TagEndName: %s\n", tn) + } + } +} + func (r *ElementReader) Read(out []byte) (int, error) { fmt.Fprintf(os.Stderr, "Read has been called.\n") var err error |