diff options
-rw-r--r-- | unxml.go | 52 |
1 files changed, 52 insertions, 0 deletions
@@ -51,6 +51,58 @@ func NewReaderKeepTags(r io.Reader, tagstokeep []string) *Reader { } } +func (r *Reader) Read(out []byte) (int, error) { + var err error + + r.count = 0 + n := 0 + lenout := len(out) + if lenout == 0 { + return r.count, nil + } + + lenlr := len(r.lastread) + if lenlr > 0 { + n = copy(out[0:], r.lastread) + r.count += n + r.lastread = make([]byte, len(out)) + lenout -= n + } + + for { + tt := r.tokenizer.Next() + + switch tt { + case html.ErrorToken: + return r.count, io.EOF + + case html.TextToken: + text := r.tokenizer.Text() + lentext := len(text) + if lentext <= lenout { + n = copy(out[r.count:], text) + r.count += n + lenout -= n + } else { + n = copy(out[r.count:], text[:lenout-1]) + r.count += n + r.lastread = text[lenout-1:] + return r.count, err + } + + case html.StartTagToken: + tn, _ := r.tokenizer.TagName() + if _, ok := r.tagmap[string(tn)]; ok { + } + //fmt.Printf("TagName: %s\n", tn) + + case html.EndTagToken: + _, _ = r.tokenizer.TagName() + //fmt.Printf("TagEndName: %s\n", tn) + } + } +} + func (r *ElementReader) Read(out []byte) (int, error) { fmt.Fprintf(os.Stderr, "Read has been called.\n") var err error |