From 6a1cb2df856bdaa67f6a41c0e58bfceb068efbf1 Mon Sep 17 00:00:00 2001 From: Silvan Jegen Date: Fri, 5 Jun 2015 19:30:29 +0200 Subject: Add the KeepTags Read implementation --- unxml.go | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'unxml.go') diff --git a/unxml.go b/unxml.go index 80e71d7..6dfcf05 100644 --- a/unxml.go +++ b/unxml.go @@ -51,6 +51,58 @@ func NewReaderKeepTags(r io.Reader, tagstokeep []string) *Reader { } } +func (r *Reader) Read(out []byte) (int, error) { + var err error + + r.count = 0 + n := 0 + lenout := len(out) + if lenout == 0 { + return r.count, nil + } + + lenlr := len(r.lastread) + if lenlr > 0 { + n = copy(out[0:], r.lastread) + r.count += n + r.lastread = make([]byte, len(out)) + lenout -= n + } + + for { + tt := r.tokenizer.Next() + + switch tt { + case html.ErrorToken: + return r.count, io.EOF + + case html.TextToken: + text := r.tokenizer.Text() + lentext := len(text) + if lentext <= lenout { + n = copy(out[r.count:], text) + r.count += n + lenout -= n + } else { + n = copy(out[r.count:], text[:lenout-1]) + r.count += n + r.lastread = text[lenout-1:] + return r.count, err + } + + case html.StartTagToken: + tn, _ := r.tokenizer.TagName() + if _, ok := r.tagmap[string(tn)]; ok { + } + //fmt.Printf("TagName: %s\n", tn) + + case html.EndTagToken: + _, _ = r.tokenizer.TagName() + //fmt.Printf("TagEndName: %s\n", tn) + } + } +} + func (r *ElementReader) Read(out []byte) (int, error) { fmt.Fprintf(os.Stderr, "Read has been called.\n") var err error -- cgit v1.2.1-18-gbd029