summary refs log tree commit diff
path: root/unxml.go
diff options
context:
space:
mode:
author Silvan Jegen <s.jegen@gmail.com> 2015-06-05 19:30:29 +0200
committer Silvan Jegen <s.jegen@gmail.com> 2015-06-05 19:30:29 +0200
commit 6a1cb2df856bdaa67f6a41c0e58bfceb068efbf1 (patch)
tree 40a498f9c9fc5206b128983dbca671161a7c1859 /unxml.go
parent 7c2edc757508a77f71ea7cdf24eda955cdaa9925 (diff)
Add the KeepTags Read implementation
Diffstat (limited to 'unxml.go')
-rw-r--r-- unxml.go 52
1 files changed, 52 insertions, 0 deletions
diff --git a/unxml.go b/unxml.go
index 80e71d7..6dfcf05 100644
--- a/unxml.go
+++ b/unxml.go
@@ -51,6 +51,58 @@ func NewReaderKeepTags(r io.Reader, tagstokeep []string) *Reader {
}
}
+func (r *Reader) Read(out []byte) (int, error) {
+ var err error
+
+ r.count = 0
+ n := 0
+ lenout := len(out)
+ if lenout == 0 {
+ return r.count, nil
+ }
+
+ lenlr := len(r.lastread)
+ if lenlr > 0 {
+ n = copy(out[0:], r.lastread)
+ r.count += n
+ r.lastread = make([]byte, len(out))
+ lenout -= n
+ }
+
+ for {
+ tt := r.tokenizer.Next()
+
+ switch tt {
+ case html.ErrorToken:
+ return r.count, io.EOF
+
+ case html.TextToken:
+ text := r.tokenizer.Text()
+ lentext := len(text)
+ if lentext <= lenout {
+ n = copy(out[r.count:], text)
+ r.count += n
+ lenout -= n
+ } else {
+ n = copy(out[r.count:], text[:lenout-1])
+ r.count += n
+ r.lastread = text[lenout-1:]
+ return r.count, err
+ }
+
+ case html.StartTagToken:
+ tn, _ := r.tokenizer.TagName()
+ if _, ok := r.tagmap[string(tn)]; ok {
+ }
+ //fmt.Printf("TagName: %s\n", tn)
+
+ case html.EndTagToken:
+ _, _ = r.tokenizer.TagName()
+ //fmt.Printf("TagEndName: %s\n", tn)
+ }
+ }
+}
+
func (r *ElementReader) Read(out []byte) (int, error) {
fmt.Fprintf(os.Stderr, "Read has been called.\n")
var err error