summary | refs | log | tree | commit | diff
path: root/unxml.go
diff options
context:
space:
mode:
author Silvan Jegen <s.jegen@gmail.com> 2015-06-02 21:00:13 +0200
committer Silvan Jegen <s.jegen@gmail.com> 2015-06-02 21:00:13 +0200
commit be5eb42e23d5625f42147275a9f3c279979567bb (patch)
tree b5887f6e9089b7b943ace3096ef6941a305861a8 /unxml.go
parent 68707bbd9e45f4ec28abc7f099fbf72980545070 (diff)
Use composition
Diffstat (limited to 'unxml.go')
-rw-r--r-- unxml.go 72
1 files changed, 50 insertions, 22 deletions
diff --git a/unxml.go b/unxml.go
index c34f29f..cf3f238 100644
--- a/unxml.go
+++ b/unxml.go
@@ -18,6 +18,21 @@ type Reader struct {
tokenizer *html.Tokenizer
}
+type ElementReader struct {
+ r Reader
+ tagsinstack map[string]int
+}
+
+//type stack []string
+//
+//func (s stack) Empty() bool { return len(s) == 0 }
+//func (s stack) Peek() string { return s[len(s)-1] }
+//func (s *stack) Pop() string {
+// d := (*s)[len(*s)-1]
+// (*s) = (*s)[:len(*s)-1]
+// return d
+//}
+
func NewReaderKeepTags(r io.Reader, tagstokeep []string) *Reader {
var tagmap map[string]bool
@@ -36,52 +51,65 @@ func NewReaderKeepTags(r io.Reader, tagstokeep []string) *Reader {
}
}
-func (r *Reader) Read(out []byte) (int, error) {
+func (r *ElementReader) Read(out []byte) (int, error) {
fmt.Fprintf(os.Stderr, "Read has been called.\n")
var err error
- r.count = 0
+ r.r.count = 0
n := 0
lenout := len(out)
if lenout == 0 {
- return r.count, nil
+ return r.r.count, nil
}
- lenlr := len(r.lastread)
+ lenlr := len(r.r.lastread)
if lenlr > 0 {
- n = copy(out[0:], r.lastread)
- r.count += n
- r.lastread = make([]byte, len(out))
+ n = copy(out[0:], r.r.lastread)
+ r.r.count += n
+ r.r.lastread = make([]byte, len(out))
lenout -= n
}
for {
- tt := r.tokenizer.Next()
+ tt := r.r.tokenizer.Next()
switch tt {
case html.ErrorToken:
- fmt.Fprintf(os.Stderr, "There was an error when parsing the html: %s, %s\n", tt, r.tokenizer.Err())
- return r.count, io.EOF
+ //fmt.Fprintf(os.Stderr, "There was an error when parsing the html: %s, %s\n", tt, r.r.tokenizer.Err())
+ return r.r.count, io.EOF
case html.TextToken:
- text := r.tokenizer.Text()
+ text := r.r.tokenizer.Text()
lentext := len(text)
if lentext <= lenout {
- n = copy(out[r.count:], text)
- r.count += n
+ n = copy(out[r.r.count:], text)
+ r.r.count += n
lenout -= n
- //fmt.Printf("HAD SPACE: %q, count: %d, err: %s\n", text, r.count, err)
+ //fmt.Printf("HAD SPACE: %q, count: %d, err: %s\n", text, r.r.count, err)
} else {
- n = copy(out[r.count:], text[:lenout-1])
- r.count += n
- r.lastread = text[lenout-1:]
- //fmt.Printf("HAD NO SPACE: count: %d, err: %s\n", r.count, err)
- return r.count, err
+ n = copy(out[r.r.count:], text[:lenout-1])
+ r.r.count += n
+ r.r.lastread = text[lenout-1:]
+ //fmt.Printf("HAD NO SPACE: count: %d, err: %s\n", r.r.count, err)
+ return r.r.count, err
}
- case html.StartTagToken, html.EndTagToken:
- _, _ = r.tokenizer.TagName()
- //fmt.Printf("TagName: %s\n", tn)
+ case html.StartTagToken:
+ tn, _ := r.r.tokenizer.TagName()
+ if _, ok := r.r.tagmap[string(tn)]; ok {
+ }
+ fmt.Printf("TagName: %s\n", tn)
+
+ case html.EndTagToken:
+ tn, _ := r.r.tokenizer.TagName()
+ fmt.Printf("TagEndName: %s\n", tn)
+ if count, ok := r.tagsinstack[string(tn)]; ok {
+ if count == 1 {
+ delete(r.tagsinstack, string(tn))
+ } else {
+ r.tagsinstack[string(tn)]--
+ }
+ }
}
}
}