/* See LICENSE file for copyright and license details. */ package unxml import ( "bytes" "fmt" "io" "os" "golang.org/x/net/html" ) type Reader struct { reader io.Reader tagmap map[string]bool } func NewReaderKeepTags(r io.Reader, tagstokeep []string) *Reader { var tagmap map[string]bool if len(tagstokeep) > 0 { tagmap = make(map[string]bool, 10) for _, tag := range tagstokeep { tagmap[tag] = true } } else { tagmap = nil } return &Reader{reader: r, tagmap: tagmap, } } func (r *Reader) Read(out []byte) (n int, err error) { hr := html.NewTokenizer(r.reader) buf := bytes.NewBuffer(make([]byte, len(out))) depth := 0 for { tt := hr.Next() switch tt { case html.ErrorToken: fmt.Fprintf(os.Stderr, "There was an error when reading from the underlying os.Reader: %s", tt) return 0, hr.Err() case html.TextToken: if depth > 0 { // emitBytes should copy the []byte it receives, // if it doesn't process it immediately. n, err = buf.Write(hr.Text()) } case html.StartTagToken, html.EndTagToken: tn, _ := hr.TagName() if len(tn) == 1 && tn[0] == 'a' { if tt == html.StartTagToken { depth++ } else { depth-- } } } } return }