From 92213d258a788c6ba604669ee9c800d2d6a8cf1a Mon Sep 17 00:00:00 2001 From: Silvan Jegen Date: Sun, 4 Oct 2015 16:01:49 +0200 Subject: Keep no tags --- htmlfilter.go | 47 ++++++++++------------------------------------- 1 file changed, 10 insertions(+), 37 deletions(-) diff --git a/htmlfilter.go b/htmlfilter.go index dff1188..e5de3de 100644 --- a/htmlfilter.go +++ b/htmlfilter.go @@ -12,60 +12,33 @@ import ( func main() { var unxmlreader io.Reader + var n int f, err := os.Open(os.Args[1]) if err != nil { fmt.Fprintf(os.Stderr, "Error when opening file: %s\n", err) } - //unxmlreader = unxml.NewReaderKeepTags(f, make([]string, 0)) - //lineno := 0 - - //bbuffer := make([]byte, 100) - //for { - - // n, err = unxmlreader.Read(bbuffer) - // if err != nil { - // if err == io.EOF || n == 0 { - // break - // } - // fmt.Printf("Error while reading file at line nr. %d: %v\n", lineno, err) - // break - // } - - // lineno++ - // //fmt.Printf("Buffer nbr. %d: %s\n", lineno, bbuffer[:n]) - // fmt.Printf("%s", bbuffer[:n]) - //} - //fmt.Printf("%s", bbuffer[:n]) + unxmlreader = unxml.NewReaderKeepTags(f, make([]string, 0)) + lineno := 0 - //f.Close() - //f, err = os.Open(os.Args[1]) - //if err != nil { - // fmt.Fprintf(os.Stderr, "Error when opening file for the second time: %s\n", err) - //} - - //unxmlreader = unxml.NewReaderKeepElements(f, make([]string, 0)) - unxmlreader = unxml.NewReaderKeepElements(f, []string{"div", "p"}) - bufno := 0 - - bbuffer := make([]byte, 4096) - //bbuffer := make([]byte, 100) - var n int + bbuffer := make([]byte, 100) for { n, err = unxmlreader.Read(bbuffer) if err != nil { - if err == io.EOF { - fmt.Printf("%s", bbuffer[:n]) + if err == io.EOF || n == 0 { break } - fmt.Printf("Error while reading file at line nr. %d: %v\n", bufno, err) + fmt.Printf("Error while reading file at line nr. %d: %v\n", lineno, err) break } - bufno++ + lineno++ //fmt.Printf("Buffer nbr. %d: %s\n", lineno, bbuffer[:n]) fmt.Printf("%s", bbuffer[:n]) } + fmt.Printf("%s", bbuffer[:n]) + + f.Close() } -- cgit v1.2.1-18-gbd029