From fb3e447995699ca216c66f120fa17becc94475e2 Mon Sep 17 00:00:00 2001 From: Silvan Jegen Date: Sun, 14 Jun 2015 17:09:02 +0200 Subject: Initial commit --- htmlfilter.go | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 htmlfilter.go diff --git a/htmlfilter.go b/htmlfilter.go new file mode 100644 index 0000000..dff1188 --- /dev/null +++ b/htmlfilter.go @@ -0,0 +1,71 @@ +/* See LICENSE file for copyright and license details. */ + +package main + +import ( + "fmt" + "io" + "os" + + "github.com/Shugyousha/unxml" +) + +func main() { + var unxmlreader io.Reader + + f, err := os.Open(os.Args[1]) + if err != nil { + fmt.Fprintf(os.Stderr, "Error when opening file: %s\n", err) + } + + //unxmlreader = unxml.NewReaderKeepTags(f, make([]string, 0)) + //lineno := 0 + + //bbuffer := make([]byte, 100) + //for { + + // n, err = unxmlreader.Read(bbuffer) + // if err != nil { + // if err == io.EOF || n == 0 { + // break + // } + // fmt.Printf("Error while reading file at line nr. %d: %v\n", lineno, err) + // break + // } + + // lineno++ + // //fmt.Printf("Buffer nbr. %d: %s\n", lineno, bbuffer[:n]) + // fmt.Printf("%s", bbuffer[:n]) + //} + //fmt.Printf("%s", bbuffer[:n]) + + //f.Close() + //f, err = os.Open(os.Args[1]) + //if err != nil { + // fmt.Fprintf(os.Stderr, "Error when opening file for the second time: %s\n", err) + //} + + //unxmlreader = unxml.NewReaderKeepElements(f, make([]string, 0)) + unxmlreader = unxml.NewReaderKeepElements(f, []string{"div", "p"}) + bufno := 0 + + bbuffer := make([]byte, 4096) + //bbuffer := make([]byte, 100) + var n int + for { + + n, err = unxmlreader.Read(bbuffer) + if err != nil { + if err == io.EOF { + fmt.Printf("%s", bbuffer[:n]) + break + } + fmt.Printf("Error while reading file at line nr. %d: %v\n", bufno, err) + break + } + + bufno++ + //fmt.Printf("Buffer nbr. %d: %s\n", lineno, bbuffer[:n]) + fmt.Printf("%s", bbuffer[:n]) + } +} -- cgit v1.2.1-18-gbd029