diff options
author | Silvan Jegen <s.jegen@gmail.com> | 2015-06-14 17:09:02 +0200 |
---|---|---|
committer | Silvan Jegen <s.jegen@gmail.com> | 2015-06-14 17:09:02 +0200 |
commit | fb3e447995699ca216c66f120fa17becc94475e2 (patch) | |
tree | 9ea16b6a1c7276c34fee5600f092e090cd5bd770 |
Initial commit
-rw-r--r-- | htmlfilter.go | 71 |
1 file changed, 71 insertions, 0 deletions
diff --git a/htmlfilter.go b/htmlfilter.go new file mode 100644 index 0000000..dff1188 --- /dev/null +++ b/htmlfilter.go @@ -0,0 +1,71 @@ +/* See LICENSE file for copyright and license details. */ + +package main + +import ( + "fmt" + "io" + "os" + + "github.com/Shugyousha/unxml" +) + +func main() { + var unxmlreader io.Reader + + f, err := os.Open(os.Args[1]) + if err != nil { + fmt.Fprintf(os.Stderr, "Error when opening file: %s\n", err) + } + + //unxmlreader = unxml.NewReaderKeepTags(f, make([]string, 0)) + //lineno := 0 + + //bbuffer := make([]byte, 100) + //for { + + // n, err = unxmlreader.Read(bbuffer) + // if err != nil { + // if err == io.EOF || n == 0 { + // break + // } + // fmt.Printf("Error while reading file at line nr. %d: %v\n", lineno, err) + // break + // } + + // lineno++ + // //fmt.Printf("Buffer nbr. %d: %s\n", lineno, bbuffer[:n]) + // fmt.Printf("%s", bbuffer[:n]) + //} + //fmt.Printf("%s", bbuffer[:n]) + + //f.Close() + //f, err = os.Open(os.Args[1]) + //if err != nil { + // fmt.Fprintf(os.Stderr, "Error when opening file for the second time: %s\n", err) + //} + + //unxmlreader = unxml.NewReaderKeepElements(f, make([]string, 0)) + unxmlreader = unxml.NewReaderKeepElements(f, []string{"div", "p"}) + bufno := 0 + + bbuffer := make([]byte, 4096) + //bbuffer := make([]byte, 100) + var n int + for { + + n, err = unxmlreader.Read(bbuffer) + if err != nil { + if err == io.EOF { + fmt.Printf("%s", bbuffer[:n]) + break + } + fmt.Printf("Error while reading file at line nr. %d: %v\n", bufno, err) + break + } + + bufno++ + //fmt.Printf("Buffer nbr. %d: %s\n", lineno, bbuffer[:n]) + fmt.Printf("%s", bbuffer[:n]) + } +} |