summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Silvan Jegen <s.jegen@gmail.com> 2015-06-14 17:09:02 +0200
committer Silvan Jegen <s.jegen@gmail.com> 2015-06-14 17:09:02 +0200
commit fb3e447995699ca216c66f120fa17becc94475e2 (patch)
tree 9ea16b6a1c7276c34fee5600f092e090cd5bd770
Initial commit
-rw-r--r-- htmlfilter.go 71
1 files changed, 71 insertions, 0 deletions
diff --git a/htmlfilter.go b/htmlfilter.go
new file mode 100644
index 0000000..dff1188
--- /dev/null
+++ b/htmlfilter.go
@@ -0,0 +1,71 @@
+/* See LICENSE file for copyright and license details. */
+
+package main
+
+import (
+ "fmt"
+ "io"
+ "os"
+
+ "github.com/Shugyousha/unxml"
+)
+
+// filterFile opens the file at path, wraps it in the reader returned by
+// unxml.NewReaderKeepElements (called with the element names "div" and "p")
+// and copies everything that reader yields to out.
+//
+// It returns nil once the reader reports io.EOF, and a descriptive error if
+// opening the file, reading from the unxml reader or writing to out fails.
+func filterFile(path string, out io.Writer) error {
+	f, err := os.Open(path)
+	if err != nil {
+		return fmt.Errorf("Error when opening file: %s", err)
+	}
+	// The file is only read, so an error from Close carries no information.
+	defer f.Close()
+
+	var unxmlreader io.Reader = unxml.NewReaderKeepElements(f, []string{"div", "p"})
+
+	// Keep the fixed 4096-byte read size instead of switching to io.Copy, so
+	// the unxml reader is driven with the same buffer size as before.
+	bbuffer := make([]byte, 4096)
+	for bufno := 0; ; bufno++ {
+		n, err := unxmlreader.Read(bbuffer)
+		// A Read may return data together with an error (including io.EOF),
+		// so the bytes are written out before err is looked at.
+		if n > 0 {
+			if _, werr := out.Write(bbuffer[:n]); werr != nil {
+				return fmt.Errorf("Error while writing output: %v", werr)
+			}
+		}
+		if err == io.EOF {
+			return nil
+		}
+		if err != nil {
+			// bufno counts successfully read buffers, not input lines.
+			return fmt.Errorf("Error while reading file at buffer nr. %d: %v", bufno, err)
+		}
+	}
+}
+
+// main runs filterFile on the file named by the first command-line argument
+// and writes the filtered output to standard output.
+//
+// Exit status is 2 when the argument is missing and 1 when filtering fails;
+// all diagnostics go to standard error so they never mix with the output.
+func main() {
+	if len(os.Args) < 2 {
+		fmt.Fprintf(os.Stderr, "usage: htmlfilter FILE\n")
+		os.Exit(2)
+	}
+
+	// os.Exit skips deferred calls, which is why the file handling (and its
+	// deferred Close) lives in filterFile rather than here.
+	if err := filterFile(os.Args[1], os.Stdout); err != nil {
+		fmt.Fprintf(os.Stderr, "%s\n", err)
+		os.Exit(1)
+	}
+}