diff options
| author | Silvan Jegen <s.jegen@gmail.com> | 2015-06-14 17:09:02 +0200 | 
|---|---|---|
| committer | Silvan Jegen <s.jegen@gmail.com> | 2015-06-14 17:09:02 +0200 | 
| commit | fb3e447995699ca216c66f120fa17becc94475e2 (patch) | |
| tree | 9ea16b6a1c7276c34fee5600f092e090cd5bd770 | |
Initial commit
| -rw-r--r-- | htmlfilter.go | 71 | 
1 files changed, 71 insertions, 0 deletions
| diff --git a/htmlfilter.go b/htmlfilter.go new file mode 100644 index 0000000..dff1188 --- /dev/null +++ b/htmlfilter.go @@ -0,0 +1,71 @@ +/* See LICENSE file for copyright and license details. */ + +package main + +import ( +	"fmt" +	"io" +	"os" + +	"github.com/Shugyousha/unxml" +) + +func main() { +	var unxmlreader io.Reader + +	f, err := os.Open(os.Args[1]) +	if err != nil { +		fmt.Fprintf(os.Stderr, "Error when opening file: %s\n", err) +	} + +	//unxmlreader = unxml.NewReaderKeepTags(f, make([]string, 0)) +	//lineno := 0 + +	//bbuffer := make([]byte, 100) +	//for { + +	//	n, err = unxmlreader.Read(bbuffer) +	//	if err != nil { +	//		if err == io.EOF || n == 0 { +	//			break +	//		} +	//		fmt.Printf("Error while reading file at line nr. %d: %v\n", lineno, err) +	//		break +	//	} + +	//	lineno++ +	//	//fmt.Printf("Buffer nbr. %d: %s\n", lineno, bbuffer[:n]) +	//	fmt.Printf("%s", bbuffer[:n]) +	//} +	//fmt.Printf("%s", bbuffer[:n]) + +	//f.Close() +	//f, err = os.Open(os.Args[1]) +	//if err != nil { +	//	fmt.Fprintf(os.Stderr, "Error when opening file for the second time: %s\n", err) +	//} + +	//unxmlreader = unxml.NewReaderKeepElements(f, make([]string, 0)) +	unxmlreader = unxml.NewReaderKeepElements(f, []string{"div", "p"}) +	bufno := 0 + +	bbuffer := make([]byte, 4096) +	//bbuffer := make([]byte, 100) +	var n int +	for { + +		n, err = unxmlreader.Read(bbuffer) +		if err != nil { +			if err == io.EOF { +				fmt.Printf("%s", bbuffer[:n]) +				break +			} +			fmt.Printf("Error while reading file at line nr. %d: %v\n", bufno, err) +			break +		} + +		bufno++ +		//fmt.Printf("Buffer nbr. %d: %s\n", lineno, bbuffer[:n]) +		fmt.Printf("%s", bbuffer[:n]) +	} +} | 
