diff options
| -rw-r--r-- | htmlfilter.go | 71 | 
1 files changed, 71 insertions, 0 deletions
diff --git a/htmlfilter.go b/htmlfilter.go new file mode 100644 index 0000000..dff1188 --- /dev/null +++ b/htmlfilter.go @@ -0,0 +1,71 @@ +/* See LICENSE file for copyright and license details. */ + +package main + +import ( +	"fmt" +	"io" +	"os" + +	"github.com/Shugyousha/unxml" +) + +func main() { +	var unxmlreader io.Reader + +	f, err := os.Open(os.Args[1]) +	if err != nil { +		fmt.Fprintf(os.Stderr, "Error when opening file: %s\n", err) +	} + +	//unxmlreader = unxml.NewReaderKeepTags(f, make([]string, 0)) +	//lineno := 0 + +	//bbuffer := make([]byte, 100) +	//for { + +	//	n, err = unxmlreader.Read(bbuffer) +	//	if err != nil { +	//		if err == io.EOF || n == 0 { +	//			break +	//		} +	//		fmt.Printf("Error while reading file at line nr. %d: %v\n", lineno, err) +	//		break +	//	} + +	//	lineno++ +	//	//fmt.Printf("Buffer nbr. %d: %s\n", lineno, bbuffer[:n]) +	//	fmt.Printf("%s", bbuffer[:n]) +	//} +	//fmt.Printf("%s", bbuffer[:n]) + +	//f.Close() +	//f, err = os.Open(os.Args[1]) +	//if err != nil { +	//	fmt.Fprintf(os.Stderr, "Error when opening file for the second time: %s\n", err) +	//} + +	//unxmlreader = unxml.NewReaderKeepElements(f, make([]string, 0)) +	unxmlreader = unxml.NewReaderKeepElements(f, []string{"div", "p"}) +	bufno := 0 + +	bbuffer := make([]byte, 4096) +	//bbuffer := make([]byte, 100) +	var n int +	for { + +		n, err = unxmlreader.Read(bbuffer) +		if err != nil { +			if err == io.EOF { +				fmt.Printf("%s", bbuffer[:n]) +				break +			} +			fmt.Printf("Error while reading file at line nr. %d: %v\n", bufno, err) +			break +		} + +		bufno++ +		//fmt.Printf("Buffer nbr. %d: %s\n", lineno, bbuffer[:n]) +		fmt.Printf("%s", bbuffer[:n]) +	} +}  | 
