author	Silvan Jegen <s.jegen@gmail.com>	2016-12-25 16:41:56 +0100
committer	Silvan Jegen <s.jegen@gmail.com>	2016-12-25 16:41:56 +0100
commit	0ad0f3a85145f2b64cf5cbffa57cdb779180b724 (patch)
tree	ffe733b3b2eafe101700e56bd11be576c076afd4
parent	be3749d9f77e1410c11d0e8b7158a13e51faa41c (diff)
Implement our own scanner
This allows us to record byte offset and line number in the token.
-rw-r--r--	conf/parser.go	8
-rw-r--r--	conf/scanner.go	82
2 files changed, 65 insertions(+), 25 deletions(-)
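The effect of the new position-carrying tokens is easiest to see from a small consumer. The following is a minimal sketch, not part of the commit: demoScan is a hypothetical helper assumed to live in the same conf package as the scanner introduced below.

package conf

import (
	"fmt"
	"io"
	"strings"
)

// demoScan prints every token together with the byte offset and line
// number that the new scanner records.
func demoScan() {
	s := newScanner(strings.NewReader("foo bar\nbaz"))
	for {
		tok, err := s.Scan()
		if err == io.EOF {
			break
		}
		if err != nil {
			fmt.Printf("scan error: %v\n", err)
			return
		}
		fmt.Printf("%q at byte %d, line %d\n", tok.Lit, tok.Offset, tok.LineNr)
	}
}

With this input it should print "foo" at byte 0 and "bar" at byte 4 (both on line 1), and "baz" at byte 8 on line 2.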
diff --git a/conf/parser.go b/conf/parser.go
index 21bc604..85d9b11 100644
--- a/conf/parser.go
+++ b/conf/parser.go
@@ -17,12 +17,12 @@ type parser struct {
func NewConfig(r io.Reader) *Config {
s := newScanner(r)
- tt, tok, err := s.Next()
+ tok, err := s.Scan()
for err == nil {
- fmt.Fprintf(os.Stderr, "tokentype: %d, token: %q, err: %v\n", tt, tok, err)
- tt, tok, err = s.Next()
+		fmt.Fprintf(os.Stderr, "tokentype: %d, token: %q, offset: %d, line: %d\n", tok.Type, tok.Lit, tok.Offset, tok.LineNr)
+ tok, err = s.Scan()
}
- fmt.Fprintf(os.Stderr, "Error: tokentype: %d, token: %q, err: %v\n", tt, tok, err)
+ fmt.Fprintf(os.Stderr, "Error: tokentype: %d, token: %q, err: %v\n", tok.Type, tok.Lit, err)
return &Config{}
}
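One thing to note about the loop above: Scan reports end of input as io.EOF, so the final Fprintf fires on normal termination as well. A consumer that wants to tell the two apart might look like this (a sketch, not part of the commit; parseTokens is a hypothetical helper in the conf package):

package conf

import (
	"fmt"
	"io"
)

// parseTokens is a hypothetical driver that separates the io.EOF which
// Scan returns at end of input from real scanning errors.
func parseTokens(s *scanner) ([]token, error) {
	var toks []token
	for {
		tok, err := s.Scan()
		if err == io.EOF {
			return toks, nil // normal end of input
		}
		if err != nil {
			return nil, fmt.Errorf("scan failed: %v", err)
		}
		toks = append(toks, tok)
	}
}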
diff --git a/conf/scanner.go b/conf/scanner.go
index 172103f..3d83534 100644
--- a/conf/scanner.go
+++ b/conf/scanner.go
@@ -1,8 +1,12 @@
package conf
import (
- "bufio"
+ "fmt"
"io"
+ "io/ioutil"
+ "os"
+ "unicode"
+ "unicode/utf8"
)
type tokentype int
@@ -16,45 +20,81 @@ const (
)
type token struct {
- Type tokentype
+ Type tokentype
+ Offset int
+ LineNr int
+ Lit string
}
type scanner struct {
- r io.Reader
- bs bufio.Scanner
- prev token
- cur token
+ r io.Reader
+ data []byte
+ offset int
+ curline int
}
func newScanner(r io.Reader) *scanner {
- ns := bufio.NewScanner(r)
- ns.Split(bufio.ScanWords)
+ // TODO: don't be lazy
+ data, err := ioutil.ReadAll(r)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "could not read data from Reader: %v\n", err)
+ os.Exit(1)
+ }
sc := scanner{
- r: r,
- bs: *ns,
+ data: data,
+ curline: 1,
}
return &sc
}
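The TODO above presumably refers to slurping the whole input into memory with ioutil.ReadAll and exiting the process on failure. A streaming variant, not part of this commit and using hypothetical names, could keep the same position bookkeeping while decoding from a bufio.Reader:

package conf

import (
	"bufio"
	"io"
)

// streamScanner is a hypothetical incremental alternative: it decodes
// runes as they are read instead of buffering the whole input.
type streamScanner struct {
	br      *bufio.Reader
	offset  int // byte offset of the next rune to read
	curline int
}

func newStreamScanner(r io.Reader) *streamScanner {
	return &streamScanner{br: bufio.NewReader(r), curline: 1}
}

// readRune reads one rune while maintaining the offset and line
// counters that the token type needs.
func (s *streamScanner) readRune() (rune, int, error) {
	r, size, err := s.br.ReadRune()
	if err != nil {
		return r, size, err
	}
	s.offset += size
	if r == '\n' {
		s.curline++
	}
	return r, size, err
}

Returning an error from the constructor instead of calling os.Exit would also let callers decide how to handle a failed read.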
-func getTokenType(s string) tokentype {
+func getTokenType(s []byte) tokentype {
return Nothing
}
-func (s *scanner) Next() (tokentype, string, error) {
- more := s.bs.Scan()
- if !more {
- err := s.bs.Err()
- if err != nil {
- return Nothing, "", err
+func (s *scanner) Scan() (token, error) {
+	processed := 0
+	toklen := 0
+	tokenstarted := false
+ oldline := s.curline
+ for {
+ r, rlen := utf8.DecodeRune(s.data[s.offset+processed:])
+ if r == utf8.RuneError {
+ if rlen == 1 {
+				return token{}, fmt.Errorf("found invalid UTF-8 at offset %d, before: %q", s.offset+processed, s.data[s.offset:s.offset+processed])
+ } else if rlen == 0 {
+				// End of input: emit any token in progress before reporting EOF.
+				if tokenstarted {
+					break
+				}
+				return token{}, io.EOF
+ }
+ }
+
+ processed += rlen
+
+ if unicode.IsSpace(r) {
+ if r == '\n' {
+ s.curline++
+ }
+ if tokenstarted {
+ break
+ }
+			// Token not started yet: keep its start line in sync.
+			s.offset += rlen
+			processed = 0
+			oldline = s.curline
+			continue
}
- return Nothing, "", io.EOF
+		tokenstarted = true
+		toklen = processed
}
- tokstr := s.bs.Text()
- token := getTokenType(tokstr)
- return token, tokstr, nil
+	// toklen excludes the terminating whitespace rune, while processed
+	// includes it so that the next Scan starts just past it.
+	tokbytes := s.data[s.offset : s.offset+toklen]
+ tokent := getTokenType(tokbytes)
+
+	ret := token{
+		Type:   tokent,
+		Offset: s.offset, // byte offset where the token starts
+		LineNr: oldline,
+		Lit:    string(tokbytes),
+	}
+	s.offset += processed
+	return ret, nil
}
func (s *scanner) peek() (tokentype, string, error) {
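A quick way to sanity-check the position bookkeeping is a table-style test along these lines (a sketch assuming package conf and the Scan behavior shown above; the expected values follow from the byte layout of the input "ab cd\nef"):

package conf

import (
	"strings"
	"testing"
)

func TestScanPositions(t *testing.T) {
	// "ab" starts at byte 0 on line 1, "cd" at byte 3 on line 1,
	// and "ef" at byte 6 on line 2.
	s := newScanner(strings.NewReader("ab cd\nef"))
	want := []struct {
		lit    string
		offset int
		line   int
	}{
		{"ab", 0, 1},
		{"cd", 3, 1},
		{"ef", 6, 2},
	}
	for _, w := range want {
		tok, err := s.Scan()
		if err != nil {
			t.Fatalf("Scan: %v", err)
		}
		if tok.Lit != w.lit || tok.Offset != w.offset || tok.LineNr != w.line {
			t.Errorf("got %q at byte %d, line %d; want %q at byte %d, line %d",
				tok.Lit, tok.Offset, tok.LineNr, w.lit, w.offset, w.line)
		}
	}
}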