package conf

import (
	"fmt"
	"io"
	"os"
	"unicode"
	"unicode/utf8"
)

type tokentype int

const (
	Name tokentype = iota
	Literal
	ListDelimiter
	ObjectDelimiter
	EmptyList
	EmptyObject
	Nothing
	IfStatement
)

// token is a single lexeme together with its position in the input.
type token struct {
	Type   tokentype
	Offset int // byte offset of the token's first byte
	LineNr int // 1-based line the token starts on
	Lit    string
}

// scanner splits its input into whitespace-separated tokens.
type scanner struct {
	data    []byte
	offset  int
	curline int
	peeked  *token // token buffered by Peek, consumed by the next Scan
}

func newScanner(r io.Reader) *scanner {
	// TODO: don't be lazy; read incrementally instead of slurping everything.
	data, err := io.ReadAll(r)
	if err != nil {
		fmt.Fprintf(os.Stderr, "could not read data from Reader: %v\n", err)
		os.Exit(1)
	}
	return &scanner{
		data:    data,
		curline: 1,
	}
}

// getTokenType classifies a token by its first byte.
func getTokenType(s []byte) tokentype {
	switch s[0] {
	case '"', '\'':
		return Literal
	case '[', ']':
		return ListDelimiter
	case '{', '}':
		return ObjectDelimiter
	}
	return Name
}

// Scan returns the next token, or io.EOF once the input is exhausted.
func (s *scanner) Scan() (token, error) {
	if s.peeked != nil {
		ret := *s.peeked
		s.peeked = nil
		return ret, nil
	}

	processed := 0 // bytes consumed since the token started
	wslen := 0     // length of the whitespace rune that ended the token
	tokenstarted := false
	tokenline := s.curline

	for {
		r, rlen := utf8.DecodeRune(s.data[s.offset+processed:])
		if r == utf8.RuneError {
			if rlen == 1 {
				return token{}, fmt.Errorf("invalid UTF-8 at offset %d (after %q)",
					s.offset+processed, s.data[s.offset:s.offset+processed])
			}
			// rlen == 0 means end of input: emit a token that runs up to
			// EOF instead of silently dropping it.
			if tokenstarted {
				break
			}
			return token{}, io.EOF
		}
		processed += rlen
		if unicode.IsSpace(r) {
			if r == '\n' {
				s.curline++
			}
			if tokenstarted {
				// Remember the terminator's length so a multi-byte
				// space is not included in the token text.
				wslen = rlen
				break
			}
			// Still in leading whitespace: advance past it.
			s.offset += rlen
			processed = 0
			continue
		}
		if !tokenstarted {
			tokenstarted = true
			tokenline = s.curline // line the token actually starts on
		}
	}

	tokbytes := s.data[s.offset : s.offset+processed-wslen]
	start := s.offset
	s.offset += processed
	return token{
		Type:   getTokenType(tokbytes),
		Offset: start,
		LineNr: tokenline,
		Lit:    string(tokbytes),
	}, nil
}

// Peek returns the next token without consuming it.
func (s *scanner) Peek() (token, error) {
	if s.peeked != nil {
		return *s.peeked, nil
	}
	tok, err := s.Scan()
	if err != nil {
		return token{}, err
	}
	s.peeked = &tok
	return tok, nil
}
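
// exampleScan is a minimal usage sketch (illustrative only; the input and the
// printed format are assumptions, not part of the original package): it drains
// the scanner and prints each token until Scan reports io.EOF. The scanner is
// built directly from a byte slice so the sketch needs no extra imports.
func exampleScan() {
	sc := &scanner{
		data:    []byte("name \"quoted\" { key value }"),
		curline: 1,
	}
	for {
		tok, err := sc.Scan()
		if err == io.EOF {
			break
		}
		if err != nil {
			fmt.Fprintf(os.Stderr, "scan error: %v\n", err)
			return
		}
		fmt.Printf("line %d, offset %d: %q\n", tok.LineNr, tok.Offset, tok.Lit)
	}
}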