path: root/conf/scanner.go

package conf

import (
	"fmt"
	"io"
	"os"
	"unicode"
	"unicode/utf8"
)

// tokentype identifies the lexical class of a scanned token.
type tokentype int

const (
	Name tokentype = iota
	Literal
	ListOpen
	ListClose
	ObjectOpen
	ObjectClose
	EmptyList
	EmptyObject
	Nothing
	IfStatement
)

var tokentypestrings = []string{Name: "Name", Literal: "Literal",
	ListOpen: "List Open", ListClose: "List Close", ObjectOpen: "Object Open",
	ObjectClose: "Object Close", EmptyList: "Empty List", EmptyObject: "Empty Object",
	Nothing: "Nothing", IfStatement: "If statement"}

// String implements fmt.Stringer for tokentype.
func (tt tokentype) String() string {
	return tokentypestrings[tt]
}

// token is a single lexical unit together with its source position.
type token struct {
	Type   tokentype
	Offset int    // byte offset of the token's first byte in the input
	LineNr int    // 1-based line on which the token starts
	Lit    string // the token's literal text
}

// scanner splits an input stream into whitespace-delimited tokens.
type scanner struct {
	data    []byte // the complete input
	offset  int    // byte offset of the next unread byte
	curline int    // 1-based line number at offset

	peeked *token // single-token lookahead buffer used by Peek
}

// newScanner reads all of r into memory and returns a scanner over it.
func newScanner(r io.Reader) *scanner {
	// TODO: don't be lazy
	data, err := io.ReadAll(r)
	if err != nil {
		fmt.Fprintf(os.Stderr, "could not read data from Reader: %v\n", err)
		os.Exit(1)
	}

	sc := scanner{
		data:    data,
		curline: 1,
	}

	return &sc
}
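
// scanAll is an illustrative sketch, not part of the original API: it
// shows the intended way to drive the scanner, calling Scan in a loop
// until io.EOF signals that the input is exhausted.
func scanAll(r io.Reader) ([]token, error) {
	sc := newScanner(r)
	var toks []token
	for {
		tok, err := sc.Scan()
		if err == io.EOF {
			return toks, nil // clean end of input
		}
		if err != nil {
			return nil, err // e.g. invalid UTF-8
		}
		toks = append(toks, tok)
	}
}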

// getTokenType classifies a token by its first byte; s must be
// non-empty. Anything that is not a quote or a bracket is a Name.
func getTokenType(s []byte) tokentype {
	switch s[0] {
	case '"', '\'':
		return Literal
	case '[':
		return ListOpen
	case ']':
		return ListClose
	case '{':
		return ObjectOpen
	case '}':
		return ObjectClose
	}

	return Name
}

// Scan returns the next whitespace-delimited token, or io.EOF once the
// input is exhausted.
func (s *scanner) Scan() (token, error) {
	if s.peeked != nil {
		ret := *s.peeked
		s.peeked = nil
		return ret, nil
	}

	processed := 0 // bytes consumed so far, including any terminating whitespace
	toklen := 0    // bytes that belong to the token itself
	tokenstarted := false
	startline := s.curline
	for {
		r, rlen := utf8.DecodeRune(s.data[s.offset+processed:])
		if r == utf8.RuneError {
			if rlen == 1 {
				return token{}, fmt.Errorf("found invalid UTF8 at offset %d (byte 0x%x)", s.offset+processed, s.data[s.offset+processed])
			} else if rlen == 0 {
				// End of input: emit a pending token before reporting
				// EOF, so a final token that is not followed by
				// whitespace is not lost.
				if tokenstarted {
					break
				}
				return token{}, io.EOF
			}
		}

		processed += rlen

		if unicode.IsSpace(r) {
			if r == '\n' {
				s.curline++
			}
			if tokenstarted {
				break
			}
			// Still skipping leading whitespace.
			s.offset += rlen
			processed = 0
			continue
		}
		if !tokenstarted {
			// Record the line the token actually starts on, after any
			// leading newlines have been skipped.
			startline = s.curline
			tokenstarted = true
		}
		toklen = processed
	}

	// toklen excludes the terminating whitespace rune, whatever its
	// byte length.
	tokbytes := s.data[s.offset : s.offset+toklen]

	ret := token{
		Type:   getTokenType(tokbytes),
		Offset: s.offset, // start of the token, not the position after it
		LineNr: startline,
		Lit:    string(tokbytes),
	}
	s.offset += processed // also step over the terminating whitespace
	return ret, nil
}

// Peek returns the next token without consuming it; the following Scan
// call will return the same token.
func (s *scanner) Peek() (token, error) {
	if s.peeked != nil {
		return *s.peeked, nil
	}

	tok, err := s.Scan()
	if err != nil {
		return token{}, err
	}
	s.peeked = &tok
	return tok, nil
}
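
// expect is a hypothetical helper, sketched here only to illustrate the
// Peek/Scan contract (it is not part of the original file): Peek buffers
// one token, and the next Scan hands that same buffered token back
// before touching the underlying input again.
func (s *scanner) expect(tt tokentype) (token, error) {
	tok, err := s.Peek()
	if err != nil {
		return token{}, err
	}
	if tok.Type != tt {
		return token{}, fmt.Errorf("line %d: expected %s, got %s (%q)",
			tok.LineNr, tt, tok.Type, tok.Lit)
	}
	return s.Scan() // consumes the token buffered by Peek
}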