From 0d7a0ffce2f08695ab04a20c5de866c7aa8a8da2 Mon Sep 17 00:00:00 2001 From: Silvan Jegen Date: Sun, 19 Feb 2017 18:55:19 +0100 Subject: Add code examples --- stasherpresent.slide | 213 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 182 insertions(+), 31 deletions(-) diff --git a/stasherpresent.slide b/stasherpresent.slide index 5383ab0..c73f399 100644 --- a/stasherpresent.slide +++ b/stasherpresent.slide @@ -4,7 +4,7 @@ Prototyping a logstash alternative 23 February 2017 Silvan Jegen -Computational Linguist, BioVariance GmbH +Computational Linguist me@sillymon.ch https://sillymon.ch @@ -17,20 +17,33 @@ https://sillymon.ch * What is it? +- Now a project of elastic +- Used to be log-focused + .image img/logstash-img1.png +* What does it do? + +"Centralize, Transform & Stash your data" + +.image img/logstash-overview.png + + * "Centralize, Transform & Stash" -- Input -- Filters -- Outputs +- "Inputs" from Log files, DBs, HTTP +- "Filters" for cleaning and transforming +- "Outputs" for archiving, alerting, monitoring, etc. + + +* "Centralize, Transform & Stash" -Examples +Plugins -- Input: -- Filters: -- Outputs: +- Inputs: file, syslog, redis, ... +- Filters: grok, mutate, drop, ... +- Outputs: elasticsearch, file, graphite, email, ... * How does it work? @@ -40,66 +53,204 @@ Custom configuration language input { stdin {} } filter { - http { - "A" => "url:port" + anonymize { + algorithm => "SHA256" + fields => ["field1", "field2"] + key => "something" } } output { - elasticsearch { hosts => ["localhost:9200"] } - stdout { codec => rubydebug } + elasticsearch { + hosts => ["localhost:9200"] + } + + csv { + fields => ["field1", "[nested][field]"] + path => "./test-%{+YYYY-MM-dd}.txt" + } } -* Modules +* Plugins - output { - stdout { codec => rubydebug } + filter { + anonymize { + algorithm => "SHA256" + fields => ["field1", "field2"] + key => "something" + } } * Some statistics - Written in Ruby -- 10000 LOC in 999 files +- ~25K LOC in 377 files +- 360+ contributors +- 7'600+ commits +- 7'000+ stars * Stasher Why? -- Apparently Logstash is very slow (see ElasticSearch Bumbles) +- Apparently Logstash is very slow +- Generality of the work flow - I like Go -- Generality (error handling) -- Or not? * Implementation -- Plugins? -- Generality and error handling + +* Interfaces + + type Input interface { + Start() chan *work.Work + } + + + type Filter interface { + Filter(*work.Work) *work.Work + } + + + type Output interface { + Output(*work.Work) error + } + + +* Manager + + type Manager struct { + Input input.Input + Filter filter.Filter + Output output.Output + } + + +* Manager + + func (m *Manager) Run() { + var wg sync.WaitGroup + ic := m.Input.Start() + for w := range ic { + if w.Error() != nil { + fmt.Printf("Got an error when getting Work input: %q\n", w.Error()) + continue + } + wg.Add(1) + go func(w *work.Work) { + nw := m.Filter.Filter(w) + err := nw.Error() + if err != nil { + fmt.Printf("Got an error when filtering Work: %q\n", err) + } + err = m.Output.Output(nw) + if err != nil { + fmt.Printf("Got an error when outputting Work: %q\n", err) + } + wg.Done() + }(w) + } + wg.Wait() + } + +* Main advantages over shell script + +- Error handling + +- Declarative config + + +* Error handling + + for w := range ic { + if w.Err != nil { + fmt.Printf("Got an error when getting Work input: %q\n", w.Err) + continue + } + wg.Add(1) + go func(w *work.Work) { + nw := m.Filter.Filter(w) + err := nw.Error() + if err != nil { + fmt.Printf("Got an error when filtering Work: %q\n", err) + } + err = m.Output.Output(nw) + if err != nil { + fmt.Printf("Got an error when outputting Work: %q\n", err) + } + + wg.Done() + }(w) + } + + +* Config parser + +- Currently only supports string literals (no arrays) + +- Hand-written parser + +- Uses the Registry to get the modules + + +* Registry + +registry/registry.go + + var ( + Inputregistry map[string]func(map[string]string) input.Input + Filterregistry map[string]func(map[string]string) filter.Filter + Outputregistry map[string]func(map[string]string) output.Output + ) + + +* Registry + +input/http/http.go + + func init() { + registry.Inputregistry["http"] = New + } + + +* Registry + +conf/init.go + + import ( + // Initialize the different modules. By importing them in this + // way, their constructors are registered in the registry. + _ "github.com/Shugyousha/stasher/input/http" + _ "github.com/Shugyousha/stasher/input/stdin" + + _ "github.com/Shugyousha/stasher/filter/http" + _ "github.com/Shugyousha/stasher/filter/str" + + _ "github.com/Shugyousha/stasher/output/http" + _ "github.com/Shugyousha/stasher/output/stdout" + ) * Demo -* Todos +* High-level TODOs - Watch input directories - Multiple modules for each main module - Proper (configurable?) error handling -- DSL/declarative vs. Programming language balance? +- If else? * Considerations -- Plugins (Go 1.8!?) +- "Dynamic" Plugins (Go 1.8!?) +- Use HTTP for everything? - Better off with shell scripts? - -Further Text, including invocations like: - -.link http://foo label -.caption _Gopher_ by [[http://www.reneefrench.com][Renée French]] - -Again, more text +- Generality and error handling +- DSL/declarative vs. Programming language balance? -- cgit v1.2.1-18-gbd029