summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSilvan Jegen <s.jegen@gmail.com>2017-01-16 21:04:42 +0100
committerSilvan Jegen <s.jegen@gmail.com>2017-01-16 21:04:42 +0100
commitcdc9dd8459b5e303fb9e979ffa34c5af932aa8df (patch)
treef80c1ca0ec07ee6150d84764787ecbc8604b8755
parentaa7a3501c67f435e7f34b19f2f3f87f910005a5a (diff)
Remove unneeded slide and work on the outline
-rw-r--r--stasherpresent.tex293
1 files changed, 51 insertions, 242 deletions
diff --git a/stasherpresent.tex b/stasherpresent.tex
index d74d4e5..3536572 100644
--- a/stasherpresent.tex
+++ b/stasherpresent.tex
@@ -12,7 +12,7 @@
\begin{document}
-\title{stasher - Conceptionally exploring logstash in Go}
+\title{stasher - Conceptually exploring Logstash in Go}
\author{Silvan Jegen}
\date{\today}
@@ -27,281 +27,90 @@
\section{Logstash}
\subsection{What is it?}
-
-\begin{frame}\frametitle{Uses}
- Extensible Markup Language (XML) \pause
-
- \begin{block}{XML aspects}
- \begin{itemize}
- \item Well-formedness
- \item Validation
- \item Namespaces
- \item Entities
- \end{itemize}
- \end{block}
+\begin{frame}\frametitle{Logstash}
+ \begin{columns}[T]
+ \begin{column}{.5\textwidth}
+ \begin{center}
+ \Huge Logstash
+ \end{center}
+ \end{column}
+ \begin{column}{.5\textwidth}
+ \includegraphics[width=\textwidth]{icon-logstash-bb.png}
+ \end{column}
+ \end{columns}
\end{frame}
-\begin{frame}\frametitle{XML in theory}
- \begin{block}{Related specifications}
- \begin{itemize}
- \item XSLT
- \item XPath
- \item XQuery
- \item XML Encryption
- \item ...
- \end{itemize}
- \end{block}
+\begin{frame}\frametitle{What is it?}
+ \includegraphics[width=0.5\textwidth]{logstash-img1.png}
\end{frame}
-\begin{frame}\frametitle{XML-based formats}
- \begin{block}{Variants}
+\begin{frame}\frametitle{What is it?}
+ \begin{block}{Centralize, Transform \& Stash}
\begin{itemize}
- \item RDF XML
- \item XMPP
- \item EPUB
- \item XHTML
- \item ...
- \item 200+ more
+ \item Input
+ \item Filter
+ \item Output
\end{itemize}
\end{block}
\end{frame}
-\subsection{XML in practice}
-\begin{frame}\frametitle{XML in practice}
- \Huge XML in practice
+\begin{frame}\frametitle{Examples}
+ \begin{itemize}
+ \item Input:
+ \item Filter:
+ \item Output:
+ \end{itemize}
\end{frame}
+\subsection{How does it work?}
\begin{frame}\frametitle{XML in practice}
- \begin{block}{Enterprise usage}
- \begin{itemize}
- \item SOAP
- \item Configuration
- \item Data storage/exchange
- \item Java ecosystem...
- \end{itemize}
- \end{block}
-\end{frame}
-
-\begin{frame}[fragile]\frametitle{Markup}
-
- Annotate parts of text with additional information \pause
-
- \begin{block}{Text Markup}
- \begin{verbatim}
- <document>Some text <tag>some other text that
- should be tagged</tag> even <tag2>more</tag2>
- text...</document>
- \end{verbatim}
- \end{block}
+ \Huge XML in practice
\end{frame}
-\begin{frame}[fragile]\frametitle{XML vs. JSON}
- \begin{block}{XML}
+\begin{frame}[fragile]\frametitle{Configuration}
+ \begin{block}{Custom configuration language?}
\begin{verbatim}
- <document>Some text <tag>some other text that
- should be tagged</tag> even <tag2>more</tag2>
- text...</document>
- \end{verbatim}
- \end{block} \pause
- \begin{block}{JSON}
- \begin{verbatim}
- ["Some text ", {"t": "tag", "s": "some other text
- that should be tagged"}, "even", {"t": "tag2",
- "s": "more"}, "text..."]
+ input { stdin { } }
+ output {
+ elasticsearch { hosts => ["localhost:9200"] }
+ stdout { codec => rubydebug }
+ }
\end{verbatim}
\end{block}
\end{frame}
-\section{Dealing with XML}
-\begin{frame}\frametitle{Dealing with XML}
- \Huge Dealing with XML
+\section{stasher}
+\begin{frame}\frametitle{stasher}
+ \Huge stasher
\end{frame}
-\subsection{Programming interfaces}
-\begin{frame}\frametitle{Programming interfaces}
+\subsection{Why?}
+\begin{frame}\frametitle{Why?}
\begin{itemize}
- \item Stream-oriented (SAX, Stax)
- \item Tree traversal (DOM)
- \item XML Data binding
- \item Transformation languages (XSLT, XQuery) \pause
- \item Other?
+ \item Apparently Logstash is very slow \pause
+ \item I like Go
+ \item Generality \pause
+ \item Or not?
\end{itemize}
\end{frame}
-\subsection{Benchmark}
+\subsection{Implementation}
\begin{frame}\frametitle{Benchmark}
-
- \begin{itemize}
- \item ezxml
- \item Golang encoding/xml
- \item mxml ('Mini-XML', not 'Minimal XML')
- \item Python 2 ElementTree
- \item sxmlc
- \item yxml
- \end{itemize}
-
- All code available at:
-
- git://git.sillymon.ch/slcon3.git
-
-\end{frame}
-
-\begin{frame}\frametitle{Benchmark setup}
- Linux machine, i7 CPU, 8GB RAM
\begin{itemize}
- \item 627MB of XML in 10'000 files from PubMed Central
- \item Printing the article titles
- \item 20 runs (after cache warming)
- \item Single-threaded (except for Go)
+ \item Plugins?
+ \item Generality
+ \item DSL vs. Programming language balance?
\end{itemize}
\end{frame}
-\begin{frame}[fragile]\frametitle{ezxml}
- \begin{block}{ezxml}
- \begin{itemize}
- \item Program: 21 lines
- \item Library: 623 lines
- \item URL: http://ezxml.sourceforge.net/
- \item Type: DOM (Level 3; XPath)
- \end{itemize}
- \end{block}
- \begin{verbatim}
- ezxml_t title = ezxml_get(ezdoc, "front", 0,\
- "article-meta", 0, "title-group", 0,\
- "article-title", -1);
- \end{verbatim}
-\end{frame}
-
-\begin{frame}[fragile]\frametitle{Go encoding/xml}
- \begin{block}{Go encoding/xml}
- \begin{itemize}
- \item Program: 30 lines
- \item Library: 6235 lines (stdlib)
- \item URL: https://golang.org/pkg/encoding/xml/
- \item Type: XML Data binding
- \end{itemize}
- \end{block}
- \begin{verbatim}
- type article struct {
- Title string `xml:"front>article-meta>title-group>\
- article-title"`
- }
- \end{verbatim}
-\end{frame}
-
-\begin{frame}[fragile]\frametitle{mxml}
- \begin{block}{mxml}
- \begin{itemize}
- \item Program: 38 lines
- \item Library: 9633 lines
- \item URL: http://www.minixml.org/
- \item Type: DOM (Level 3; Xpath)?
- \end{itemize}
- \end{block}
- \begin{verbatim}
- node = mxmlFindElement(root, root, "title-group",
- NULL, NULL, MXML_DESCEND);
- \end{verbatim}
-\end{frame}
-
-\begin{frame}[fragile]\frametitle{Python 2 ElementTree}
- \begin{block}{Python 2 ElementTree}
- \begin{itemize}
- \item Program: 12 lines
- \item Library: 1107 lines (stdlib)
- \item URL:
-
- https://docs.python.org/2/library/xml.etree.element\-tree.html
-
- \item Type: DOM (Level 3; Xpath)?
- \end{itemize}
- \end{block}
- \begin{verbatim}
- tg = r.findall("./front/article-meta/title-group")
- at = tg[0].find("article-title")
- \end{verbatim}
-\end{frame}
-
-\begin{frame}[fragile]\frametitle{sxmlc}
- \begin{block}{sxmlc}
- \begin{itemize}
- \item Program: 59 lines
- \item Library: 2690 lines
- \item URL: http://sxmlc.sourceforge.net/
- \item Type: DOM
- \end{itemize}
- \end{block}
- \begin{verbatim}
- const char *path[] = {"front", "article-meta",\
- "title-group", "article-title", NULL};
- for (int i = 0; path[i]; i++) {
- next = find_child_node(next, path[i]);
- if (!next) {
- fprintf(stderr, "Could not find '%s'
- tag.\n", path[i]);
- return;
- }
- }
- \end{verbatim}
-\end{frame}
-
-\begin{frame}[fragile]\frametitle{yxml}
- \begin{block}{yxml}
- \begin{itemize}
- \item Program: 103 lines
- \item Library: 1039 lines
- \item URL: https://dev.yorhel.nl/yxml
- \item Type: Stream-oriented
- \end{itemize}
- \end{block}
- \begin{verbatim}
- case YXML_ELEMSTART:
- if (!strcmp(state->elem, "title-group")) {
- intitlegroup = 1;
- } else if (!strcmp(state->elem, "article-title")
- && intitlegroup) {
- printf("%s: ", state->elem);
- inarticletitle = 1;
- }
- break;
- \end{verbatim}
-\end{frame}
-
-
-\begin{frame}\frametitle{Time \& Size}
-
- \begin{tabular}{ l | c | c | c | c | c }
- & mean & $\sigma$ & min & max & size \\ \hline
- Python ElementTree & 155.5 & 8.869 & 145.7 & 172.3 & N/A \\
- Go encoding/xml & 48.34 & 4.982 & 35.33 & 52.92 & 2M \\
- mxml & 23.96 & 1.841 & 22.32 & 27.64 & N/A \\
- sxmlc & 15.51 & 0.259 & 15.12 & 16.01 & 41K \\
- ezxml & 6.460 & 0.058 & 6.366 & 6.592 & 31K \\
- yxml & 4.123 & 0.220 & 3.885 & 4.520 & 18K
- \end{tabular}
-
-\end{frame}
-
-
-\section{Conclusion}
-\begin{frame}[fragile]\frametitle{Conclusion}
+\section{Considerations?}
+\begin{frame}\frametitle{Considerations}
\begin{itemize}
- \item Complex specifications, (comparatively) hard to parse and verbose
- \item Use the ezxml or yxml libraries \pause
- \item Ok:
- \begin{verbatim}
- <document>Some text <tag>some other text that
- should be tagged</tag> even <tag2>more</tag2>
- text...</document>
- \end{verbatim} \pause
- \item No:
- \begin{verbatim}
- <thing><key>Key</key><value>Val</value></thing>
- \end{verbatim}
+ \item Plugins?
+ \item
\end{itemize}
\end{frame}
-
\begin{frame}\frametitle{Thanks for your attention}
Questions?