From cdc9dd8459b5e303fb9e979ffa34c5af932aa8df Mon Sep 17 00:00:00 2001 From: Silvan Jegen Date: Mon, 16 Jan 2017 21:04:42 +0100 Subject: Remove unneeded slide and work on the outline --- stasherpresent.tex | 293 ++++++++++------------------------------------------- 1 file changed, 51 insertions(+), 242 deletions(-) diff --git a/stasherpresent.tex b/stasherpresent.tex index d74d4e5..3536572 100644 --- a/stasherpresent.tex +++ b/stasherpresent.tex @@ -12,7 +12,7 @@ \begin{document} -\title{stasher - Conceptionally exploring logstash in Go} +\title{stasher - Conceptually exploring Logstash in Go} \author{Silvan Jegen} \date{\today} @@ -27,281 +27,90 @@ \section{Logstash} \subsection{What is it?} - -\begin{frame}\frametitle{Uses} - Extensible Markup Language (XML) \pause - - \begin{block}{XML aspects} - \begin{itemize} - \item Well-formedness - \item Validation - \item Namespaces - \item Entities - \end{itemize} - \end{block} +\begin{frame}\frametitle{Logstash} + \begin{columns}[T] + \begin{column}{.5\textwidth} + \begin{center} + \Huge Logstash + \end{center} + \end{column} + \begin{column}{.5\textwidth} + \includegraphics[width=\textwidth]{icon-logstash-bb.png} + \end{column} + \end{columns} \end{frame} -\begin{frame}\frametitle{XML in theory} - \begin{block}{Related specifications} - \begin{itemize} - \item XSLT - \item XPath - \item XQuery - \item XML Encryption - \item ... - \end{itemize} - \end{block} +\begin{frame}\frametitle{What is it?} + \includegraphics[width=0.5\textwidth]{logstash-img1.png} \end{frame} -\begin{frame}\frametitle{XML-based formats} - \begin{block}{Variants} +\begin{frame}\frametitle{What is it?} + \begin{block}{Centralize, Transform \& Stash} \begin{itemize} - \item RDF XML - \item XMPP - \item EPUB - \item XHTML - \item ... 
- \item 200+ more + \item Input + \item Filter + \item Outputs \end{itemize} \end{block} \end{frame} -\subsection{XML in practice} -\begin{frame}\frametitle{XML in practice} - \Huge XML in practice +\begin{frame}\frametitle{Examples} + \begin{itemize} + \item Input: + \item Filter: + \item Outputs: + \end{itemize} \end{frame} +\subsection{How does it work?} \begin{frame}\frametitle{XML in practice} - \begin{block}{Enterprise usage} - \begin{itemize} - \item SOAP - \item Configuration - \item Data storage/exchange - \item Java ecosystem... - \end{itemize} - \end{block} -\end{frame} - -\begin{frame}[fragile]\frametitle{Markup} - - Annotate parts of text with additional information \pause - - \begin{block}{Text Markup} - \begin{verbatim} - Some text some other text that - should be tagged even more - text... - \end{verbatim} - \end{block} + \Huge XML in practice \end{frame} -\begin{frame}[fragile]\frametitle{XML vs. JSON} - \begin{block}{XML} +\begin{frame}[fragile]\frametitle{Configuration} + \begin{block}{Custom configuration language?} \begin{verbatim} - Some text some other text that - should be tagged even more - text... - \end{verbatim} - \end{block} \pause - \begin{block}{JSON} - \begin{verbatim} - ["Some text ", {"t": "tag", "s": "some other text - that should be tagged"}, "even", {"t": "tag2", - "s": "more"}, "text..."] + input { stdin { } } + output { + elasticsearch { hosts => ["localhost:9200"] } + stdout { codec => rubydebug } + } \end{verbatim} \end{block} \end{frame} -\section{Dealing with XML} -\begin{frame}\frametitle{Dealing with XML} - \Huge Dealing with XML +\section{stasher} +\begin{frame}\frametitle{stasher} + \Huge stasher \end{frame} -\subsection{Programming interfaces} -\begin{frame}\frametitle{Programming interfaces} +\subsection{Why?} +\begin{frame}\frametitle{Why?} \begin{itemize} - \item Stream-oriented (SAX, Stax) - \item Tree traversal (DOM) - \item XML Data binding - \item Transformation languages (XSLT, XQuery) \pause - \item Other? 
+ \item Apparently Logstash is very slow \pause + \item I like Go + \item Generality \pause + \item Or not? \end{itemize} \end{frame} -\subsection{Benchmark} +\subsection{Implementation} \begin{frame}\frametitle{Benchmark} - - \begin{itemize} - \item ezxml - \item Golang encoding/xml - \item mxml ('Mini-XML', not 'Minimal XML') - \item Python 2 ElementTree - \item sxmlc - \item yxml - \end{itemize} - - All code available at: - - git://git.sillymon.ch/slcon3.git - -\end{frame} - -\begin{frame}\frametitle{Benchmark setup} - Linux machine, i7 CPU, 8GB RAM \begin{itemize} - \item 627MB of XML in 10'000 files from PubMed Central - \item Printing the article titles - \item 20 runs (after cache warming) - \item Single-threaded (except for Go) + \item Plugins? + \item Generality + \item DSL vs. Programming language balance? \end{itemize} \end{frame} -\begin{frame}[fragile]\frametitle{ezxml} - \begin{block}{ezxml} - \begin{itemize} - \item Program: 21 lines - \item Library: 623 lines - \item URL: http://ezxml.sourceforge.net/ - \item Type: DOM (Level 3; XPath) - \end{itemize} - \end{block} - \begin{verbatim} - ezxml_t title = ezxml_get(ezdoc, "front", 0,\ - "article-meta", 0, "title-group", 0,\ - "article-title", -1); - \end{verbatim} -\end{frame} - -\begin{frame}[fragile]\frametitle{Go encoding/xml} - \begin{block}{Go encoding/xml} - \begin{itemize} - \item Program: 30 lines - \item Library: 6235 lines (stdlib) - \item URL: https://golang.org/pkg/encoding/xml/ - \item Type: XML Data binding - \end{itemize} - \end{block} - \begin{verbatim} - type article struct { - Title string `xml:"front>article-meta>title-group>\ - article-title"` - } - \end{verbatim} -\end{frame} - -\begin{frame}[fragile]\frametitle{mxml} - \begin{block}{mxml} - \begin{itemize} - \item Program: 38 lines - \item Library: 9633 lines - \item URL: http://www.minixml.org/ - \item Type: DOM (Level 3; Xpath)? 
- \end{itemize} - \end{block} - \begin{verbatim} - node = mxmlFindElement(root, root, "title-group", - NULL, NULL, MXML_DESCEND); - \end{verbatim} -\end{frame} - -\begin{frame}[fragile]\frametitle{Python 2 ElementTree} - \begin{block}{Python 2 ElementTree} - \begin{itemize} - \item Program: 12 lines - \item Library: 1107 lines (stdlib) - \item URL: - - https://docs.python.org/2/library/xml.etree.element\-tree.html - - \item Type: DOM (Level 3; Xpath)? - \end{itemize} - \end{block} - \begin{verbatim} - tg = r.findall("./front/article-meta/title-group") - at = tg[0].find("article-title") - \end{verbatim} -\end{frame} - -\begin{frame}[fragile]\frametitle{sxmlc} - \begin{block}{sxmlc} - \begin{itemize} - \item Program: 59 lines - \item Library: 2690 lines - \item URL: http://sxmlc.sourceforge.net/ - \item Type: DOM - \end{itemize} - \end{block} - \begin{verbatim} - const char *path[] = {"front", "article-meta",\ - "title-group", "article-title", NULL}; - for (int i = 0; path[i]; i++) { - next = find_child_node(next, path[i]); - if (!next) { - fprintf(stderr, "Could not find '%s' - tag.\n", path[i]); - return; - } - } - \end{verbatim} -\end{frame} - -\begin{frame}[fragile]\frametitle{yxml} - \begin{block}{yxml} - \begin{itemize} - \item Program: 103 lines - \item Library: 1039 lines - \item URL: https://dev.yorhel.nl/yxml - \item Type: Stream-oriented - \end{itemize} - \end{block} - \begin{verbatim} - case YXML_ELEMSTART: - if (!strcmp(state->elem, "title-group")) { - intitlegroup = 1; - } else if (!strcmp(state->elem, "article-title") - && intitlegroup) { - printf("%s: ", state->elem); - inarticletitle = 1; - } - break; - \end{verbatim} -\end{frame} - - -\begin{frame}\frametitle{Time \& Size} - - \begin{tabular}{ l | c | c | c | c | c } - & mean & $\sigma$ & min & max & size \\ \hline - Python ElementTree & 155.5 & 8.869 & 145.7 & 172.3 & N/A \\ - Go encoding/xml & 48.34 & 4.982 & 35.33 & 52.92 & 2M \\ - mxml & 23.96 & 1.841 & 22.32 & 27.64 & N/A \\ - sxmlc & 15.51 & 
0.259 & 15.12 & 16.01 & 41K \\ - ezxml & 6.460 & 0.058 & 6.366 & 6.592 & 31K \\ - yxml & 4.123 & 0.220 & 3.885 & 4.520 & 18K - \end{tabular} - -\end{frame} - - -\section{Conclusion} -\begin{frame}[fragile]\frametitle{Conclusion} +\section{Considerations?} +\begin{frame}\frametitle{Considerations} \begin{itemize} - \item Complex specifications, (comparatively) hard to parse and verbose - \item Use the ezxml or yxml libraries \pause - \item Ok: - \begin{verbatim} - Some text some other text that - should be tagged even more - text... - \end{verbatim} \pause - \item No: - \begin{verbatim} - KeyVal - \end{verbatim} + \item Plugins? + \item \end{itemize} \end{frame} - \begin{frame}\frametitle{Thanks for your attention} Questions? -- cgit v1.2.1-18-gbd029