summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--stasherpresent.tex311
1 files changed, 311 insertions, 0 deletions
diff --git a/stasherpresent.tex b/stasherpresent.tex
new file mode 100644
index 0000000..d74d4e5
--- /dev/null
+++ b/stasherpresent.tex
@@ -0,0 +1,311 @@
+\pdfminorversion=4 % Needed for Impressive to work with this file (seems to take effect only when compiling with pdflatex)
+\documentclass{beamer}
+% \usetheme{Frankfurt}
+\usetheme{Gopher}
+\usecolortheme{Gopher}
+\usepackage{german}
+\usepackage{qtree}
+\usepackage{graphicx}
+%\usepackage{covington}
+\usepackage{ulem}
+
+
+\begin{document}
+
+\title{stasher - Conceptually exploring logstash in Go}
+\author{Silvan Jegen}
+\date{\today}
+
+\begin{frame}
+\titlepage
+\end{frame}
+
+\begin{frame}
+\frametitle{Contents} \tableofcontents
+\end{frame}
+
+
+\section{Logstash}
+\subsection{What is it?}
+
+\begin{frame}\frametitle{Uses}
+ Extensible Markup Language (XML) \pause
+
+ \begin{block}{XML aspects}
+ \begin{itemize}
+ \item Well-formedness
+ \item Validation
+ \item Namespaces
+ \item Entities
+ \end{itemize}
+ \end{block}
+\end{frame}
+
+\begin{frame}\frametitle{XML in theory}
+ \begin{block}{Related specifications}
+ \begin{itemize}
+ \item XSLT
+ \item XPath
+ \item XQuery
+ \item XML Encryption
+ \item \dots
+ \end{itemize}
+ \end{block}
+\end{frame}
+
+\begin{frame}\frametitle{XML-based formats}
+ \begin{block}{Variants}
+ \begin{itemize}
+ \item RDF XML
+ \item XMPP
+ \item EPUB
+ \item XHTML
+ \item \dots
+ \item 200+ more
+ \end{itemize}
+ \end{block}
+\end{frame}
+
+\subsection{XML in practice}
+\begin{frame}\frametitle{XML in practice}
+ \Huge XML in practice
+\end{frame}
+
+\begin{frame}\frametitle{XML in practice}
+ \begin{block}{Enterprise usage}
+ \begin{itemize}
+ \item SOAP
+ \item Configuration
+ \item Data storage/exchange
+ \item Java ecosystem\dots
+ \end{itemize}
+ \end{block}
+\end{frame}
+
+\begin{frame}[fragile]\frametitle{Markup}
+
+ Annotate parts of text with additional information \pause
+
+ \begin{block}{Text Markup}
+ \begin{verbatim}
+ <document>Some text <tag>some other text that
+ should be tagged</tag> even <tag2>more</tag2>
+ text...</document>
+ \end{verbatim}
+ \end{block}
+\end{frame}
+
+\begin{frame}[fragile]\frametitle{XML vs. JSON}
+ \begin{block}{XML}
+ \begin{verbatim}
+ <document>Some text <tag>some other text that
+ should be tagged</tag> even <tag2>more</tag2>
+ text...</document>
+ \end{verbatim}
+ \end{block} \pause
+ \begin{block}{JSON}
+ \begin{verbatim}
+ ["Some text ", {"t": "tag", "s": "some other text
+ that should be tagged"}, "even", {"t": "tag2",
+ "s": "more"}, "text..."]
+ \end{verbatim}
+ \end{block}
+\end{frame}
+
+\section{Dealing with XML}
+\begin{frame}\frametitle{Dealing with XML}
+ \Huge Dealing with XML
+\end{frame}
+
+\subsection{Programming interfaces}
+\begin{frame}\frametitle{Programming interfaces}
+ \begin{itemize}
+ \item Stream-oriented (SAX, StAX)
+ \item Tree traversal (DOM)
+ \item XML Data binding
+ \item Transformation languages (XSLT, XQuery) \pause
+ \item Other?
+ \end{itemize}
+\end{frame}
+
+\subsection{Benchmark}
+\begin{frame}\frametitle{Benchmark}
+
+ \begin{itemize}
+ \item ezxml
+ \item Golang encoding/xml
+ \item mxml (`Mini-XML', not `Minimal XML')
+ \item Python 2 ElementTree
+ \item sxmlc
+ \item yxml
+ \end{itemize}
+
+ All code available at:
+
+ git://git.sillymon.ch/slcon3.git
+
+\end{frame}
+
+\begin{frame}\frametitle{Benchmark setup}
+ Linux machine, i7 CPU, 8GB RAM
+ \begin{itemize}
+ \item 627MB of XML in 10'000 files from PubMed Central
+ \item Printing the article titles
+ \item 20 runs (after cache warming)
+ \item Single-threaded (except for Go)
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]\frametitle{ezxml}
+ \begin{block}{ezxml}
+ \begin{itemize}
+ \item Program: 21 lines
+ \item Library: 623 lines
+ \item URL: http://ezxml.sourceforge.net/
+ \item Type: DOM (Level 3; XPath)
+ \end{itemize}
+ \end{block}
+ \begin{verbatim}
+ ezxml_t title = ezxml_get(ezdoc, "front", 0,\
+ "article-meta", 0, "title-group", 0,\
+ "article-title", -1);
+ \end{verbatim}
+\end{frame}
+
+\begin{frame}[fragile]\frametitle{Go encoding/xml}
+ \begin{block}{Go encoding/xml}
+ \begin{itemize}
+ \item Program: 30 lines
+ \item Library: 6235 lines (stdlib)
+ \item URL: https://golang.org/pkg/encoding/xml/
+ \item Type: XML Data binding
+ \end{itemize}
+ \end{block}
+ \begin{verbatim}
+ type article struct {
+ Title string `xml:"front>article-meta>title-group>\
+ article-title"`
+ }
+ \end{verbatim}
+\end{frame}
+
+\begin{frame}[fragile]\frametitle{mxml}
+ \begin{block}{mxml}
+ \begin{itemize}
+ \item Program: 38 lines
+ \item Library: 9633 lines
+ \item URL: http://www.minixml.org/
+ \item Type: DOM (Level 3; XPath)?
+ \end{itemize}
+ \end{block}
+ \begin{verbatim}
+ node = mxmlFindElement(root, root, "title-group",
+ NULL, NULL, MXML_DESCEND);
+ \end{verbatim}
+\end{frame}
+
+\begin{frame}[fragile]\frametitle{Python 2 ElementTree}
+ \begin{block}{Python 2 ElementTree}
+ \begin{itemize}
+ \item Program: 12 lines
+ \item Library: 1107 lines (stdlib)
+ \item URL:
+
+ \url{https://docs.python.org/2/library/xml.etree.elementtree.html}
+
+ \item Type: DOM (Level 3; XPath)?
+ \end{itemize}
+ \end{block}
+ \begin{verbatim}
+ tg = r.findall("./front/article-meta/title-group")
+ at = tg[0].find("article-title")
+ \end{verbatim}
+\end{frame}
+
+\begin{frame}[fragile]\frametitle{sxmlc}
+ \begin{block}{sxmlc}
+ \begin{itemize}
+ \item Program: 59 lines
+ \item Library: 2690 lines
+ \item URL: http://sxmlc.sourceforge.net/
+ \item Type: DOM
+ \end{itemize}
+ \end{block}
+ \begin{verbatim}
+ const char *path[] = {"front", "article-meta",\
+ "title-group", "article-title", NULL};
+ for (int i = 0; path[i]; i++) {
+ next = find_child_node(next, path[i]);
+ if (!next) {
+ fprintf(stderr, "Could not find '%s'
+ tag.\n", path[i]);
+ return;
+ }
+ }
+ \end{verbatim}
+\end{frame}
+
+\begin{frame}[fragile]\frametitle{yxml}
+ \begin{block}{yxml}
+ \begin{itemize}
+ \item Program: 103 lines
+ \item Library: 1039 lines
+ \item URL: https://dev.yorhel.nl/yxml
+ \item Type: Stream-oriented
+ \end{itemize}
+ \end{block}
+ \begin{verbatim}
+ case YXML_ELEMSTART:
+ if (!strcmp(state->elem, "title-group")) {
+ intitlegroup = 1;
+ } else if (!strcmp(state->elem, "article-title")
+ && intitlegroup) {
+ printf("%s: ", state->elem);
+ inarticletitle = 1;
+ }
+ break;
+ \end{verbatim}
+\end{frame}
+
+
+\begin{frame}\frametitle{Time \& Size}
+
+ \begin{tabular}{ l | c | c | c | c | c }
+ & mean & $\sigma$ & min & max & size \\ \hline
+ Python ElementTree & 155.5 & 8.869 & 145.7 & 172.3 & N/A \\
+ Go encoding/xml & 48.34 & 4.982 & 35.33 & 52.92 & 2M \\
+ mxml & 23.96 & 1.841 & 22.32 & 27.64 & N/A \\
+ sxmlc & 15.51 & 0.259 & 15.12 & 16.01 & 41K \\
+ ezxml & 6.460 & 0.058 & 6.366 & 6.592 & 31K \\
+ yxml & 4.123 & 0.220 & 3.885 & 4.520 & 18K
+ \end{tabular}
+
+\end{frame}
+
+
+\section{Conclusion}
+\begin{frame}[fragile]\frametitle{Conclusion}
+ \begin{itemize}
+ \item Complex specifications, (comparatively) hard to parse and verbose
+ \item Use the ezxml or yxml libraries \pause
+ \item Ok:
+ \begin{verbatim}
+ <document>Some text <tag>some other text that
+ should be tagged</tag> even <tag2>more</tag2>
+ text...</document>
+ \end{verbatim} \pause
+ \item No:
+ \begin{verbatim}
+ <thing><key>Key</key><value>Val</value></thing>
+ \end{verbatim}
+ \end{itemize}
+\end{frame}
+
+
+\begin{frame}\frametitle{Thanks for your attention}
+
+ Questions?
+
+\end{frame}
+
+\end{document}