% Beamer slide deck comparing XML parsing libraries (ezxml, Go encoding/xml,
% mxml, Python 2 ElementTree, sxmlc, yxml) on a PubMed Central benchmark.
%
% Layout notes:
%   * Frames that contain verbatim material carry the [fragile] option; for
%     those frames \end{frame} has to stand alone on its own line.
%   * Verbatim bodies and their \end{verbatim} are kept flush-left because any
%     leading whitespace inside a verbatim environment is typeset literally.

\pdfminorversion=4 % This is needed for impressive to work with this file
                   % (only functions when using pdflatex it seems)
\documentclass{beamer}

% \usetheme{Frankfurt}
\usetheme{Gopher}
\usecolortheme{Gopher}

\usepackage{german}
\usepackage{qtree}
\usepackage{graphicx}
%\usepackage{covington}
\usepackage{ulem}

\begin{document}

% NOTE(review): the title mentions stasher/logstash while every slide below is
% about XML -- looks like a leftover from an earlier talk; confirm and adjust.
\title{stasher -- Conceptually exploring logstash in Go}
\author{Silvan Jegen}
\date{\today}

\begin{frame}
  \titlepage
\end{frame}

\begin{frame}
  \frametitle{Contents}
  \tableofcontents
\end{frame}

% NOTE(review): the section, subsection and first frame title below refer to
% Logstash although the content introduces XML -- presumably template
% leftovers; confirm the intended headings.
\section{Logstash}
\subsection{What is it?}

\begin{frame}
  \frametitle{Uses}
  Extensible Markup Language (XML)
  \pause
  \begin{block}{XML aspects}
    \begin{itemize}
      \item Well-formedness
      \item Validation
      \item Namespaces
      \item Entities
    \end{itemize}
  \end{block}
\end{frame}

\begin{frame}
  \frametitle{XML in theory}
  \begin{block}{Related specifications}
    \begin{itemize}
      \item XSLT
      \item XPath
      \item XQuery
      \item XML Encryption
      \item \dots
    \end{itemize}
  \end{block}
\end{frame}

\begin{frame}
  \frametitle{XML-based formats}
  \begin{block}{Variants}
    \begin{itemize}
      \item RDF XML
      \item XMPP
      \item EPUB
      \item XHTML
      \item \dots
      \item 200+ more
    \end{itemize}
  \end{block}
\end{frame}

\subsection{XML in practice}

% Divider slide: shows only the subsection name in a large font.
\begin{frame}
  \frametitle{XML in practice}
  \Huge XML in practice
\end{frame}

\begin{frame}
  \frametitle{XML in practice}
  \begin{block}{Enterprise usage}
    \begin{itemize}
      \item SOAP
      \item Configuration
      \item Data storage/exchange
      \item Java ecosystem\dots
    \end{itemize}
  \end{block}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Markup}
  Annotate parts of text with additional information
  \pause
  \begin{block}{Text Markup}
    % NOTE(review): the angle-bracket tags were missing from this sample; they
    % are reconstructed from the equivalent JSON on the ``XML vs. JSON''
    % slide. Line breaks are a guess -- confirm against the original slides.
    \begin{verbatim}
Some text <tag>some other text that
should be tagged</tag> even
<tag2>more</tag2> text...
\end{verbatim}
  \end{block}
\end{frame}

\begin{frame}[fragile]
  \frametitle{XML vs. JSON}
  \begin{block}{XML}
    % NOTE(review): tags reconstructed from the JSON block below; confirm.
    \begin{verbatim}
Some text <tag>some other text that
should be tagged</tag> even
<tag2>more</tag2> text...
\end{verbatim}
  \end{block}
  \pause
  \begin{block}{JSON}
    \begin{verbatim}
["Some text ",
 {"t": "tag",
  "s": "some other text that should be tagged"},
 "even",
 {"t": "tag2", "s": "more"},
 "text..."]
\end{verbatim}
  \end{block}
\end{frame}

\section{Dealing with XML}

% Divider slide for the section.
\begin{frame}
  \frametitle{Dealing with XML}
  \Huge Dealing with XML
\end{frame}

\subsection{Programming interfaces}

\begin{frame}
  \frametitle{Programming interfaces}
  \begin{itemize}
    \item Stream-oriented (SAX, Stax)
    \item Tree traversal (DOM)
    \item XML Data binding
    \item Transformation languages (XSLT, XQuery)
    \pause
    \item Other?
  \end{itemize}
\end{frame}

\subsection{Benchmark}

\begin{frame}
  \frametitle{Benchmark}
  \begin{itemize}
    \item ezxml
    \item Golang encoding/xml
    \item mxml (`Mini-XML', not `Minimal XML')
    \item Python 2 ElementTree
    \item sxmlc
    \item yxml
  \end{itemize}
  All code available at: \url{git://git.sillymon.ch/slcon3.git}
\end{frame}

\begin{frame}
  \frametitle{Benchmark setup}
  Linux machine, i7 CPU, 8\,GB RAM
  \begin{itemize}
    \item 627\,MB of XML in 10'000 files from PubMed Central
    \item Printing the article titles
    \item 20 runs (after cache warming)
    \item Single-threaded (except for Go)
  \end{itemize}
\end{frame}

% One slide per library: size of the benchmark program and of the library,
% project URL, API type, and the snippet that locates the article title.

\begin{frame}[fragile]
  \frametitle{ezxml}
  \begin{block}{ezxml}
    \begin{itemize}
      \item Program: 21 lines
      \item Library: 623 lines
      \item URL: \url{http://ezxml.sourceforge.net/}
      \item Type: DOM (Level 3; XPath)
    \end{itemize}
  \end{block}
  \begin{verbatim}
ezxml_t title = ezxml_get(ezdoc, "front", 0,\
    "article-meta", 0, "title-group", 0,\
    "article-title", -1);
\end{verbatim}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Go encoding/xml}
  \begin{block}{Go encoding/xml}
    \begin{itemize}
      \item Program: 30 lines
      \item Library: 6235 lines (stdlib)
      \item URL: \url{https://golang.org/pkg/encoding/xml/}
      \item Type: XML Data binding
    \end{itemize}
  \end{block}
  \begin{verbatim}
type article struct {
    Title string `xml:"front>article-meta>title-group>\
article-title"`
}
\end{verbatim}
\end{frame}

\begin{frame}[fragile]
  \frametitle{mxml}
  \begin{block}{mxml}
    \begin{itemize}
      \item Program: 38 lines
      \item Library: 9633 lines
      \item URL: \url{http://www.minixml.org/}
      \item Type: DOM (Level 3; XPath)?
    \end{itemize}
  \end{block}
  \begin{verbatim}
node = mxmlFindElement(root, root, "title-group",
                       NULL, NULL, MXML_DESCEND);
\end{verbatim}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Python 2 ElementTree}
  \begin{block}{Python 2 ElementTree}
    \begin{itemize}
      \item Program: 12 lines
      \item Library: 1107 lines (stdlib)
      % \url picks its own break points, so no manual \- hint is needed.
      \item URL: \url{https://docs.python.org/2/library/xml.etree.elementtree.html}
      \item Type: DOM (Level 3; XPath)?
    \end{itemize}
  \end{block}
  \begin{verbatim}
tg = r.findall("./front/article-meta/title-group")
at = tg[0].find("article-title")
\end{verbatim}
\end{frame}

\begin{frame}[fragile]
  \frametitle{sxmlc}
  \begin{block}{sxmlc}
    \begin{itemize}
      \item Program: 59 lines
      \item Library: 2690 lines
      \item URL: \url{http://sxmlc.sourceforge.net/}
      \item Type: DOM
    \end{itemize}
  \end{block}
  \begin{verbatim}
const char *path[] = {"front", "article-meta",\
    "title-group", "article-title", NULL};
for (int i = 0; path[i]; i++) {
    next = find_child_node(next, path[i]);
    if (!next) {
        fprintf(stderr, "Could not find '%s' tag.\n",
                path[i]);
        return;
    }
}
\end{verbatim}
\end{frame}

\begin{frame}[fragile]
  \frametitle{yxml}
  \begin{block}{yxml}
    \begin{itemize}
      \item Program: 103 lines
      \item Library: 1039 lines
      \item URL: \url{https://dev.yorhel.nl/yxml}
      \item Type: Stream-oriented
    \end{itemize}
  \end{block}
  \begin{verbatim}
case YXML_ELEMSTART:
    if (!strcmp(state->elem, "title-group")) {
        intitlegroup = 1;
    } else if (!strcmp(state->elem, "article-title")
               && intitlegroup) {
        printf("%s: ", state->elem);
        inarticletitle = 1;
    }
    break;
\end{verbatim}
\end{frame}

% Benchmark results, one row per library, ordered from slowest to fastest
% mean. NOTE(review): the units of the timing columns and of "size" are not
% stated on the slide -- consider adding them.
\begin{frame}
  \frametitle{Time \& Size}
  \begin{tabular}{ l | c | c | c | c | c }
                       & mean  & $\sigma$ & min   & max   & size \\ \hline
    Python ElementTree & 155.5 & 8.869    & 145.7 & 172.3 & N/A  \\
    Go encoding/xml    & 48.34 & 4.982    & 35.33 & 52.92 & 2M   \\
    mxml               & 23.96 & 1.841    & 22.32 & 27.64 & N/A  \\
    sxmlc              & 15.51 & 0.259    & 15.12 & 16.01 & 41K  \\
    ezxml              & 6.460 & 0.058    & 6.366 & 6.592 & 31K  \\
    yxml               & 4.123 & 0.220    & 3.885 & 4.520 & 18K
  \end{tabular}
\end{frame}

\section{Conclusion}

\begin{frame}[fragile]
  \frametitle{Conclusion}
  \begin{itemize}
    \item Complex specifications, (comparatively) hard to parse and verbose
    \item Use the ezxml or yxml libraries
    \pause
    % NOTE(review): tags reconstructed from the JSON equivalent on the
    % ``XML vs. JSON'' slide; confirm against the original slides.
    \item Ok:
    \begin{verbatim}
Some text <tag>some other text that
should be tagged</tag> even
<tag2>more</tag2> text...
\end{verbatim}
    \pause
    % NOTE(review): this counter-example has apparently lost its XML tags as
    % well (only the text "KeyVal" is left); the original markup cannot be
    % recovered from this file -- restore it from the original slides.
    \item No:
    \begin{verbatim}
KeyVal
\end{verbatim}
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Thanks for your attention}
  Questions?
\end{frame}

\end{document}