diff options
author | Silvan Jegen <s.jegen@gmail.com> | 2017-01-16 20:12:49 +0100 |
---|---|---|
committer | Silvan Jegen <s.jegen@gmail.com> | 2017-01-16 20:12:49 +0100 |
commit | aa7a3501c67f435e7f34b19f2f3f87f910005a5a (patch) | |
tree | fcdb3085bf0792c83257b21d85384a80131fd3c1 |
Initial commit
-rw-r--r-- | stasherpresent.tex | 311 |
1 files changed, 311 insertions, 0 deletions
diff --git a/stasherpresent.tex b/stasherpresent.tex new file mode 100644 index 0000000..d74d4e5 --- /dev/null +++ b/stasherpresent.tex @@ -0,0 +1,311 @@ +\pdfminorversion=4 % This is needed for impressive to work with this file (only functions when using pdflatex it seems) +\documentclass{beamer} +% \usetheme{Frankfurt} +\usetheme{Gopher} +\usecolortheme{Gopher} +\usepackage{german} +\usepackage{qtree} +\usepackage{graphicx} +%\usepackage{covington} +\usepackage{ulem} + + +\begin{document} + +\title{stasher - Conceptually exploring logstash in Go} +\author{Silvan Jegen} +\date{\today} + +\begin{frame} +\titlepage +\end{frame} + +\begin{frame} +\frametitle{Contents} \tableofcontents +\end{frame} + + +\section{Logstash} +\subsection{What is it?} + +\begin{frame}\frametitle{Uses} + Extensible Markup Language (XML) \pause + + \begin{block}{XML aspects} + \begin{itemize} + \item Well-formedness + \item Validation + \item Namespaces + \item Entities + \end{itemize} + \end{block} +\end{frame} + +\begin{frame}\frametitle{XML in theory} + \begin{block}{Related specifications} + \begin{itemize} + \item XSLT + \item XPath + \item XQuery + \item XML Encryption + \item ... + \end{itemize} + \end{block} +\end{frame} + +\begin{frame}\frametitle{XML-based formats} + \begin{block}{Variants} + \begin{itemize} + \item RDF XML + \item XMPP + \item EPUB + \item XHTML + \item ... + \item 200+ more + \end{itemize} + \end{block} +\end{frame} + +\subsection{XML in practice} +\begin{frame}\frametitle{XML in practice} + \Huge XML in practice +\end{frame} + +\begin{frame}\frametitle{XML in practice} + \begin{block}{Enterprise usage} + \begin{itemize} + \item SOAP + \item Configuration + \item Data storage/exchange + \item Java ecosystem... 
+ \end{itemize} + \end{block} +\end{frame} + +\begin{frame}[fragile]\frametitle{Markup} + + Annotate parts of text with additional information \pause + + \begin{block}{Text Markup} + \begin{verbatim} + <document>Some text <tag>some other text that + should be tagged</tag> even <tag2>more</tag2> + text...</document> + \end{verbatim} + \end{block} +\end{frame} + +\begin{frame}[fragile]\frametitle{XML vs. JSON} + \begin{block}{XML} + \begin{verbatim} + <document>Some text <tag>some other text that + should be tagged</tag> even <tag2>more</tag2> + text...</document> + \end{verbatim} + \end{block} \pause + \begin{block}{JSON} + \begin{verbatim} + ["Some text ", {"t": "tag", "s": "some other text + that should be tagged"}, "even", {"t": "tag2", + "s": "more"}, "text..."] + \end{verbatim} + \end{block} +\end{frame} + +\section{Dealing with XML} +\begin{frame}\frametitle{Dealing with XML} + \Huge Dealing with XML +\end{frame} + +\subsection{Programming interfaces} +\begin{frame}\frametitle{Programming interfaces} + \begin{itemize} + \item Stream-oriented (SAX, Stax) + \item Tree traversal (DOM) + \item XML Data binding + \item Transformation languages (XSLT, XQuery) \pause + \item Other? 
+ \end{itemize} +\end{frame} + +\subsection{Benchmark} +\begin{frame}\frametitle{Benchmark} + + \begin{itemize} + \item ezxml + \item Golang encoding/xml + \item mxml ('Mini-XML', not 'Minimal XML') + \item Python 2 ElementTree + \item sxmlc + \item yxml + \end{itemize} + + All code available at: + + git://git.sillymon.ch/slcon3.git + +\end{frame} + +\begin{frame}\frametitle{Benchmark setup} + Linux machine, i7 CPU, 8GB RAM + \begin{itemize} + \item 627MB of XML in 10'000 files from PubMed Central + \item Printing the article titles + \item 20 runs (after cache warming) + \item Single-threaded (except for Go) + \end{itemize} +\end{frame} + +\begin{frame}[fragile]\frametitle{ezxml} + \begin{block}{ezxml} + \begin{itemize} + \item Program: 21 lines + \item Library: 623 lines + \item URL: http://ezxml.sourceforge.net/ + \item Type: DOM (Level 3; XPath) + \end{itemize} + \end{block} + \begin{verbatim} + ezxml_t title = ezxml_get(ezdoc, "front", 0,\ + "article-meta", 0, "title-group", 0,\ + "article-title", -1); + \end{verbatim} +\end{frame} + +\begin{frame}[fragile]\frametitle{Go encoding/xml} + \begin{block}{Go encoding/xml} + \begin{itemize} + \item Program: 30 lines + \item Library: 6235 lines (stdlib) + \item URL: https://golang.org/pkg/encoding/xml/ + \item Type: XML Data binding + \end{itemize} + \end{block} + \begin{verbatim} + type article struct { + Title string `xml:"front>article-meta>title-group>\ + article-title"` + } + \end{verbatim} +\end{frame} + +\begin{frame}[fragile]\frametitle{mxml} + \begin{block}{mxml} + \begin{itemize} + \item Program: 38 lines + \item Library: 9633 lines + \item URL: http://www.minixml.org/ + \item Type: DOM (Level 3; XPath)? 
+ \end{itemize} + \end{block} + \begin{verbatim} + node = mxmlFindElement(root, root, "title-group", + NULL, NULL, MXML_DESCEND); + \end{verbatim} +\end{frame} + +\begin{frame}[fragile]\frametitle{Python 2 ElementTree} + \begin{block}{Python 2 ElementTree} + \begin{itemize} + \item Program: 12 lines + \item Library: 1107 lines (stdlib) + \item URL: + + https://docs.python.org/2/library/xml.etree.element\-tree.html + + \item Type: DOM (Level 3; XPath)? + \end{itemize} + \end{block} + \begin{verbatim} + tg = r.findall("./front/article-meta/title-group") + at = tg[0].find("article-title") + \end{verbatim} +\end{frame} + +\begin{frame}[fragile]\frametitle{sxmlc} + \begin{block}{sxmlc} + \begin{itemize} + \item Program: 59 lines + \item Library: 2690 lines + \item URL: http://sxmlc.sourceforge.net/ + \item Type: DOM + \end{itemize} + \end{block} + \begin{verbatim} + const char *path[] = {"front", "article-meta",\ + "title-group", "article-title", NULL}; + for (int i = 0; path[i]; i++) { + next = find_child_node(next, path[i]); + if (!next) { + fprintf(stderr, "Could not find '%s' + tag.\n", path[i]); + return; + } + } + \end{verbatim} +\end{frame} + +\begin{frame}[fragile]\frametitle{yxml} + \begin{block}{yxml} + \begin{itemize} + \item Program: 103 lines + \item Library: 1039 lines + \item URL: https://dev.yorhel.nl/yxml + \item Type: Stream-oriented + \end{itemize} + \end{block} + \begin{verbatim} + case YXML_ELEMSTART: + if (!strcmp(state->elem, "title-group")) { + intitlegroup = 1; + } else if (!strcmp(state->elem, "article-title") + && intitlegroup) { + printf("%s: ", state->elem); + inarticletitle = 1; + } + break; + \end{verbatim} +\end{frame} + + +\begin{frame}\frametitle{Time \& Size} + + \begin{tabular}{ l | c | c | c | c | c } + & mean & $\sigma$ & min & max & size \\ \hline + Python ElementTree & 155.5 & 8.869 & 145.7 & 172.3 & N/A \\ + Go encoding/xml & 48.34 & 4.982 & 35.33 & 52.92 & 2M \\ + mxml & 23.96 & 1.841 & 22.32 & 27.64 & N/A \\ + sxmlc & 15.51 & 
0.259 & 15.12 & 16.01 & 41K \\ + ezxml & 6.460 & 0.058 & 6.366 & 6.592 & 31K \\ + yxml & 4.123 & 0.220 & 3.885 & 4.520 & 18K + \end{tabular} + +\end{frame} + + +\section{Conclusion} +\begin{frame}[fragile]\frametitle{Conclusion} + \begin{itemize} + \item Complex specifications, (comparatively) hard to parse and verbose + \item Use the ezxml or yxml libraries \pause + \item Ok: + \begin{verbatim} + <document>Some text <tag>some other text that + should be tagged</tag> even <tag2>more</tag2> + text...</document> + \end{verbatim} \pause + \item No: + \begin{verbatim} + <thing><key>Key</key><value>Val</value></thing> + \end{verbatim} + \end{itemize} +\end{frame} + + +\begin{frame}\frametitle{Thanks for your attention} + + Questions? + +\end{frame} + +\end{document} |