\pdfminorversion=4 % This is needed for impressive to work with this file (only functions when using pdflatex it seems)
\documentclass{beamer}
% \usetheme{Frankfurt}
\usetheme{Gopher}
\usecolortheme{Gopher}
\usepackage{german}
\usepackage{qtree}
\usepackage{graphicx}
%\usepackage{covington}
\usepackage{ulem}
\begin{document}
\title{stasher -- Conceptually exploring logstash in Go}
\author{Silvan Jegen}
\date{\today}
\begin{frame}
\titlepage
\end{frame}
\begin{frame}
\frametitle{Contents} \tableofcontents
\end{frame}
\section{XML}
\subsection{XML in theory}
\begin{frame}\frametitle{Uses}
Extensible Markup Language (XML) \pause
\begin{block}{XML aspects}
\begin{itemize}
\item Well-formedness
\item Validation
\item Namespaces
\item Entities
\end{itemize}
\end{block}
\end{frame}
\begin{frame}\frametitle{XML in theory}
\begin{block}{Related specifications}
\begin{itemize}
\item XSLT
\item XPath
\item XQuery
\item XML Encryption
\item \dots
\end{itemize}
\end{block}
\end{frame}
\begin{frame}\frametitle{XML-based formats}
\begin{block}{Variants}
\begin{itemize}
\item RDF XML
\item XMPP
\item EPUB
\item XHTML
\item \dots
\item 200+ more
\end{itemize}
\end{block}
\end{frame}
\subsection{XML in practice}
\begin{frame}\frametitle{XML in practice}
\Huge XML in practice
\end{frame}
\begin{frame}\frametitle{XML in practice}
\begin{block}{Enterprise usage}
\begin{itemize}
\item SOAP
\item Configuration
\item Data storage/exchange
\item Java ecosystem\dots
\end{itemize}
\end{block}
\end{frame}
\begin{frame}[fragile]\frametitle{Markup}
Annotate parts of text with additional information \pause
\begin{block}{Text Markup}
\begin{verbatim}
Some text <tag>some other text that
should be tagged</tag> even <tag2>more</tag2>
text...
\end{verbatim}
\end{block}
\end{frame}
\begin{frame}[fragile]\frametitle{XML vs. JSON}
\begin{block}{XML}
\begin{verbatim}
Some text <tag>some other text that
should be tagged</tag> even <tag2>more</tag2>
text...
\end{verbatim}
\end{block} \pause
\begin{block}{JSON}
\begin{verbatim}
["Some text ", {"t": "tag", "s": "some other text
that should be tagged"}, "even", {"t": "tag2",
"s": "more"}, "text..."]
\end{verbatim}
\end{block}
\end{frame}
\section{Dealing with XML}
\begin{frame}\frametitle{Dealing with XML}
\Huge Dealing with XML
\end{frame}
\subsection{Programming interfaces}
\begin{frame}\frametitle{Programming interfaces}
\begin{itemize}
\item Stream-oriented (SAX, StAX)
\item Tree traversal (DOM)
\item XML Data binding
\item Transformation languages (XSLT, XQuery) \pause
\item Other?
\end{itemize}
\end{frame}
\subsection{Benchmark}
\begin{frame}\frametitle{Benchmark}
\begin{itemize}
\item ezxml
\item Golang encoding/xml
\item mxml (`Mini-XML', not `Minimal XML')
\item Python 2 ElementTree
\item sxmlc
\item yxml
\end{itemize}
All code available at:
\url{git://git.sillymon.ch/slcon3.git}
\end{frame}
\begin{frame}\frametitle{Benchmark setup}
Linux machine, i7 CPU, 8\,GB RAM
\begin{itemize}
\item 627\,MB of XML in 10'000 files from PubMed Central
\item Printing the article titles
\item 20 runs (after cache warming)
\item Single-threaded (except for Go)
\end{itemize}
\end{frame}
\begin{frame}[fragile]\frametitle{ezxml}
\begin{block}{ezxml}
\begin{itemize}
\item Program: 21 lines
\item Library: 623 lines
\item URL: \url{http://ezxml.sourceforge.net/}
\item Type: DOM (Level 3; XPath)
\end{itemize}
\end{block}
\begin{verbatim}
ezxml_t title = ezxml_get(ezdoc, "front", 0,\
"article-meta", 0, "title-group", 0,\
"article-title", -1);
\end{verbatim}
\end{frame}
\begin{frame}[fragile]\frametitle{Go encoding/xml}
\begin{block}{Go encoding/xml}
\begin{itemize}
\item Program: 30 lines
\item Library: 6235 lines (stdlib)
\item URL: \url{https://golang.org/pkg/encoding/xml/}
\item Type: XML Data binding
\end{itemize}
\end{block}
\begin{verbatim}
type article struct {
Title string `xml:"front>article-meta>title-group>\
article-title"`
}
\end{verbatim}
\end{frame}
\begin{frame}[fragile]\frametitle{mxml}
\begin{block}{mxml}
\begin{itemize}
\item Program: 38 lines
\item Library: 9633 lines
\item URL: \url{http://www.minixml.org/}
\item Type: DOM (Level 3; XPath)?
\end{itemize}
\end{block}
\begin{verbatim}
node = mxmlFindElement(root, root, "title-group",
NULL, NULL, MXML_DESCEND);
\end{verbatim}
\end{frame}
\begin{frame}[fragile]\frametitle{Python 2 ElementTree}
\begin{block}{Python 2 ElementTree}
\begin{itemize}
\item Program: 12 lines
\item Library: 1107 lines (stdlib)
\item URL:
\url{https://docs.python.org/2/library/xml.etree.elementtree.html}
\item Type: DOM (Level 3; XPath)?
\end{itemize}
\end{block}
\begin{verbatim}
tg = r.findall("./front/article-meta/title-group")
at = tg[0].find("article-title")
\end{verbatim}
\end{frame}
\begin{frame}[fragile]\frametitle{sxmlc}
\begin{block}{sxmlc}
\begin{itemize}
\item Program: 59 lines
\item Library: 2690 lines
\item URL: \url{http://sxmlc.sourceforge.net/}
\item Type: DOM
\end{itemize}
\end{block}
\begin{verbatim}
const char *path[] = {"front", "article-meta",\
"title-group", "article-title", NULL};
for (int i = 0; path[i]; i++) {
next = find_child_node(next, path[i]);
if (!next) {
fprintf(stderr, "Could not find '%s'
tag.\n", path[i]);
return;
}
}
\end{verbatim}
\end{frame}
\begin{frame}[fragile]\frametitle{yxml}
\begin{block}{yxml}
\begin{itemize}
\item Program: 103 lines
\item Library: 1039 lines
\item URL: \url{https://dev.yorhel.nl/yxml}
\item Type: Stream-oriented
\end{itemize}
\end{block}
\begin{verbatim}
case YXML_ELEMSTART:
if (!strcmp(state->elem, "title-group")) {
intitlegroup = 1;
} else if (!strcmp(state->elem, "article-title")
&& intitlegroup) {
printf("%s: ", state->elem);
inarticletitle = 1;
}
break;
\end{verbatim}
\end{frame}
\begin{frame}\frametitle{Time \& Size}
\begin{tabular}{ l | c | c | c | c | c }
& mean & $\sigma$ & min & max & size \\ \hline
Python ElementTree & 155.5 & 8.869 & 145.7 & 172.3 & N/A \\
Go encoding/xml & 48.34 & 4.982 & 35.33 & 52.92 & 2M \\
mxml & 23.96 & 1.841 & 22.32 & 27.64 & N/A \\
sxmlc & 15.51 & 0.259 & 15.12 & 16.01 & 41K \\
ezxml & 6.460 & 0.058 & 6.366 & 6.592 & 31K \\
yxml & 4.123 & 0.220 & 3.885 & 4.520 & 18K
\end{tabular}
\end{frame}
\section{Conclusion}
\begin{frame}[fragile]\frametitle{Conclusion}
\begin{itemize}
\item Complex specifications, (comparatively) hard to parse and verbose
\item Use the ezxml or yxml libraries \pause
\item Ok:
\begin{verbatim}
Some text <tag>some other text that
should be tagged</tag> even <tag2>more</tag2>
text...
\end{verbatim} \pause
\item No:
\begin{verbatim}
<Key>Val</Key>
\end{verbatim}
\end{itemize}
\end{frame}
\begin{frame}\frametitle{Thanks for your attention}
Questions?
\end{frame}
\end{document}