diff options
| author | Silvan Jegen <s.jegen@gmail.com> | 2017-01-16 21:04:42 +0100 | 
|---|---|---|
| committer | Silvan Jegen <s.jegen@gmail.com> | 2017-01-16 21:04:42 +0100 | 
| commit | cdc9dd8459b5e303fb9e979ffa34c5af932aa8df (patch) | |
| tree | f80c1ca0ec07ee6150d84764787ecbc8604b8755 | |
| parent | aa7a3501c67f435e7f34b19f2f3f87f910005a5a (diff) | |
Remove unneeded slide and work on the outline
| -rw-r--r-- | stasherpresent.tex | 293 | 
1 files changed, 51 insertions, 242 deletions
| diff --git a/stasherpresent.tex b/stasherpresent.tex index d74d4e5..3536572 100644 --- a/stasherpresent.tex +++ b/stasherpresent.tex @@ -12,7 +12,7 @@  \begin{document} -\title{stasher - Conceptionally exploring logstash in Go} +\title{stasher - Conceptionally exploring Logstash in Go}  \author{Silvan Jegen}   \date{\today} @@ -27,281 +27,90 @@  \section{Logstash}  \subsection{What is it?} - -\begin{frame}\frametitle{Uses} -	Extensible Markup Language (XML) \pause - -	\begin{block}{XML aspects} -		\begin{itemize} -			\item Well-formedness -			\item Validation -			\item Namespaces -			\item Entities -		\end{itemize} -	\end{block} +\begin{frame}\frametitle{Logstash} +	\begin{columns}[T] +		\begin{column}{.5\textwidth} +			\begin{center} +				\Huge Logstash +			\end{center} +		\end{column} +		\begin{column}{.5\textwidth} +			 \includegraphics[width=\textwidth]{icon-logstash-bb.png} +		\end{column} +	\end{columns}  \end{frame} -\begin{frame}\frametitle{XML in theory} -	\begin{block}{Related specifications} -		\begin{itemize} -			\item XSLT -			\item XPath -			\item XQuery -			\item XML Encryption -			\item ... -		\end{itemize} -	\end{block} +\begin{frame}\frametitle{What is it?} + \includegraphics[width=0.5\textwidth]{logstash-img1.png}  \end{frame} -\begin{frame}\frametitle{XML-based formats} -	\begin{block}{Variants} +\begin{frame}\frametitle{What is it?} +	\begin{block}{Centralize, Transform \& Stash}  		\begin{itemize} -				\item RDF XML -				\item XMPP -				\item EPUB -				\item XHTML -				\item ... -				\item 200+ more +			\item Input +			\item Filter +			\item Outputs  		\end{itemize}  	\end{block}  \end{frame} -\subsection{XML in practice} -\begin{frame}\frametitle{XML in practice} -	\Huge XML in practice +\begin{frame}\frametitle{Examples} +	\begin{itemize} +		\item Input: +		\item Filter: +		\item Outputs: +	\end{itemize}  \end{frame} +\subsection{How does it work?}  \begin{frame}\frametitle{XML in practice} -	\begin{block}{Enterprise usage} -		\begin{itemize} -			\item SOAP -			\item Configuration -			\item Data storage/exchange -			\item Java ecosystem... -		\end{itemize} -	\end{block} -\end{frame} - -\begin{frame}[fragile]\frametitle{Markup} - -	Annotate parts of text with additional information \pause - -	\begin{block}{Text Markup} -		\begin{verbatim} -				<document>Some text <tag>some other text that -				should be tagged</tag> even <tag2>more</tag2> -				text...</document> -		\end{verbatim} -	\end{block} +	\Huge XML in practice  \end{frame} -\begin{frame}[fragile]\frametitle{XML vs. JSON} -	\begin{block}{XML} +\begin{frame}[fragile]\frametitle{Configuration} +	\begin{block}{Custom configuration language?}  		\begin{verbatim} -				<document>Some text <tag>some other text that -				should be tagged</tag> even <tag2>more</tag2> -				text...</document> -		\end{verbatim} -	\end{block}  \pause -	\begin{block}{JSON} -		\begin{verbatim} -			["Some text ", {"t": "tag", "s": "some other text -			that should be tagged"}, "even", {"t": "tag2", -			"s": "more"}, "text..."] +			input { stdin { } } +			output { +			  elasticsearch { hosts => ["localhost:9200"] } +			  stdout { codec => rubydebug } +			}  		\end{verbatim}  	\end{block}  \end{frame} -\section{Dealing with XML} -\begin{frame}\frametitle{Dealing with XML} -	\Huge Dealing with XML +\section{stasher} +\begin{frame}\frametitle{stasher} +	\Huge stasher  \end{frame} -\subsection{Programming interfaces} -\begin{frame}\frametitle{Programming interfaces} +\subsection{Why?} +\begin{frame}\frametitle{Why?}  	\begin{itemize} -		\item Stream-oriented (SAX, Stax) -		\item Tree traversal (DOM) -		\item XML Data binding -		\item Transformation languages (XSLT, XQuery) \pause -		\item Other? +		\item Apparently Logstash is very slow \pause +		\item I like Go +		\item Generality \pause +		\item Or not?  	\end{itemize}  \end{frame} -\subsection{Benchmark} +\subsection{Implementation}  \begin{frame}\frametitle{Benchmark} - -	\begin{itemize} -		\item ezxml -		\item Golang encoding/xml -		\item mxml  ('Mini-XML', not 'Minimal XML') -		\item Python 2 ElementTree -		\item sxmlc -		\item yxml -	\end{itemize} - -	All code available at: - -	git://git.sillymon.ch/slcon3.git - -\end{frame} - -\begin{frame}\frametitle{Benchmark setup} -	Linux machine, i7 CPU, 8GB RAM  	\begin{itemize} -		\item 627MB of XML in 10'000 files from PubMed Central -		\item Printing the article titles -		\item 20 runs (after cache warming) -		\item Single-threaded (except for Go) +		\item Plugins? +		\item Generality +		\item DSL vs. Programming language balance?  	\end{itemize}  \end{frame} -\begin{frame}[fragile]\frametitle{ezxml} -	\begin{block}{ezxml} -		\begin{itemize} -			\item Program: 21 lines -			\item Library: 623 lines -			\item URL: http://ezxml.sourceforge.net/ -			\item Type: DOM (Level 3; XPath) -		\end{itemize} -	\end{block} -	\begin{verbatim} -		ezxml_t title = ezxml_get(ezdoc, "front", 0,\ -		  "article-meta", 0, "title-group", 0,\ -		  "article-title", -1); -	\end{verbatim} -\end{frame} - -\begin{frame}[fragile]\frametitle{Go encoding/xml} -	\begin{block}{Go encoding/xml} -		\begin{itemize} -			\item Program: 30 lines -			\item Library: 6235 lines (stdlib) -			\item URL: https://golang.org/pkg/encoding/xml/ -			\item Type: XML Data binding -		\end{itemize} -	\end{block} -	\begin{verbatim} -		type article struct { -		  Title string `xml:"front>article-meta>title-group>\ -		                article-title"` -		} -	\end{verbatim} -\end{frame} - -\begin{frame}[fragile]\frametitle{mxml} -	\begin{block}{mxml} -		\begin{itemize} -			\item Program: 38 lines -			\item Library: 9633 lines -			\item URL: http://www.minixml.org/ -			\item Type: DOM (Level 3; Xpath)? -		\end{itemize} -	\end{block} -	\begin{verbatim} -		node = mxmlFindElement(root, root, "title-group", -		 NULL, NULL, MXML_DESCEND); -	\end{verbatim} -\end{frame} - -\begin{frame}[fragile]\frametitle{Python 2 ElementTree} -	\begin{block}{Python 2 ElementTree} -		\begin{itemize} -			\item Program: 12 lines -			\item Library: 1107 lines (stdlib) -			\item URL: - -				    https://docs.python.org/2/library/xml.etree.element\-tree.html - -			\item Type: DOM (Level 3; Xpath)? -		\end{itemize} -	\end{block} -	\begin{verbatim} -		tg = r.findall("./front/article-meta/title-group") -		at = tg[0].find("article-title") -	\end{verbatim} -\end{frame} - -\begin{frame}[fragile]\frametitle{sxmlc} -	\begin{block}{sxmlc} -		\begin{itemize} -			\item Program: 59 lines -			\item Library: 2690 lines -			\item URL: http://sxmlc.sourceforge.net/ -			\item Type: DOM -		\end{itemize} -	\end{block} -	\begin{verbatim} -		const char *path[] = {"front", "article-meta",\ -		           "title-group", "article-title", NULL}; -		for (int i = 0; path[i]; i++) { -		  next = find_child_node(next, path[i]); -		  if (!next) { -		    fprintf(stderr, "Could not find '%s' -				                 tag.\n", path[i]); -		    return; -		  } -		} -	\end{verbatim} -\end{frame} - -\begin{frame}[fragile]\frametitle{yxml} -	\begin{block}{yxml} -		\begin{itemize} -			\item Program: 103 lines -			\item Library: 1039 lines -			\item URL: https://dev.yorhel.nl/yxml -			\item Type: Stream-oriented -		\end{itemize} -	\end{block} -	\begin{verbatim} -		case YXML_ELEMSTART: -		 if (!strcmp(state->elem, "title-group")) { -		    intitlegroup = 1; -		 } else if (!strcmp(state->elem, "article-title") -		            && intitlegroup) { -		   printf("%s: ", state->elem); -		   inarticletitle = 1; -		 } -		 break; -	\end{verbatim} -\end{frame} - - -\begin{frame}\frametitle{Time \& Size} - -	\begin{tabular}{ l | c | c | c | c | c } -						            &      mean  &      $\sigma$  &      min  &      max &     size   \\     \hline -		Python ElementTree  &     155.5  &         8.869  &    145.7  &    172.3 &    N/A   \\ -		Go encoding/xml     &     48.34  &         4.982  &    35.33  &    52.92 &    2M   \\ -		mxml                &     23.96  &         1.841  &    22.32  &    27.64 &    N/A   \\ -		sxmlc               &     15.51  &         0.259  &    15.12  &    16.01 &    41K    \\ -		ezxml               &     6.460  &         0.058  &    6.366  &    6.592 &    31K   \\ -		yxml                &     4.123  &         0.220  &    3.885  &    4.520 &    18K -	\end{tabular} - -\end{frame} - - -\section{Conclusion} -\begin{frame}[fragile]\frametitle{Conclusion} +\section{Considerations?} +\begin{frame}\frametitle{Considerations}  	\begin{itemize} -		\item Complex specifications, (comparatively) hard to parse and verbose -		\item Use the ezxml or yxml libraries \pause -		\item Ok: -					\begin{verbatim} -						<document>Some text <tag>some other text that -						should be tagged</tag> even <tag2>more</tag2> -						text...</document> -					\end{verbatim} \pause -		\item No: -					\begin{verbatim} -						<thing><key>Key</key><value>Val</value></thing> -					\end{verbatim} +		\item Plugins? +		\item  	\end{itemize}  \end{frame} -  \begin{frame}\frametitle{Thanks for your attention}  	Questions? | 
