owfd: rtsp: add tokenizer

The tokenizer does what the name says: it splits a line into tokens. This is useful for more advanced header lines, which have to be split up before they can be parsed.

Signed-off-by: David Herrmann <dh.herrmann@gmail.com>
author: David Herrmann <dh.herrmann@gmail.com> 2013-10-31 15:37:45 +0100
committer: David Herrmann <dh.herrmann@gmail.com> 2013-10-31 15:37:45 +0100
commit: a2fb4bb24bc986988372e7a122660273578819b4 (patch)
tree: 56e9527f37be6c425dbd7f60ed5ea066a7a635ac
parent: 935d4a59bd44068c60f471f5cb336fc17f93b6dd (diff)
4 files changed, 226 insertions, 1 deletions
diff --git a/Makefile.am b/Makefile.am
index 1f52992..af69526 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -91,6 +91,7 @@ libowfd_la_SOURCES = \
 	src/rtsp.h \
 	src/rtsp_ctrl.c \
 	src/rtsp_decoder.c \
+	src/rtsp_tokenizer.c \
 	src/shared.h \
 	src/shared.c \
 	src/wpa_ctrl.h \
diff --git a/src/rtsp.h b/src/rtsp.h
index 798b827..1ffcdc3 100644
--- a/src/rtsp.h
+++ b/src/rtsp.h
@@ -102,6 +102,10 @@ void owfd_rtsp_decoder_flush(struct owfd_rtsp_decoder *dec);
 int owfd_rtsp_decoder_feed(struct owfd_rtsp_decoder *dec,
 			   const char *buf, size_t len);
 
+/* rtsp tokenizer */
+
+/*
+ * Split @line into tokens. On success the number of tokens is returned and
+ * @out points to a malloc()ed buffer that holds the tokens back to back, each
+ * one terminated by a 0 byte; the caller releases the buffer with free().
+ * On failure a negative errno-style code (e.g. -ENOMEM) is returned and @out
+ * is left untouched.
+ */
+ssize_t owfd_rtsp_tokenize(const char *line, char **out);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/rtsp_tokenizer.c b/src/rtsp_tokenizer.c
new file mode 100644
index 0000000..1f3b6a1
--- /dev/null
+++ b/src/rtsp_tokenizer.c
@@ -0,0 +1,176 @@
+/*
+ * OpenWFD - Open-Source Wifi-Display Implementation
+ *
+ * Copyright (c) 2013 David Herrmann <dh.herrmann@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include "shared.h"
+#include "rtsp.h"
+
+/*
+ * owfd_rtsp_tokenize() - split a header line into tokens
+ * @line: 0-terminated input line
+ * @out: receives the token buffer on success
+ *
+ * Tokenization rules:
+ *  - Spaces and control characters (0x01-0x1f and 0x7f) separate tokens and
+ *    are dropped. Runs of them never produce empty tokens.
+ *  - Each of the characters  ( ) [ ] { } < > @ , ; : / ? =  and the backslash
+ *    forms a token of its own.
+ *  - A double-quoted string is exactly one token, even if it is empty. The
+ *    quotes are stripped. Inside quotes a backslash escapes the following
+ *    character: n, r, t, a, f, v, b and e are decoded to the matching control
+ *    character, "\0" is dropped without producing output, and every other
+ *    escaped character (including the quote and the backslash) is taken
+ *    literally.
+ *  - Bytes >= 0x80 are ordinary token characters regardless of whether plain
+ *    char is signed on the target.
+ *  - A token that is still open when the line ends is closed there.
+ *
+ * On success *@out is a malloc()ed buffer that contains the tokens back to
+ * back, each terminated by a 0 byte. The caller owns it and must free() it.
+ * On failure *@out is not modified.
+ *
+ * Returns: the number of tokens, -EINVAL if @line or @out is NULL, or
+ * -ENOMEM if the buffer cannot be allocated.
+ */
+ssize_t owfd_rtsp_tokenize(const char *line, char **out)
+{
+	/* characters that are always a token on their own outside of quotes */
+	static const char separators[] = "()[]{}<>@,;:\\/?=";
+	char *t, *dst, c, prev, last_c;
+	const char *src;
+	size_t l, num, len;
+	bool quoted, escaped;
+
+	if (!line || !out)
+		return -EINVAL;
+
+	/*
+	 * The output never needs more than two bytes per input byte (the byte
+	 * itself plus a terminating 0), plus one 0 closing the final token.
+	 */
+	len = strlen(line);
+	l = len * 2 + 1;
+	if (l <= len)	/* size_t wrapped around */
+		return -ENOMEM;
+
+	t = malloc(l);
+	if (!t)
+		return -ENOMEM;
+
+	/* keep the buffer a valid empty string if no token is produced */
+	*t = 0;
+
+	num = 0;
+	dst = t;
+	quoted = false;
+	escaped = false;
+
+	/*
+	 * last_c is non-zero while a token is open, i.e. while bytes were
+	 * written that still lack their terminating 0. prev is the value of
+	 * last_c before the current character was read.
+	 */
+	prev = 0;
+	last_c = 0;
+
+	for (src = line; *src; ++src) {
+		c = *src;
+		prev = last_c;
+		last_c = c;
+
+		if (quoted && escaped) {
+			switch (c) {
+			case 'n':
+				*dst++ = '\n';
+				break;
+			case 'r':
+				*dst++ = '\r';
+				break;
+			case 't':
+				*dst++ = '\t';
+				break;
+			case 'a':
+				*dst++ = '\a';
+				break;
+			case 'f':
+				*dst++ = '\f';
+				break;
+			case 'v':
+				*dst++ = '\v';
+				break;
+			case 'b':
+				*dst++ = '\b';
+				break;
+			case 'e':
+				*dst++ = 0x1b;	/* ESC */
+				break;
+			case '0':
+				/*
+				 * Drop the binary zero escape "\0". The
+				 * backslash itself was never written, so there
+				 * is nothing to undo; stepping dst back here
+				 * would erase the previous byte or even leave
+				 * the buffer.
+				 */
+				break;
+			default:
+				/* covers '\\' and '"' as well */
+				*dst++ = c;
+				break;
+			}
+
+			escaped = false;
+		} else if (quoted) {
+			if (c == '"') {
+				/* closing quote always ends a token */
+				*dst++ = 0;
+				++num;
+				quoted = false;
+				last_c = 0;
+			} else if (c == '\\') {
+				escaped = true;
+			} else {
+				*dst++ = c;
+			}
+		} else if (c == '"') {
+			/* an opening quote ends a preceding bare token */
+			if (prev) {
+				*dst++ = 0;
+				++num;
+			}
+
+			quoted = true;
+			escaped = false;
+			last_c = 0;
+		} else if (c == ' ' || (unsigned char)c <= 31 || c == 127) {
+			/*
+			 * Spaces and CTLs only separate tokens. The cast keeps
+			 * bytes >= 0x80 out of this branch where char is
+			 * signed.
+			 */
+			if (prev) {
+				*dst++ = 0;
+				++num;
+			}
+			last_c = 0;
+		} else if (strchr(separators, c)) {
+			/* c is never 0 here, so the terminator cannot match */
+			if (prev) {
+				*dst++ = 0;
+				++num;
+			}
+
+			*dst++ = c;
+			*dst++ = 0;
+			++num;
+			last_c = 0;
+		} else {
+			*dst++ = c;
+		}
+	}
+
+	/* close a token that is still open at the end of the line */
+	if (last_c) {
+		*dst++ = 0;
+		++num;
+	}
+
+	*out = t;
+	return num;
+}
diff --git a/test/test_rtsp.c b/test/test_rtsp.c
index bc23037..82d5346 100644
--- a/test/test_rtsp.c
+++ b/test/test_rtsp.c
@@ -128,7 +128,7 @@ static void test_rtsp_decoder_event(struct owfd_rtsp_decoder *dec,
 	}
 
 	/* print message */
-	if (1) {
+	if (0) {
 		fprintf(stderr, "Message:\n");
 		fprintf(stderr, "  header_num: %zu:\n", msg->header_num);
 		for (i = 0; i < msg->header_num; ++i)
@@ -241,8 +241,52 @@ START_TEST(test_rtsp_decoder)
 }
 END_TEST
 
+/*
+ * Test helper: run the tokenizer on @line and verify the outcome.
+ * @expect holds the expected token buffer, @len is the number of bytes of it
+ * to compare (must be non-zero) and @num is the expected token count.
+ */
+static void tokenize(const char *line, const char *expect, size_t len,
+		     size_t num)
+{
+	char *tokens, *cur;
+	ssize_t count, idx;
+
+	ck_assert(len > 0);
+
+	count = owfd_rtsp_tokenize(line, &tokens);
+	ck_assert(count >= 0);
+
+	/* flip to 1 to dump every token to stderr while debugging */
+	if (0) {
+		fprintf(stderr, "TOKENIZER (%lu):\n", (unsigned long)count);
+		for (idx = 0, cur = tokens; idx < count; ++idx) {
+			fprintf(stderr, "  TOKEN: %s\n", cur);
+			/* step over the token and its terminating 0 */
+			cur += strlen(cur) + 1;
+		}
+	}
+
+	ck_assert(count == (ssize_t)num);
+	ck_assert(memcmp(tokens, expect, len) == 0);
+	free(tokens);
+}
+
+/*
+ * Call tokenize() with a string-literal expectation. sizeof(_exp) is used as
+ * the compare length so that embedded 0 bytes and the literal's own
+ * terminator are part of the comparison.
+ */
+#define TOKENIZE(_line, _exp, _num) \
+	tokenize((_line), (_exp), sizeof(_exp), (_num))
+
+/* Feed fixed lines to the tokenizer and compare count and token buffer. */
+START_TEST(test_rtsp_tokenizer)
+{
+	/* empty line: no tokens */
+	TOKENIZE("", "", 0);
+	/* single bare word */
+	TOKENIZE("asdf", "asdf", 1);
+	/* empty quoted string between two words is an (empty) token itself */
+	TOKENIZE("asdf\"\"asdf", "asdf\0\0asdf", 3);
+	/* quoted string glued to bare words still splits into three tokens */
+	TOKENIZE("asdf\"asdf\"asdf", "asdf\0asdf\0asdf", 3);
+	/* quotes are stripped from a quoted token */
+	TOKENIZE("\"asdf\"", "asdf", 1);
+	/* escapes \n, \\ and \r inside quotes are decoded */
+	TOKENIZE("\"\\n\\\\\\r\"", "\n\\\r", 1);
+	/* escaped quote does not end the quoted string */
+	TOKENIZE("\"\\\"\"", "\"", 1);
+	/*
+	 * The literal contains a real 0 byte after the second backslash, so
+	 * the tokenizer only sees the 4 characters: quote, backslash, '0',
+	 * backslash. One empty token is expected.
+	 */
+	TOKENIZE("\"\\0\\\0\"", "", 1);
+	/* ':' is a token of its own; the run of spaces yields no tokens */
+	TOKENIZE("content-length:   100", "content-length\0:\0""100", 3);
+	/* '(' and ')' are tokens of their own; '+' stays inside "50+10" */
+	TOKENIZE("content-args: (50+10)", "content-args\0:\0(\0""50+10\0)", 5);
+}
+END_TEST
+
 TEST_DEFINE_CASE(decoder)
 	TEST(test_rtsp_decoder)
+	TEST(test_rtsp_tokenizer)
 TEST_END_CASE
 
 TEST_DEFINE(
author	David Herrmann <dh.herrmann@gmail.com>	2013-10-31 15:37:45 +0100
committer	David Herrmann <dh.herrmann@gmail.com>	2013-10-31 15:37:45 +0100
commit	a2fb4bb24bc986988372e7a122660273578819b4 (patch)
tree	56e9527f37be6c425dbd7f60ed5ea066a7a635ac
parent	935d4a59bd44068c60f471f5cb336fc17f93b6dd (diff)