summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorsandmann <sandmann>2004-09-01 11:59:18 +0000
committersandmann <sandmann>2004-09-01 11:59:18 +0000
commitab46c861ef2dc507abdfeb50391ea3d9cd5d1179 (patch)
tree683c679981d99b764ecdcf0c4057af8763a7dade
parentbf426687bbded794a682f660c9754796282c0566 (diff)
*** empty log message ***
-rwxr-xr-xTODO189
-rw-r--r--src/lac.h14
-rw-r--r--src/lachttp.c1
-rw-r--r--src/lacuri.c2
-rw-r--r--tests/http-test.c2
-rw-r--r--tests/lacwget.c38
-rw-r--r--tests/uri-test.c66
7 files changed, 160 insertions, 152 deletions
diff --git a/TODO b/TODO
index 97d36c2..77ce31a 100755
--- a/TODO
+++ b/TODO
@@ -8,27 +8,10 @@ correctness:
Problem: Other threads might not want the signal
ignored. This race condition may be acceptable
- * Fix problem where we go into infinite loop if the server
- closes the connection before we send anything. (showstopper
- for 1.0)
-
- - www.skolekom.dk used to be like that (problem is in
- transport_handle_close)
-
* thread-safeness (big lock?) (showstopper for 1.0). Note:
lac_activity_cancel() from another thread. This could get
hairy :-/
- * consider
-
- activity = lac_begin_activity (timeout);
- ...;
- lac_activity_wait(activity)/lac_activity_cancel();
-
- lac_end_activity(activity);
-
- within "..." you can call lac_activity_cancel();
-
* relative URI's
- showstopper for 1.0
@@ -50,10 +33,6 @@ correctness:
- Accept it based on "Ok".
misc:
- * never emit events from a constructor. apps will get confused
- if they get an event before the constructor returns. add an idle
- handler instead. [May already be done, need to check]
-
* need a real regression test suite
* get rid of CacheRecord->type
* Cleanup of the DNS module
@@ -88,9 +67,6 @@ performance:
server if we can.
consistency:
- * be consistent about "str" vs. "string"
- - what does glib do?
-
* generally:
- foo_get_bar() means "whatever is returned is owned by
@@ -204,81 +180,6 @@ features:
about different ways of structuring i/o
* cookie support (see RFC 2109)
- * LacCowString (?)
- - do a simple copy-on-write string on contiguous memory
-
- pro: simple, easy to get c-string
- con: insert in the middle of it is inefficient
-
- - how much data in an xml/html document is markup and
- how much is character data?
- - we risk doing a lot if copying during parsing of xml
- if eg a begin tag spans more than one chunk of data
- - even if we get the cow string into glib and get
- gdkpixbuf to use it, images may need to a lot of
- uncompressing anyway so that all of the data will be
- copied anyway.
- - actually, memcpy() is probably not really a bottleneck
- if do aggressive caching and reuse images whenever
- possible.
- - since we have "const char *" and a dummy reserved pointer
- in events, we can do this later without breaking anything.
-
- possible API:
-
- /*
- * Copy-on-write string
- */
- typedef struct _LacCowString LacCowString;
- LacCowString *lac_cow_string_new (const gchar *init,
- gsize len);
- LacCowString *lac_cow_string_copy (const LacCowString *cow_string);
- void lac_cow_string_free (LacCowString *cow_string);
- gboolean lac_cow_string_equal (const LacCowString *cow_string1,
- const LacCowString *cow_string2);
- guint lac_cow_string_hash (const LacCowString *cow_string);
- gint lac_cow_string_compare (const LacCowString *cow_string1,
- const LacCowString *cow_string2);
- LacCowString *lac_cow_string_truncate (LacCowString *cow_string,
- gsize size);
- LacCowString *lac_cow_string_prepend (LacCowString *cow_string,
- const gchar *val,
- gsize len);
- LacCowString *lac_cow_string_insert (LacCowString *cow_string,
- gsize pos,
- const gchar *val,
- gsize len);
- LacCowString *lac_cow_string_append (LacCowString *cow_string,
- const gchar *val,
- gsize len);
- LacCowString *lac_cow_string_erase (LacCowString *cow_string,
- gsize pos,
- gsize len);
- LacCowString *lac_cow_string_concatenate (LacCowString *cow_string1, ...);
-
- * add a timeout implementation?
-
- - better scalablity
- - perhaps more convenient
- - initial implementation can ve very simple in terms
- of g_timeout_add()
-
- API:
-
- /*
- * Timeout
- */
- typedef struct _LacTimeout LacTimeout;
-
- LacTimeout *lac_timeout_new (guint interval,
- GSourceFunc callback,
- gpointer data);
- void lac_timeout_set_interval (LacTimeout *timeout,
- guint interval);
- guint lac_timeout_get_interval (LacTimeout *timeout);
- void lac_timeout_disable (LacTimeout *timeout);
- void lac_timeout_enable (LacTimeout *timeout);
- void lac_timeout_destroy (LacTimeout *timeout);
activity:
Use cases:
@@ -473,6 +374,9 @@ Already done:
==============
misc:
+ * never emit events from a constructor. apps will get confused
+ if they get an event before the constructor returns. add an idle
+ handler instead. [May already be done, need to check]
* move a lot of #includes from lac.h to lacprivate.h
* rename lacdns-manager.[ch] -> lacdns-query.[ch].
* format functions correctly
@@ -502,6 +406,13 @@ misc:
they can't be added later.
correctness:
+ * Fix problem where we go into infinite loop if the server
+ closes the connection before we send anything. (showstopper
+ for 1.0)
+
+ - www.skolekom.dk used to be like that (problem is in
+ transport_handle_close)
+ [I think this is fixed, not completely sure]
* We get 'broken server' too often
* adobe.com is not completely fixed
@@ -651,6 +562,82 @@ correctness:
performance:
+ * add a timeout implementation? [nah, fix the one in glib instead]
+
+ - better scalability
+ - perhaps more convenient
+ - initial implementation can be very simple in terms
+ of g_timeout_add()
+
+ API:
+
+ /*
+ * Timeout
+ */
+ typedef struct _LacTimeout LacTimeout;
+
+ LacTimeout *lac_timeout_new (guint interval,
+ GSourceFunc callback,
+ gpointer data);
+ void lac_timeout_set_interval (LacTimeout *timeout,
+ guint interval);
+ guint lac_timeout_get_interval (LacTimeout *timeout);
+ void lac_timeout_disable (LacTimeout *timeout);
+ void lac_timeout_enable (LacTimeout *timeout);
+ void lac_timeout_destroy (LacTimeout *timeout);
+
+ * LacCowString ([not worth it])
+ - do a simple copy-on-write string on contiguous memory
+
+ pro: simple, easy to get c-string
+ con: insert in the middle of it is inefficient
+
+ - how much data in an xml/html document is markup and
+ how much is character data?
+ - we risk doing a lot of copying during parsing of xml
+ if eg a begin tag spans more than one chunk of data
+ - even if we get the cow string into glib and get
+ gdkpixbuf to use it, images may need a lot of
+ uncompressing anyway so that all of the data will be
+ copied anyway.
+ - actually, memcpy() is probably not really a bottleneck
+ if we do aggressive caching and reuse images whenever
+ possible.
+ - since we have "const char *" and a dummy reserved pointer
+ in events, we can do this later without breaking anything.
+
+ possible API:
+
+ /*
+ * Copy-on-write string
+ */
+ typedef struct _LacCowString LacCowString;
+ LacCowString *lac_cow_string_new (const gchar *init,
+ gsize len);
+ LacCowString *lac_cow_string_copy (const LacCowString *cow_string);
+ void lac_cow_string_free (LacCowString *cow_string);
+ gboolean lac_cow_string_equal (const LacCowString *cow_string1,
+ const LacCowString *cow_string2);
+ guint lac_cow_string_hash (const LacCowString *cow_string);
+ gint lac_cow_string_compare (const LacCowString *cow_string1,
+ const LacCowString *cow_string2);
+ LacCowString *lac_cow_string_truncate (LacCowString *cow_string,
+ gsize size);
+ LacCowString *lac_cow_string_prepend (LacCowString *cow_string,
+ const gchar *val,
+ gsize len);
+ LacCowString *lac_cow_string_insert (LacCowString *cow_string,
+ gsize pos,
+ const gchar *val,
+ gsize len);
+ LacCowString *lac_cow_string_append (LacCowString *cow_string,
+ const gchar *val,
+ gsize len);
+ LacCowString *lac_cow_string_erase (LacCowString *cow_string,
+ gsize pos,
+ gsize len);
+ LacCowString *lac_cow_string_concatenate (LacCowString *cow_string1, ...);
+
* limit number of outstanding dns queries? Yes.
* sockets are almost always writable, so in general we should avoid
poll()ing just to write. Instead, just send(), and only call
@@ -670,6 +657,10 @@ performance:
call the callback several times.
consistency:
+ * be consistent about "str" vs. "string"
+ - what does glib do? (a little of both)
+ I'll go with string
+
* be consistent about whether len comes before data or vice versa
- data comes first followed by length. This is consistent
with unix, glib and everything else
diff --git a/src/lac.h b/src/lac.h
index cdef1e0..fa7f67c 100644
--- a/src/lac.h
+++ b/src/lac.h
@@ -315,13 +315,13 @@ struct _LacUri {
guint checksum; /* used internally */
};
-LacUri * lac_uri_new_from_str (const LacUri *base, const gchar *str);
-LacUri * lac_uri_copy (const LacUri *uri);
-gchar * lac_uri_string (const LacUri *uri);
-void lac_uri_free (LacUri *uri);
-gboolean lac_uri_equal (const LacUri *uri1,
- const LacUri *uri2);
-
+LacUri * lac_uri_new_from_string (const LacUri *base,
+ const gchar *str);
+LacUri * lac_uri_copy (const LacUri *uri);
+gchar * lac_uri_string (const LacUri *uri);
+void lac_uri_free (LacUri *uri);
+gboolean lac_uri_equal (const LacUri *uri1,
+ const LacUri *uri2);
/*
* HTTP
diff --git a/src/lachttp.c b/src/lachttp.c
index 7265f71..3e199d5 100644
--- a/src/lachttp.c
+++ b/src/lachttp.c
@@ -2356,6 +2356,7 @@ http_transport_handle_close (HttpTransport *transport)
transport->host, transport, 0);
}
+ http_host_transport_will_close_notify (transport->host, transport);
http_transport_remove_answer_timeout (transport);
http_transport_return_outstanding_requests (transport);
http_host_transport_closed_notify (transport->host, transport);
diff --git a/src/lacuri.c b/src/lacuri.c
index 5931b70..bf1b7c1 100644
--- a/src/lacuri.c
+++ b/src/lacuri.c
@@ -800,7 +800,7 @@ step_7:
#endif
LacUri *
-lac_uri_new_from_str (const LacUri *base, const gchar *str)
+lac_uri_new_from_string (const LacUri *base, const gchar *str)
{
LacUri *result;
gchar *base_str = NULL;
diff --git a/tests/http-test.c b/tests/http-test.c
index 240d563..884a61d 100644
--- a/tests/http-test.c
+++ b/tests/http-test.c
@@ -141,7 +141,7 @@ main (int argc, char *argv[])
{
LacHttpRequest *request;
- LacUri *uri = lac_uri_new_from_str (NULL, argv[i]);
+ LacUri *uri = lac_uri_new_from_string (NULL, argv[i]);
if (!uri)
{
g_print ("bad uri: %s\n", argv[i]);
diff --git a/tests/lacwget.c b/tests/lacwget.c
index c7245ef..bf38506 100644
--- a/tests/lacwget.c
+++ b/tests/lacwget.c
@@ -72,6 +72,7 @@ typedef struct PageInfo {
gdouble parser_eof;
gboolean is_redirect;
gboolean try_parse;
+ FILE *file;
} PageInfo;
static GPtrArray *outstanding_page_infos;
@@ -295,7 +296,7 @@ handle_base_element (PageInfo *page_info, const HtmlParserBeginElementEvent *eve
if (uri)
{
- LacUri *new_base = lac_uri_new_from_str (page_info->base_uri, uri);
+ LacUri *new_base = lac_uri_new_from_string (page_info->base_uri, uri);
if (new_base)
{
printf ("new base: %s\n", lac_uri_string (new_base));
@@ -500,7 +501,6 @@ http_callback (LacHttpRequest *request, const LacHttpEvent *event)
break;
case LAC_HTTP_EVENT_CONTENT:
- g_print ("%s: content chunk (size: %d)\n", page_info->main_uri, event->content.length);
#if 0
{
int i;
@@ -508,16 +508,21 @@ http_callback (LacHttpRequest *request, const LacHttpEvent *event)
printf ("%c", event->content.data[i]);
}
#endif
- g_string_append_len (page_info->unparsed, event->content.data, event->content.length);
- if (page_info->first_content == -1.0)
- page_info->first_content = g_timer_elapsed (page_info->timer, NULL);
-
- queue_parse (page_info);
#if 0
- parse_idle (page_info);
+ g_print ("%s: content chunk (size: %d)\n", page_info->main_uri, event->content.length);
+#endif
+ if (page_info->file)
+ fwrite (event->content.data, 1, event->content.length, page_info->file);
+ g_string_append_len (page_info->unparsed, event->content.data, event->content.length);
+ if (page_info->first_content == -1.0)
+ page_info->first_content = g_timer_elapsed (page_info->timer, NULL);
+
+ queue_parse (page_info);
+#if 0
+ parse_idle (page_info);
#endif
- page_info->n_content_chunks++;
- break;
+ page_info->n_content_chunks++;
+ break;
case LAC_HTTP_EVENT_END_CONTENT:
#if 0
@@ -566,6 +571,15 @@ already_downloaded (const LacUri *uri)
return FALSE;
}
+static char *
+create_name (const char *uri_str)
+{
+ const char *last_slash = strrchr (uri_str, '/');
+
+ /* No '/' in uri_str: copy the whole string instead of falling off the end (UB). */
+ return g_strdup (last_slash ? last_slash + 1 : uri_str);
+}
+
static gboolean
do_download (LacUri *base_uri, const gchar *uri_str, gboolean try_parse)
{
@@ -575,6 +589,8 @@ do_download (LacUri *base_uri, const gchar *uri_str, gboolean try_parse)
page_info->timer = g_timer_new ();
page_info->try_parse = try_parse;
+ page_info->file = fopen (create_name (uri_str), "w");
+
#if 0
printf ("uri: %s\n", uri_str);
#endif
@@ -582,7 +598,7 @@ do_download (LacUri *base_uri, const gchar *uri_str, gboolean try_parse)
if (page_info->try_parse)
g_print ("DOWNLAOD TRUE %s ********\n", uri_str);
- uri = lac_uri_new_from_str (base_uri, uri_str);
+ uri = lac_uri_new_from_string (base_uri, uri_str);
if (!uri)
{
printf ("bad uri: %s\n", uri_str);
diff --git a/tests/uri-test.c b/tests/uri-test.c
index 4285013..55fd3a3 100644
--- a/tests/uri-test.c
+++ b/tests/uri-test.c
@@ -91,163 +91,163 @@ main ()
uri_str = "//www.daimi.au.dk/baz.html?asdf#//qwer";
base_uri_str = NULL;
- uri = lac_uri_new_from_str (NULL, uri_str);
+ uri = lac_uri_new_from_string (NULL, uri_str);
print_uri (uri, base_uri_str, uri_str);
uri_str = "http:baz::?####";
base_uri_str = NULL;
- uri = lac_uri_new_from_str (NULL, uri_str);
+ uri = lac_uri_new_from_string (NULL, uri_str);
print_uri (uri, base_uri_str, uri_str);
uri_str = "/index.html#interesting";
base_uri_str = NULL;
- uri = lac_uri_new_from_str (NULL, uri_str);
+ uri = lac_uri_new_from_string (NULL, uri_str);
print_uri (uri, base_uri_str, uri_str);
uri_str = "//www:80/index.html#interesting";
base_uri_str = NULL;
- uri = lac_uri_new_from_str (NULL, uri_str);
+ uri = lac_uri_new_from_string (NULL, uri_str);
print_uri (uri, base_uri_str, uri_str);
uri_str = "#//www:80/index.html#interesting";
base_uri_str = NULL;
- uri = lac_uri_new_from_str (NULL, uri_str);
+ uri = lac_uri_new_from_string (NULL, uri_str);
print_uri (uri, base_uri_str, uri_str);
uri_str = ":/#//www:80/index.html#interesting";
base_uri_str = NULL;
- uri = lac_uri_new_from_str (NULL, uri_str);
+ uri = lac_uri_new_from_string (NULL, uri_str);
print_uri (uri, base_uri_str, uri_str);
uri_str = "mailto:sandmann@daimi.au.dk:80";
base_uri_str = NULL;
- uri = lac_uri_new_from_str (NULL, uri_str);
+ uri = lac_uri_new_from_string (NULL, uri_str);
print_uri (uri, base_uri_str, uri_str);
uri_str = "http://www.daimi.au.dk:1324786912387461234/baz.html";
base_uri_str = NULL;
- uri = lac_uri_new_from_str (NULL, uri_str);
+ uri = lac_uri_new_from_string (NULL, uri_str);
print_uri (uri, base_uri_str, uri_str);
uri_str = "http://www.daimi.au.dk:8080/baz.html?asdf#//qwer";
base_uri_str = NULL;
- uri = lac_uri_new_from_str (NULL, uri_str);
+ uri = lac_uri_new_from_string (NULL, uri_str);
print_uri (uri, base_uri_str, uri_str);
uri_str = "HtTp://www:-1/baz.html?asdf#asdff";
base_uri_str = NULL;
- uri = lac_uri_new_from_str (NULL, uri_str);
+ uri = lac_uri_new_from_string (NULL, uri_str);
print_uri (uri, base_uri_str, uri_str);
uri_str = "HtTp://wWWWWWWWWw.baz:::::as:lkj1/baz.html?asdf#asdff";
base_uri_str = NULL;
- uri = lac_uri_new_from_str (NULL, uri_str);
+ uri = lac_uri_new_from_string (NULL, uri_str);
print_uri (uri, base_uri_str, uri_str);
uri_str = "";
base_uri_str = NULL;
- uri = lac_uri_new_from_str (NULL, uri_str);
+ uri = lac_uri_new_from_string (NULL, uri_str);
print_uri (uri, base_uri_str, uri_str);
uri_str = "http:#?";
base_uri_str = NULL;
- uri = lac_uri_new_from_str (NULL, uri_str);
+ uri = lac_uri_new_from_string (NULL, uri_str);
print_uri (uri, base_uri_str, uri_str);
uri_str = "http://sandmann:baz@www.daimi.au.dk/index.html";
base_uri_str = NULL;
- uri = lac_uri_new_from_str (NULL, uri_str);
+ uri = lac_uri_new_from_string (NULL, uri_str);
print_uri (uri, base_uri_str, uri_str);
uri_str = "ftp://ftp.daimi.au.dk/~biasdf";
base_uri_str = NULL;
- uri = lac_uri_new_from_str (NULL, uri_str);
+ uri = lac_uri_new_from_string (NULL, uri_str);
print_uri (uri, base_uri_str, uri_str);
uri_str = "ftp://sandmann:asdf@ftp.daimi.au.dk/~biasdf";
base_uri_str = NULL;
- uri = lac_uri_new_from_str (NULL, uri_str);
+ uri = lac_uri_new_from_string (NULL, uri_str);
print_uri (uri, base_uri_str, uri_str);
uri_str = "ftp://:asdf@ftp.daimi.au.dk/~biasdf";
base_uri_str = NULL;
- uri = lac_uri_new_from_str (NULL, uri_str);
+ uri = lac_uri_new_from_string (NULL, uri_str);
print_uri (uri, base_uri_str, uri_str);
uri_str = "ftp://foo:bar@ftp.daimi.au.dk/~biasdf";
base_uri_str = NULL;
- uri = lac_uri_new_from_str (NULL, uri_str);
+ uri = lac_uri_new_from_string (NULL, uri_str);
print_uri (uri, base_uri_str, uri_str);
uri_str = "ftp://foo:bar@ftp.com/~biasdf;type=d";
base_uri_str = NULL;
- uri = lac_uri_new_from_str (NULL, uri_str);
+ uri = lac_uri_new_from_string (NULL, uri_str);
print_uri (uri, base_uri_str, uri_str);
uri_str = "ftp://foo:bar@ftp.com/~biasdf;type=x";
base_uri_str = NULL;
- uri = lac_uri_new_from_str (NULL, uri_str);
+ uri = lac_uri_new_from_string (NULL, uri_str);
print_uri (uri, base_uri_str, uri_str);
uri_str = "file://foo:bar@ftp.com/~biasdf;type=x";
base_uri_str = NULL;
- uri = lac_uri_new_from_str (NULL, uri_str);
+ uri = lac_uri_new_from_string (NULL, uri_str);
print_uri (uri, base_uri_str, uri_str);
uri_str = "file:///users/sandmann/public_html/index.html";
base_uri_str = NULL;
- uri = lac_uri_new_from_str (NULL, uri_str);
+ uri = lac_uri_new_from_string (NULL, uri_str);
print_uri (uri, base_uri_str, uri_str);
uri_str = "ftp:///users/sandmann/public_html/index.html";
base_uri_str = NULL;
- uri = lac_uri_new_from_str (NULL, uri_str);
+ uri = lac_uri_new_from_string (NULL, uri_str);
print_uri (uri, base_uri_str, uri_str);
uri_str = "/users/sandman/public_html/index.html";
base_uri_str = NULL;
- uri = lac_uri_new_from_str (NULL, uri_str);
+ uri = lac_uri_new_from_string (NULL, uri_str);
print_uri (uri, base_uri_str, uri_str);
uri_str = "//images.slashdot.org/topics/topictech2.gif";
base_uri_str = NULL;
- uri = lac_uri_new_from_str (NULL, uri_str);
+ uri = lac_uri_new_from_string (NULL, uri_str);
print_uri (uri, base_uri_str, uri_str);
uri_str = "../images/euroheader.gif";
base_uri_str = NULL;
- uri = lac_uri_new_from_str (NULL, uri_str);
+ uri = lac_uri_new_from_string (NULL, uri_str);
print_uri (uri, base_uri_str, uri_str);
uri_str = "Http:.................";
base_uri_str = NULL;
- uri = lac_uri_new_from_str (NULL, uri_str);
+ uri = lac_uri_new_from_string (NULL, uri_str);
print_uri (uri, base_uri_str, uri_str);
base_uri_str = "http://www.europa.com/";
- base_uri = lac_uri_new_from_str (NULL, base_uri_str);
+ base_uri = lac_uri_new_from_string (NULL, base_uri_str);
uri_str = "../images/euroheader.gif";
- uri = lac_uri_new_from_str (base_uri, uri_str);
+ uri = lac_uri_new_from_string (base_uri, uri_str);
print_uri (uri, base_uri_str, uri_str);
base_uri_str = NULL;
uri_str = "ftp://birnan:emfle@birnan.com:345/images/euroheader.gif";
- uri = lac_uri_new_from_str (NULL, uri_str);
+ uri = lac_uri_new_from_string (NULL, uri_str);
print_uri (uri, base_uri_str, uri_str);
base_uri_str = NULL;
uri_str = "//../birnan.html";
- uri = lac_uri_new_from_str (NULL, uri_str);
+ uri = lac_uri_new_from_string (NULL, uri_str);
print_uri (uri, base_uri_str, uri_str);
base_uri_str = NULL;
uri_str = "///../birnan.html";
- uri = lac_uri_new_from_str (NULL, uri_str);
+ uri = lac_uri_new_from_string (NULL, uri_str);
print_uri (uri, base_uri_str, uri_str);
base_uri_str = NULL;
uri_str = "../birnan.html";
- uri = lac_uri_new_from_str (NULL, uri_str);
+ uri = lac_uri_new_from_string (NULL, uri_str);
print_uri (uri, base_uri_str, uri_str);
return 0;