diff options
author | sandmann <sandmann> | 2004-09-01 11:59:18 +0000 |
---|---|---|
committer | sandmann <sandmann> | 2004-09-01 11:59:18 +0000 |
commit | ab46c861ef2dc507abdfeb50391ea3d9cd5d1179 (patch) | |
tree | 683c679981d99b764ecdcf0c4057af8763a7dade | |
parent | bf426687bbded794a682f660c9754796282c0566 (diff) |
*** empty log message ***
-rwxr-xr-x | TODO | 189 | ||||
-rw-r--r-- | src/lac.h | 14 | ||||
-rw-r--r-- | src/lachttp.c | 1 | ||||
-rw-r--r-- | src/lacuri.c | 2 | ||||
-rw-r--r-- | tests/http-test.c | 2 | ||||
-rw-r--r-- | tests/lacwget.c | 38 | ||||
-rw-r--r-- | tests/uri-test.c | 66 |
7 files changed, 160 insertions, 152 deletions
@@ -8,27 +8,10 @@ correctness: Problem: Other threads might not want the signal ignored. This race condition may be acceptable - * Fix problem where we go into infinite loop if the server - closes the connection before we send anything. (showstopper - for 1.0) - - - www.skolekom.dk used to be like that (problem is in - transport_handle_close) - * thread-safeness (big lock?) (showstopper for 1.0). Note: lac_activity_cancel() from another thread. This could get hairy :-/ - * consider - - activity = lac_begin_activity (timeout); - ...; - lac_activity_wait(activity)/lac_activity_cancel(); - - lac_end_activity(activity); - - within "..." you can call lac_activity_cancel(); - * relative URI's - showstopper for 1.0 @@ -50,10 +33,6 @@ correctness: - Accept it based on "Ok". misc: - * never emit events from a constructor. apps will get confused - if they get an event before the constructor returns. add an idle - handler instead. [May already be done, need to check] - * need a real regression test suite * get rid of CacheRecord->type * Cleanup of the DNS module @@ -88,9 +67,6 @@ performance: server if we can. consistency: - * be consistent about "str" vs. "string" - - what does glib do? - * generally: - foo_get_bar() means "whatever is returned is owned by @@ -204,81 +180,6 @@ features: about different ways of structuring i/o * cookie support (see RFC 2109) - * LacCowString (?) - - do a simple copy-on-write string on contiguous memory - - pro: simple, easy to get c-string - con: insert in the middle of it is inefficient - - - how much data in an xml/html document is markup and - how much is character data? - - we risk doing a lot if copying during parsing of xml - if eg a begin tag spans more than one chunk of data - - even if we get the cow string into glib and get - gdkpixbuf to use it, images may need to a lot of - uncompressing anyway so that all of the data will be - copied anyway. 
- - actually, memcpy() is probably not really a bottleneck - if do aggressive caching and reuse images whenever - possible. - - since we have "const char *" and a dummy reserved pointer - in events, we can do this later without breaking anything. - - possible API: - - /* - * Copy-on-write string - */ - typedef struct _LacCowString LacCowString; - LacCowString *lac_cow_string_new (const gchar *init, - gsize len); - LacCowString *lac_cow_string_copy (const LacCowString *cow_string); - void lac_cow_string_free (LacCowString *cow_string); - gboolean lac_cow_string_equal (const LacCowString *cow_string1, - const LacCowString *cow_string2); - guint lac_cow_string_hash (const LacCowString *cow_string); - gint lac_cow_string_compare (const LacCowString *cow_string1, - const LacCowString *cow_string2); - LacCowString *lac_cow_string_truncate (LacCowString *cow_string, - gsize size); - LacCowString *lac_cow_string_prepend (LacCowString *cow_string, - const gchar *val, - gsize len); - LacCowString *lac_cow_string_insert (LacCowString *cow_string, - gsize pos, - const gchar *val, - gsize len); - LacCowString *lac_cow_string_append (LacCowString *cow_string, - const gchar *val, - gsize len); - LacCowString *lac_cow_string_erase (LacCowString *cow_string, - gsize pos, - gsize len); - LacCowString *lac_cow_string_concatenate (LacCowString *cow_string1, ...); - - * add a timeout implementation? 
- - better scalability - perhaps more convenient - initial implementation can be very simple in terms of g_timeout_add() - API: - /* - * Timeout - */ - typedef struct _LacTimeout LacTimeout; - - LacTimeout *lac_timeout_new (guint interval, - GSourceFunc callback, - gpointer data); - void lac_timeout_set_interval (LacTimeout *timeout, - guint interval); - guint lac_timeout_get_interval (LacTimeout *timeout); - void lac_timeout_disable (LacTimeout *timeout); - void lac_timeout_enable (LacTimeout *timeout); - void lac_timeout_destroy (LacTimeout *timeout); activity: Use cases: @@ -473,6 +374,9 @@ Already done: ============== misc: + * never emit events from a constructor. apps will get confused + if they get an event before the constructor returns. add an idle + handler instead. [May already be done, need to check] * move a lot of #includes from lac.h to lacprivate.h * rename lacdns-manager.[ch] -> lacdns-query.[ch]. * format functions correctly @@ -502,6 +406,13 @@ misc: they can't be added later. correctness: + * Fix problem where we go into infinite loop if the server + closes the connection before we send anything. (showstopper + for 1.0) + + - www.skolekom.dk used to be like that (problem is in + transport_handle_close) + [I think this is fixed, not completely sure] * We get 'broken server' too often * adobe.com is not completely fixed @@ -651,6 +562,82 @@ correctness: performance: + * add a timeout implementation? 
[nah, fix the one in glib instead] + + - better scalability + - perhaps more convenient + - initial implementation can be very simple in terms + of g_timeout_add() + + API: + + /* + * Timeout + */ + typedef struct _LacTimeout LacTimeout; + + LacTimeout *lac_timeout_new (guint interval, + GSourceFunc callback, + gpointer data); + void lac_timeout_set_interval (LacTimeout *timeout, + guint interval); + guint lac_timeout_get_interval (LacTimeout *timeout); + void lac_timeout_disable (LacTimeout *timeout); + void lac_timeout_enable (LacTimeout *timeout); + void lac_timeout_destroy (LacTimeout *timeout); + + * LacCowString ([not worth it]) + - do a simple copy-on-write string on contiguous memory + + pro: simple, easy to get c-string + con: insert in the middle of it is inefficient + + - how much data in an xml/html document is markup and + how much is character data? + - we risk doing a lot of copying during parsing of xml + if e.g. a begin tag spans more than one chunk of data + - even if we get the cow string into glib and get + gdkpixbuf to use it, images may need a lot of + uncompressing anyway so that all of the data will be + copied anyway. + - actually, memcpy() is probably not really a bottleneck + if we do aggressive caching and reuse images whenever + possible. + - since we have "const char *" and a dummy reserved pointer + in events, we can do this later without breaking anything. 
+ + possible API: + + /* + * Copy-on-write string + */ + typedef struct _LacCowString LacCowString; + LacCowString *lac_cow_string_new (const gchar *init, + gsize len); + LacCowString *lac_cow_string_copy (const LacCowString *cow_string); + void lac_cow_string_free (LacCowString *cow_string); + gboolean lac_cow_string_equal (const LacCowString *cow_string1, + const LacCowString *cow_string2); + guint lac_cow_string_hash (const LacCowString *cow_string); + gint lac_cow_string_compare (const LacCowString *cow_string1, + const LacCowString *cow_string2); + LacCowString *lac_cow_string_truncate (LacCowString *cow_string, + gsize size); + LacCowString *lac_cow_string_prepend (LacCowString *cow_string, + const gchar *val, + gsize len); + LacCowString *lac_cow_string_insert (LacCowString *cow_string, + gsize pos, + const gchar *val, + gsize len); + LacCowString *lac_cow_string_append (LacCowString *cow_string, + const gchar *val, + gsize len); + LacCowString *lac_cow_string_erase (LacCowString *cow_string, + gsize pos, + gsize len); + LacCowString *lac_cow_string_concatenate (LacCowString *cow_string1, ...); + * limit number of outstanding dns queries? Yes. * sockets are almost always writable, so in general we should avoid poll()ing just to write. Instead, just send(), and only call @@ -670,6 +657,10 @@ performance: call the callback several times. consistency: + * be consistent about "str" vs. "string" + - what does glib do? (a little of both) + I'll go with string + * be consistent about whether len comes before data or vice versa - data comes first followed by length. 
This is consistent with unix, glib and everything else @@ -315,13 +315,13 @@ struct _LacUri { guint checksum; /* used internally */ }; -LacUri * lac_uri_new_from_str (const LacUri *base, const gchar *str); -LacUri * lac_uri_copy (const LacUri *uri); -gchar * lac_uri_string (const LacUri *uri); -void lac_uri_free (LacUri *uri); -gboolean lac_uri_equal (const LacUri *uri1, - const LacUri *uri2); - +LacUri * lac_uri_new_from_string (const LacUri *base, + const gchar *str); +LacUri * lac_uri_copy (const LacUri *uri); +gchar * lac_uri_string (const LacUri *uri); +void lac_uri_free (LacUri *uri); +gboolean lac_uri_equal (const LacUri *uri1, + const LacUri *uri2); /* * HTTP diff --git a/src/lachttp.c b/src/lachttp.c index 7265f71..3e199d5 100644 --- a/src/lachttp.c +++ b/src/lachttp.c @@ -2356,6 +2356,7 @@ http_transport_handle_close (HttpTransport *transport) transport->host, transport, 0); } + http_host_transport_will_close_notify (transport->host, transport); http_transport_remove_answer_timeout (transport); http_transport_return_outstanding_requests (transport); http_host_transport_closed_notify (transport->host, transport); diff --git a/src/lacuri.c b/src/lacuri.c index 5931b70..bf1b7c1 100644 --- a/src/lacuri.c +++ b/src/lacuri.c @@ -800,7 +800,7 @@ step_7: #endif LacUri * -lac_uri_new_from_str (const LacUri *base, const gchar *str) +lac_uri_new_from_string (const LacUri *base, const gchar *str) { LacUri *result; gchar *base_str = NULL; diff --git a/tests/http-test.c b/tests/http-test.c index 240d563..884a61d 100644 --- a/tests/http-test.c +++ b/tests/http-test.c @@ -141,7 +141,7 @@ main (int argc, char *argv[]) { LacHttpRequest *request; - LacUri *uri = lac_uri_new_from_str (NULL, argv[i]); + LacUri *uri = lac_uri_new_from_string (NULL, argv[i]); if (!uri) { g_print ("bad uri: %s\n", argv[i]); diff --git a/tests/lacwget.c b/tests/lacwget.c index c7245ef..bf38506 100644 --- a/tests/lacwget.c +++ b/tests/lacwget.c @@ -72,6 +72,7 @@ typedef struct PageInfo { gdouble 
parser_eof; gboolean is_redirect; gboolean try_parse; + FILE *file; } PageInfo; static GPtrArray *outstanding_page_infos; @@ -295,7 +296,7 @@ handle_base_element (PageInfo *page_info, const HtmlParserBeginElementEvent *eve if (uri) { - LacUri *new_base = lac_uri_new_from_str (page_info->base_uri, uri); + LacUri *new_base = lac_uri_new_from_string (page_info->base_uri, uri); if (new_base) { printf ("new base: %s\n", lac_uri_string (new_base)); @@ -500,7 +501,6 @@ http_callback (LacHttpRequest *request, const LacHttpEvent *event) break; case LAC_HTTP_EVENT_CONTENT: - g_print ("%s: content chunk (size: %d)\n", page_info->main_uri, event->content.length); #if 0 { int i; @@ -508,16 +508,21 @@ http_callback (LacHttpRequest *request, const LacHttpEvent *event) printf ("%c", event->content.data[i]); } #endif - g_string_append_len (page_info->unparsed, event->content.data, event->content.length); - if (page_info->first_content == -1.0) - page_info->first_content = g_timer_elapsed (page_info->timer, NULL); - - queue_parse (page_info); #if 0 - parse_idle (page_info); + g_print ("%s: content chunk (size: %d)\n", page_info->main_uri, event->content.length); +#endif + if (page_info->file) + fwrite (event->content.data, 1, event->content.length, page_info->file); + g_string_append_len (page_info->unparsed, event->content.data, event->content.length); + if (page_info->first_content == -1.0) + page_info->first_content = g_timer_elapsed (page_info->timer, NULL); + + queue_parse (page_info); +#if 0 + parse_idle (page_info); #endif - page_info->n_content_chunks++; - break; + page_info->n_content_chunks++; + break; case LAC_HTTP_EVENT_END_CONTENT: #if 0 @@ -566,6 +571,15 @@ already_downloaded (const LacUri *uri) return FALSE; } +static char * +create_name (const char *uri_str) +{ + char *last_slash = strrchr (uri_str, '/'); + + if (last_slash) + return g_strdup (last_slash + 1); +} + static gboolean do_download (LacUri *base_uri, const gchar *uri_str, gboolean try_parse) { @@ -575,6 
+589,8 @@ do_download (LacUri *base_uri, const gchar *uri_str, gboolean try_parse) page_info->timer = g_timer_new (); page_info->try_parse = try_parse; + page_info->file = fopen (create_name (uri_str), "w"); + #if 0 printf ("uri: %s\n", uri_str); #endif @@ -582,7 +598,7 @@ do_download (LacUri *base_uri, const gchar *uri_str, gboolean try_parse) if (page_info->try_parse) g_print ("DOWNLAOD TRUE %s ********\n", uri_str); - uri = lac_uri_new_from_str (base_uri, uri_str); + uri = lac_uri_new_from_string (base_uri, uri_str); if (!uri) { printf ("bad uri: %s\n", uri_str); diff --git a/tests/uri-test.c b/tests/uri-test.c index 4285013..55fd3a3 100644 --- a/tests/uri-test.c +++ b/tests/uri-test.c @@ -91,163 +91,163 @@ main () uri_str = "//www.daimi.au.dk/baz.html?asdf#//qwer"; base_uri_str = NULL; - uri = lac_uri_new_from_str (NULL, uri_str); + uri = lac_uri_new_from_string (NULL, uri_str); print_uri (uri, base_uri_str, uri_str); uri_str = "http:baz::?####"; base_uri_str = NULL; - uri = lac_uri_new_from_str (NULL, uri_str); + uri = lac_uri_new_from_string (NULL, uri_str); print_uri (uri, base_uri_str, uri_str); uri_str = "/index.html#interesting"; base_uri_str = NULL; - uri = lac_uri_new_from_str (NULL, uri_str); + uri = lac_uri_new_from_string (NULL, uri_str); print_uri (uri, base_uri_str, uri_str); uri_str = "//www:80/index.html#interesting"; base_uri_str = NULL; - uri = lac_uri_new_from_str (NULL, uri_str); + uri = lac_uri_new_from_string (NULL, uri_str); print_uri (uri, base_uri_str, uri_str); uri_str = "#//www:80/index.html#interesting"; base_uri_str = NULL; - uri = lac_uri_new_from_str (NULL, uri_str); + uri = lac_uri_new_from_string (NULL, uri_str); print_uri (uri, base_uri_str, uri_str); uri_str = ":/#//www:80/index.html#interesting"; base_uri_str = NULL; - uri = lac_uri_new_from_str (NULL, uri_str); + uri = lac_uri_new_from_string (NULL, uri_str); print_uri (uri, base_uri_str, uri_str); uri_str = "mailto:sandmann@daimi.au.dk:80"; base_uri_str = NULL; - uri = 
lac_uri_new_from_str (NULL, uri_str); + uri = lac_uri_new_from_string (NULL, uri_str); print_uri (uri, base_uri_str, uri_str); uri_str = "http://www.daimi.au.dk:1324786912387461234/baz.html"; base_uri_str = NULL; - uri = lac_uri_new_from_str (NULL, uri_str); + uri = lac_uri_new_from_string (NULL, uri_str); print_uri (uri, base_uri_str, uri_str); uri_str = "http://www.daimi.au.dk:8080/baz.html?asdf#//qwer"; base_uri_str = NULL; - uri = lac_uri_new_from_str (NULL, uri_str); + uri = lac_uri_new_from_string (NULL, uri_str); print_uri (uri, base_uri_str, uri_str); uri_str = "HtTp://www:-1/baz.html?asdf#asdff"; base_uri_str = NULL; - uri = lac_uri_new_from_str (NULL, uri_str); + uri = lac_uri_new_from_string (NULL, uri_str); print_uri (uri, base_uri_str, uri_str); uri_str = "HtTp://wWWWWWWWWw.baz:::::as:lkj1/baz.html?asdf#asdff"; base_uri_str = NULL; - uri = lac_uri_new_from_str (NULL, uri_str); + uri = lac_uri_new_from_string (NULL, uri_str); print_uri (uri, base_uri_str, uri_str); uri_str = ""; base_uri_str = NULL; - uri = lac_uri_new_from_str (NULL, uri_str); + uri = lac_uri_new_from_string (NULL, uri_str); print_uri (uri, base_uri_str, uri_str); uri_str = "http:#?"; base_uri_str = NULL; - uri = lac_uri_new_from_str (NULL, uri_str); + uri = lac_uri_new_from_string (NULL, uri_str); print_uri (uri, base_uri_str, uri_str); uri_str = "http://sandmann:baz@www.daimi.au.dk/index.html"; base_uri_str = NULL; - uri = lac_uri_new_from_str (NULL, uri_str); + uri = lac_uri_new_from_string (NULL, uri_str); print_uri (uri, base_uri_str, uri_str); uri_str = "ftp://ftp.daimi.au.dk/~biasdf"; base_uri_str = NULL; - uri = lac_uri_new_from_str (NULL, uri_str); + uri = lac_uri_new_from_string (NULL, uri_str); print_uri (uri, base_uri_str, uri_str); uri_str = "ftp://sandmann:asdf@ftp.daimi.au.dk/~biasdf"; base_uri_str = NULL; - uri = lac_uri_new_from_str (NULL, uri_str); + uri = lac_uri_new_from_string (NULL, uri_str); print_uri (uri, base_uri_str, uri_str); uri_str = 
"ftp://:asdf@ftp.daimi.au.dk/~biasdf"; base_uri_str = NULL; - uri = lac_uri_new_from_str (NULL, uri_str); + uri = lac_uri_new_from_string (NULL, uri_str); print_uri (uri, base_uri_str, uri_str); uri_str = "ftp://foo:bar@ftp.daimi.au.dk/~biasdf"; base_uri_str = NULL; - uri = lac_uri_new_from_str (NULL, uri_str); + uri = lac_uri_new_from_string (NULL, uri_str); print_uri (uri, base_uri_str, uri_str); uri_str = "ftp://foo:bar@ftp.com/~biasdf;type=d"; base_uri_str = NULL; - uri = lac_uri_new_from_str (NULL, uri_str); + uri = lac_uri_new_from_string (NULL, uri_str); print_uri (uri, base_uri_str, uri_str); uri_str = "ftp://foo:bar@ftp.com/~biasdf;type=x"; base_uri_str = NULL; - uri = lac_uri_new_from_str (NULL, uri_str); + uri = lac_uri_new_from_string (NULL, uri_str); print_uri (uri, base_uri_str, uri_str); uri_str = "file://foo:bar@ftp.com/~biasdf;type=x"; base_uri_str = NULL; - uri = lac_uri_new_from_str (NULL, uri_str); + uri = lac_uri_new_from_string (NULL, uri_str); print_uri (uri, base_uri_str, uri_str); uri_str = "file:///users/sandmann/public_html/index.html"; base_uri_str = NULL; - uri = lac_uri_new_from_str (NULL, uri_str); + uri = lac_uri_new_from_string (NULL, uri_str); print_uri (uri, base_uri_str, uri_str); uri_str = "ftp:///users/sandmann/public_html/index.html"; base_uri_str = NULL; - uri = lac_uri_new_from_str (NULL, uri_str); + uri = lac_uri_new_from_string (NULL, uri_str); print_uri (uri, base_uri_str, uri_str); uri_str = "/users/sandman/public_html/index.html"; base_uri_str = NULL; - uri = lac_uri_new_from_str (NULL, uri_str); + uri = lac_uri_new_from_string (NULL, uri_str); print_uri (uri, base_uri_str, uri_str); uri_str = "//images.slashdot.org/topics/topictech2.gif"; base_uri_str = NULL; - uri = lac_uri_new_from_str (NULL, uri_str); + uri = lac_uri_new_from_string (NULL, uri_str); print_uri (uri, base_uri_str, uri_str); uri_str = "../images/euroheader.gif"; base_uri_str = NULL; - uri = lac_uri_new_from_str (NULL, uri_str); + uri = 
lac_uri_new_from_string (NULL, uri_str); print_uri (uri, base_uri_str, uri_str); uri_str = "Http:................."; base_uri_str = NULL; - uri = lac_uri_new_from_str (NULL, uri_str); + uri = lac_uri_new_from_string (NULL, uri_str); print_uri (uri, base_uri_str, uri_str); base_uri_str = "http://www.europa.com/"; - base_uri = lac_uri_new_from_str (NULL, base_uri_str); + base_uri = lac_uri_new_from_string (NULL, base_uri_str); uri_str = "../images/euroheader.gif"; - uri = lac_uri_new_from_str (base_uri, uri_str); + uri = lac_uri_new_from_string (base_uri, uri_str); print_uri (uri, base_uri_str, uri_str); base_uri_str = NULL; uri_str = "ftp://birnan:emfle@birnan.com:345/images/euroheader.gif"; - uri = lac_uri_new_from_str (NULL, uri_str); + uri = lac_uri_new_from_string (NULL, uri_str); print_uri (uri, base_uri_str, uri_str); base_uri_str = NULL; uri_str = "//../birnan.html"; - uri = lac_uri_new_from_str (NULL, uri_str); + uri = lac_uri_new_from_string (NULL, uri_str); print_uri (uri, base_uri_str, uri_str); base_uri_str = NULL; uri_str = "///../birnan.html"; - uri = lac_uri_new_from_str (NULL, uri_str); + uri = lac_uri_new_from_string (NULL, uri_str); print_uri (uri, base_uri_str, uri_str); base_uri_str = NULL; uri_str = "../birnan.html"; - uri = lac_uri_new_from_str (NULL, uri_str); + uri = lac_uri_new_from_string (NULL, uri_str); print_uri (uri, base_uri_str, uri_str); return 0; |