summaryrefslogtreecommitdiff
path: root/net/dccp
diff options
context:
space:
mode:
Diffstat (limited to 'net/dccp')
-rw-r--r--net/dccp/Kconfig13
-rw-r--r--net/dccp/Makefile9
-rw-r--r--net/dccp/ackvec.c296
-rw-r--r--net/dccp/ackvec.h53
-rw-r--r--net/dccp/ccid.c189
-rw-r--r--net/dccp/ccid.h129
-rw-r--r--net/dccp/ccids/Kconfig43
-rw-r--r--net/dccp/ccids/Makefile4
-rw-r--r--net/dccp/ccids/ccid2.c779
-rw-r--r--net/dccp/ccids/ccid2.h85
-rw-r--r--net/dccp/ccids/ccid3.c112
-rw-r--r--net/dccp/ccids/ccid3.h5
-rw-r--r--net/dccp/dccp.h133
-rw-r--r--net/dccp/diag.c2
-rw-r--r--net/dccp/feat.c586
-rw-r--r--net/dccp/feat.h29
-rw-r--r--net/dccp/input.c28
-rw-r--r--net/dccp/ipv4.c333
-rw-r--r--net/dccp/ipv6.c371
-rw-r--r--net/dccp/minisocks.c37
-rw-r--r--net/dccp/options.c291
-rw-r--r--net/dccp/output.c88
-rw-r--r--net/dccp/proto.c442
-rw-r--r--net/dccp/sysctl.c124
-rw-r--r--net/dccp/timer.c14
25 files changed, 3155 insertions, 1040 deletions
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig
index 187ac182e24b..7e096ba8454f 100644
--- a/net/dccp/Kconfig
+++ b/net/dccp/Kconfig
@@ -24,6 +24,10 @@ config INET_DCCP_DIAG
def_tristate y if (IP_DCCP = y && INET_DIAG = y)
def_tristate m
+config IP_DCCP_ACKVEC
+ depends on IP_DCCP
+ def_bool N
+
source "net/dccp/ccids/Kconfig"
menu "DCCP Kernel Hacking"
@@ -36,15 +40,6 @@ config IP_DCCP_DEBUG
Just say N.
-config IP_DCCP_UNLOAD_HACK
- depends on IP_DCCP=m && IP_DCCP_CCID3=m
- bool "DCCP control sock unload hack"
- ---help---
- Enable this to be able to unload the dccp module when the it
- has only one refcount held, the control sock one. Just execute
- "rmmod dccp_ccid3 dccp"
-
- Just say N.
endmenu
endmenu
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index 87b27fff6e3b..7696e219b05d 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -2,15 +2,18 @@ obj-$(CONFIG_IPV6) += dccp_ipv6.o
dccp_ipv6-y := ipv6.o
-obj-$(CONFIG_IP_DCCP) += dccp.o
+obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o
-dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \
- timer.o
+dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o
+
+dccp_ipv4-y := ipv4.o
dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o
obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o
+dccp-$(CONFIG_SYSCTL) += sysctl.o
+
dccp_diag-y := diag.o
obj-y += ccids/
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 2c77dafbd091..b5981e5f6b00 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -13,36 +13,83 @@
#include "dccp.h"
#include <linux/dccp.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
#include <linux/skbuff.h>
+#include <linux/slab.h>
#include <net/sock.h>
+static kmem_cache_t *dccp_ackvec_slab;
+static kmem_cache_t *dccp_ackvec_record_slab;
+
+static struct dccp_ackvec_record *dccp_ackvec_record_new(void)
+{
+ struct dccp_ackvec_record *avr =
+ kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
+
+ if (avr != NULL)
+ INIT_LIST_HEAD(&avr->dccpavr_node);
+
+ return avr;
+}
+
+static void dccp_ackvec_record_delete(struct dccp_ackvec_record *avr)
+{
+ if (unlikely(avr == NULL))
+ return;
+ /* Check if deleting a linked record */
+ WARN_ON(!list_empty(&avr->dccpavr_node));
+ kmem_cache_free(dccp_ackvec_record_slab, avr);
+}
+
+static void dccp_ackvec_insert_avr(struct dccp_ackvec *av,
+ struct dccp_ackvec_record *avr)
+{
+ /*
+ * AVRs are sorted by seqno. Since we are sending them in order, we
+ * just add the AVR at the head of the list.
+ * -sorbo.
+ */
+ if (!list_empty(&av->dccpav_records)) {
+ const struct dccp_ackvec_record *head =
+ list_entry(av->dccpav_records.next,
+ struct dccp_ackvec_record,
+ dccpavr_node);
+ BUG_ON(before48(avr->dccpavr_ack_seqno,
+ head->dccpavr_ack_seqno));
+ }
+
+ list_add(&avr->dccpavr_node, &av->dccpav_records);
+}
+
int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
+#ifdef CONFIG_IP_DCCP_DEBUG
+ const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
+ "CLIENT tx: " : "server tx: ";
+#endif
struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
int len = av->dccpav_vec_len + 2;
struct timeval now;
u32 elapsed_time;
unsigned char *to, *from;
+ struct dccp_ackvec_record *avr;
+
+ if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
+ return -1;
dccp_timestamp(sk, &now);
elapsed_time = timeval_delta(&now, &av->dccpav_time) / 10;
- if (elapsed_time != 0)
- dccp_insert_option_elapsed_time(sk, skb, elapsed_time);
-
- if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
+ if (elapsed_time != 0 &&
+ dccp_insert_option_elapsed_time(sk, skb, elapsed_time))
return -1;
- /*
- * XXX: now we have just one ack vector sent record, so
- * we have to wait for it to be cleared.
- *
- * Of course this is not acceptable, but this is just for
- * basic testing now.
- */
- if (av->dccpav_ack_seqno != DCCP_MAX_SEQNO + 1)
+ avr = dccp_ackvec_record_new();
+ if (avr == NULL)
return -1;
DCCP_SKB_CB(skb)->dccpd_opt_len += len;
@@ -55,8 +102,8 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
from = av->dccpav_buf + av->dccpav_buf_head;
/* Check if buf_head wraps */
- if ((int)av->dccpav_buf_head + len > av->dccpav_vec_len) {
- const u32 tailsize = av->dccpav_vec_len - av->dccpav_buf_head;
+ if ((int)av->dccpav_buf_head + len > DCCP_MAX_ACKVEC_LEN) {
+ const u32 tailsize = DCCP_MAX_ACKVEC_LEN - av->dccpav_buf_head;
memcpy(to, from, tailsize);
to += tailsize;
@@ -73,45 +120,37 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
* sequence number it used for the ack packet; ack_ptr will equal
* buf_head; ack_ackno will equal buf_ackno; and ack_nonce will
* equal buf_nonce.
- *
- * This implemention uses just one ack record for now.
*/
- av->dccpav_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
- av->dccpav_ack_ptr = av->dccpav_buf_head;
- av->dccpav_ack_ackno = av->dccpav_buf_ackno;
- av->dccpav_ack_nonce = av->dccpav_buf_nonce;
- av->dccpav_sent_len = av->dccpav_vec_len;
+ avr->dccpavr_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
+ avr->dccpavr_ack_ptr = av->dccpav_buf_head;
+ avr->dccpavr_ack_ackno = av->dccpav_buf_ackno;
+ avr->dccpavr_ack_nonce = av->dccpav_buf_nonce;
+ avr->dccpavr_sent_len = av->dccpav_vec_len;
+
+ dccp_ackvec_insert_avr(av, avr);
dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, "
"ack_ackno=%llu\n",
- debug_prefix, av->dccpav_sent_len,
- (unsigned long long)av->dccpav_ack_seqno,
- (unsigned long long)av->dccpav_ack_ackno);
- return -1;
+ debug_prefix, avr->dccpavr_sent_len,
+ (unsigned long long)avr->dccpavr_ack_seqno,
+ (unsigned long long)avr->dccpavr_ack_ackno);
+ return 0;
}
-struct dccp_ackvec *dccp_ackvec_alloc(const unsigned int len,
- const gfp_t priority)
+struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
{
- struct dccp_ackvec *av;
-
- BUG_ON(len == 0);
+ struct dccp_ackvec *av = kmem_cache_alloc(dccp_ackvec_slab, priority);
- if (len > DCCP_MAX_ACKVEC_LEN)
- return NULL;
-
- av = kmalloc(sizeof(*av) + len, priority);
if (av != NULL) {
- av->dccpav_buf_len = len;
av->dccpav_buf_head =
- av->dccpav_buf_tail = av->dccpav_buf_len - 1;
- av->dccpav_buf_ackno =
- av->dccpav_ack_ackno = av->dccpav_ack_seqno = ~0LLU;
+ av->dccpav_buf_tail = DCCP_MAX_ACKVEC_LEN - 1;
+ av->dccpav_buf_ackno = DCCP_MAX_SEQNO + 1;
av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0;
av->dccpav_ack_ptr = 0;
av->dccpav_time.tv_sec = 0;
av->dccpav_time.tv_usec = 0;
av->dccpav_sent_len = av->dccpav_vec_len = 0;
+ INIT_LIST_HEAD(&av->dccpav_records);
}
return av;
@@ -119,7 +158,20 @@ struct dccp_ackvec *dccp_ackvec_alloc(const unsigned int len,
void dccp_ackvec_free(struct dccp_ackvec *av)
{
- kfree(av);
+ if (unlikely(av == NULL))
+ return;
+
+ if (!list_empty(&av->dccpav_records)) {
+ struct dccp_ackvec_record *avr, *next;
+
+ list_for_each_entry_safe(avr, next, &av->dccpav_records,
+ dccpavr_node) {
+ list_del_init(&avr->dccpavr_node);
+ dccp_ackvec_record_delete(avr);
+ }
+ }
+
+ kmem_cache_free(dccp_ackvec_slab, av);
}
static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av,
@@ -146,7 +198,7 @@ static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
unsigned int gap;
long new_head;
- if (av->dccpav_vec_len + packets > av->dccpav_buf_len)
+ if (av->dccpav_vec_len + packets > DCCP_MAX_ACKVEC_LEN)
return -ENOBUFS;
gap = packets - 1;
@@ -158,7 +210,7 @@ static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
gap + new_head + 1);
gap = -new_head;
}
- new_head += av->dccpav_buf_len;
+ new_head += DCCP_MAX_ACKVEC_LEN;
}
av->dccpav_buf_head = new_head;
@@ -251,7 +303,7 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
goto out_duplicate;
delta -= len + 1;
- if (++index == av->dccpav_buf_len)
+ if (++index == DCCP_MAX_ACKVEC_LEN)
index = 0;
}
}
@@ -259,7 +311,6 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
av->dccpav_buf_ackno = ackno;
dccp_timestamp(sk, &av->dccpav_time);
out:
- dccp_pr_debug("");
return 0;
out_duplicate:
@@ -297,44 +348,50 @@ void dccp_ackvec_print(const struct dccp_ackvec *av)
}
#endif
-static void dccp_ackvec_throw_away_ack_record(struct dccp_ackvec *av)
+static void dccp_ackvec_throw_record(struct dccp_ackvec *av,
+ struct dccp_ackvec_record *avr)
{
- /*
- * As we're keeping track of the ack vector size (dccpav_vec_len) and
- * the sent ack vector size (dccpav_sent_len) we don't need
- * dccpav_buf_tail at all, but keep this code here as in the future
- * we'll implement a vector of ack records, as suggested in
- * draft-ietf-dccp-spec-11.txt Appendix A. -acme
- */
-#if 0
- u32 new_buf_tail = av->dccpav_ack_ptr + 1;
- if (new_buf_tail >= av->dccpav_vec_len)
- new_buf_tail -= av->dccpav_vec_len;
- av->dccpav_buf_tail = new_buf_tail;
-#endif
- av->dccpav_vec_len -= av->dccpav_sent_len;
+ struct dccp_ackvec_record *next;
+
+ av->dccpav_buf_tail = avr->dccpavr_ack_ptr - 1;
+ if (av->dccpav_buf_tail == 0)
+ av->dccpav_buf_tail = DCCP_MAX_ACKVEC_LEN - 1;
+
+ av->dccpav_vec_len -= avr->dccpavr_sent_len;
+
+ /* free records */
+ list_for_each_entry_safe_from(avr, next, &av->dccpav_records,
+ dccpavr_node) {
+ list_del_init(&avr->dccpavr_node);
+ dccp_ackvec_record_delete(avr);
+ }
}
void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk,
const u64 ackno)
{
- /* Check if we actually sent an ACK vector */
- if (av->dccpav_ack_seqno == DCCP_MAX_SEQNO + 1)
- return;
+ struct dccp_ackvec_record *avr;
- if (ackno == av->dccpav_ack_seqno) {
+ /*
+ * If we traverse backwards, it should be faster when we have large
+ * windows. We will be receiving ACKs for stuff we sent a while back
+ * -sorbo.
+ */
+ list_for_each_entry_reverse(avr, &av->dccpav_records, dccpavr_node) {
+ if (ackno == avr->dccpavr_ack_seqno) {
#ifdef CONFIG_IP_DCCP_DEBUG
- struct dccp_sock *dp = dccp_sk(sk);
- const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
- "CLIENT rx ack: " : "server rx ack: ";
+ struct dccp_sock *dp = dccp_sk(sk);
+ const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
+ "CLIENT rx ack: " : "server rx ack: ";
#endif
- dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, "
- "ack_ackno=%llu, ACKED!\n",
- debug_prefix, 1,
- (unsigned long long)av->dccpav_ack_seqno,
- (unsigned long long)av->dccpav_ack_ackno);
- dccp_ackvec_throw_away_ack_record(av);
- av->dccpav_ack_seqno = DCCP_MAX_SEQNO + 1;
+ dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, "
+ "ack_ackno=%llu, ACKED!\n",
+ debug_prefix, 1,
+ (unsigned long long)avr->dccpavr_ack_seqno,
+ (unsigned long long)avr->dccpavr_ack_ackno);
+ dccp_ackvec_throw_record(av, avr);
+ break;
+ }
}
}
@@ -344,28 +401,20 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
const unsigned char *vector)
{
unsigned char i;
+ struct dccp_ackvec_record *avr;
/* Check if we actually sent an ACK vector */
- if (av->dccpav_ack_seqno == DCCP_MAX_SEQNO + 1)
- return;
- /*
- * We're in the receiver half connection, so if the received an ACK
- * vector ackno (e.g. 50) before dccpav_ack_seqno (e.g. 52), we're
- * not interested.
- *
- * Extra explanation with example:
- *
- * if we received an ACK vector with ackno 50, it can only be acking
- * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent).
- */
- /* dccp_pr_debug("is %llu < %llu? ", ackno, av->dccpav_ack_seqno); */
- if (before48(ackno, av->dccpav_ack_seqno)) {
- /* dccp_pr_debug_cat("yes\n"); */
+ if (list_empty(&av->dccpav_records))
return;
- }
- /* dccp_pr_debug_cat("no\n"); */
i = len;
+ /*
+ * XXX
+ * I think it might be more efficient to work backwards. See comment on
+ * rcv_ackno. -sorbo.
+ */
+ avr = list_entry(av->dccpav_records.next, struct dccp_ackvec_record,
+ dccpavr_node);
while (i--) {
const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
u64 ackno_end_rl;
@@ -373,14 +422,20 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
dccp_set_seqno(&ackno_end_rl, ackno - rl);
/*
- * dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl,
- * av->dccpav_ack_seqno, ackno);
+ * If our AVR sequence number is greater than the ack, go
+ * forward in the AVR list until it is not so.
*/
- if (between48(av->dccpav_ack_seqno, ackno_end_rl, ackno)) {
+ list_for_each_entry_from(avr, &av->dccpav_records,
+ dccpavr_node) {
+ if (!after48(avr->dccpavr_ack_seqno, ackno))
+ goto found;
+ }
+ /* End of the dccpav_records list, not found, exit */
+ break;
+found:
+ if (between48(avr->dccpavr_ack_seqno, ackno_end_rl, ackno)) {
const u8 state = (*vector &
DCCP_ACKVEC_STATE_MASK) >> 6;
- /* dccp_pr_debug_cat("yes\n"); */
-
if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) {
#ifdef CONFIG_IP_DCCP_DEBUG
struct dccp_sock *dp = dccp_sk(sk);
@@ -393,19 +448,16 @@ static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
"ACKED!\n",
debug_prefix, len,
(unsigned long long)
- av->dccpav_ack_seqno,
+ avr->dccpavr_ack_seqno,
(unsigned long long)
- av->dccpav_ack_ackno);
- dccp_ackvec_throw_away_ack_record(av);
+ avr->dccpavr_ack_ackno);
+ dccp_ackvec_throw_record(av, avr);
}
/*
- * If dccpav_ack_seqno was not received, no problem
- * we'll send another ACK vector.
+ * If it wasn't received, continue scanning... we might
+ * find another one.
*/
- av->dccpav_ack_seqno = DCCP_MAX_SEQNO + 1;
- break;
}
- /* dccp_pr_debug_cat("no\n"); */
dccp_set_seqno(&ackno, ackno_end_rl - 1);
++vector;
@@ -424,3 +476,43 @@ int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
len, value);
return 0;
}
+
+static char dccp_ackvec_slab_msg[] __initdata =
+ KERN_CRIT "DCCP: Unable to create ack vectors slab caches\n";
+
+int __init dccp_ackvec_init(void)
+{
+ dccp_ackvec_slab = kmem_cache_create("dccp_ackvec",
+ sizeof(struct dccp_ackvec), 0,
+ SLAB_HWCACHE_ALIGN, NULL, NULL);
+ if (dccp_ackvec_slab == NULL)
+ goto out_err;
+
+ dccp_ackvec_record_slab =
+ kmem_cache_create("dccp_ackvec_record",
+ sizeof(struct dccp_ackvec_record),
+ 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+ if (dccp_ackvec_record_slab == NULL)
+ goto out_destroy_slab;
+
+ return 0;
+
+out_destroy_slab:
+ kmem_cache_destroy(dccp_ackvec_slab);
+ dccp_ackvec_slab = NULL;
+out_err:
+ printk(dccp_ackvec_slab_msg);
+ return -ENOBUFS;
+}
+
+void dccp_ackvec_exit(void)
+{
+ if (dccp_ackvec_slab != NULL) {
+ kmem_cache_destroy(dccp_ackvec_slab);
+ dccp_ackvec_slab = NULL;
+ }
+ if (dccp_ackvec_record_slab != NULL) {
+ kmem_cache_destroy(dccp_ackvec_record_slab);
+ dccp_ackvec_record_slab = NULL;
+ }
+}
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index f7dfb5f67b87..ec7a89bb7b39 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -13,6 +13,7 @@
#include <linux/config.h>
#include <linux/compiler.h>
+#include <linux/list.h>
#include <linux/time.h>
#include <linux/types.h>
@@ -42,39 +43,57 @@
* Ack Vectors it has recently sent. For each packet sent carrying an
* Ack Vector, it remembers four variables:
*
- * @dccpav_ack_seqno - the Sequence Number used for the packet
- * (HC-Receiver seqno)
* @dccpav_ack_ptr - the value of buf_head at the time of acknowledgement.
- * @dccpav_ack_ackno - the Acknowledgement Number used for the packet
- * (HC-Sender seqno)
+ * @dccpav_records - list of dccp_ackvec_record
* @dccpav_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
*
- * @dccpav_buf_len - circular buffer length
* @dccpav_time - the time in usecs
* @dccpav_buf - circular buffer of acknowledgeable packets
*/
struct dccp_ackvec {
u64 dccpav_buf_ackno;
- u64 dccpav_ack_seqno;
- u64 dccpav_ack_ackno;
+ struct list_head dccpav_records;
struct timeval dccpav_time;
u8 dccpav_buf_head;
u8 dccpav_buf_tail;
u8 dccpav_ack_ptr;
u8 dccpav_sent_len;
u8 dccpav_vec_len;
- u8 dccpav_buf_len;
u8 dccpav_buf_nonce;
u8 dccpav_ack_nonce;
- u8 dccpav_buf[0];
+ u8 dccpav_buf[DCCP_MAX_ACKVEC_LEN];
+};
+
+/** struct dccp_ackvec_record - ack vector record
+ *
+ * ACK vector record as defined in Appendix A of spec.
+ *
+ * The list is sorted by dccpavr_ack_seqno
+ *
+ * @dccpavr_node - node in dccpav_records
+ * @dccpavr_ack_seqno - sequence number of the packet this record was sent on
+ * @dccpavr_ack_ackno - sequence number being acknowledged
+ * @dccpavr_ack_ptr - pointer into dccpav_buf where this record starts
+ * @dccpavr_ack_nonce - dccpav_ack_nonce at the time this record was sent
+ * @dccpavr_sent_len - lenght of the record in dccpav_buf
+ */
+struct dccp_ackvec_record {
+ struct list_head dccpavr_node;
+ u64 dccpavr_ack_seqno;
+ u64 dccpavr_ack_ackno;
+ u8 dccpavr_ack_ptr;
+ u8 dccpavr_ack_nonce;
+ u8 dccpavr_sent_len;
};
struct sock;
struct sk_buff;
#ifdef CONFIG_IP_DCCP_ACKVEC
-extern struct dccp_ackvec *dccp_ackvec_alloc(unsigned int len,
- const gfp_t priority);
+extern int dccp_ackvec_init(void);
+extern void dccp_ackvec_exit(void);
+
+extern struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority);
extern void dccp_ackvec_free(struct dccp_ackvec *av);
extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
@@ -92,8 +111,16 @@ static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
return av->dccpav_sent_len != av->dccpav_vec_len;
}
#else /* CONFIG_IP_DCCP_ACKVEC */
-static inline struct dccp_ackvec *dccp_ackvec_alloc(unsigned int len,
- const gfp_t priority)
+static inline int dccp_ackvec_init(void)
+{
+ return 0;
+}
+
+static inline void dccp_ackvec_exit(void)
+{
+}
+
+static inline struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
{
return NULL;
}
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
index 9d8fc0e289ea..ff05e59043cd 100644
--- a/net/dccp/ccid.c
+++ b/net/dccp/ccid.c
@@ -13,7 +13,7 @@
#include "ccid.h"
-static struct ccid *ccids[CCID_MAX];
+static struct ccid_operations *ccids[CCID_MAX];
#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
static atomic_t ccids_lockct = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(ccids_lock);
@@ -55,85 +55,202 @@ static inline void ccids_read_unlock(void)
#define ccids_read_unlock() do { } while(0)
#endif
-int ccid_register(struct ccid *ccid)
+static kmem_cache_t *ccid_kmem_cache_create(int obj_size, const char *fmt,...)
{
- int err;
+ kmem_cache_t *slab;
+ char slab_name_fmt[32], *slab_name;
+ va_list args;
- if (ccid->ccid_init == NULL)
- return -1;
+ va_start(args, fmt);
+ vsnprintf(slab_name_fmt, sizeof(slab_name_fmt), fmt, args);
+ va_end(args);
+
+ slab_name = kstrdup(slab_name_fmt, GFP_KERNEL);
+ if (slab_name == NULL)
+ return NULL;
+ slab = kmem_cache_create(slab_name, sizeof(struct ccid) + obj_size, 0,
+ SLAB_HWCACHE_ALIGN, NULL, NULL);
+ if (slab == NULL)
+ kfree(slab_name);
+ return slab;
+}
+
+static void ccid_kmem_cache_destroy(kmem_cache_t *slab)
+{
+ if (slab != NULL) {
+ const char *name = kmem_cache_name(slab);
+
+ kmem_cache_destroy(slab);
+ kfree(name);
+ }
+}
+
+int ccid_register(struct ccid_operations *ccid_ops)
+{
+ int err = -ENOBUFS;
+
+ ccid_ops->ccid_hc_rx_slab =
+ ccid_kmem_cache_create(ccid_ops->ccid_hc_rx_obj_size,
+ "%s_hc_rx_sock",
+ ccid_ops->ccid_name);
+ if (ccid_ops->ccid_hc_rx_slab == NULL)
+ goto out;
+
+ ccid_ops->ccid_hc_tx_slab =
+ ccid_kmem_cache_create(ccid_ops->ccid_hc_tx_obj_size,
+ "%s_hc_tx_sock",
+ ccid_ops->ccid_name);
+ if (ccid_ops->ccid_hc_tx_slab == NULL)
+ goto out_free_rx_slab;
ccids_write_lock();
err = -EEXIST;
- if (ccids[ccid->ccid_id] == NULL) {
- ccids[ccid->ccid_id] = ccid;
+ if (ccids[ccid_ops->ccid_id] == NULL) {
+ ccids[ccid_ops->ccid_id] = ccid_ops;
err = 0;
}
ccids_write_unlock();
- if (err == 0)
- pr_info("CCID: Registered CCID %d (%s)\n",
- ccid->ccid_id, ccid->ccid_name);
+ if (err != 0)
+ goto out_free_tx_slab;
+
+ pr_info("CCID: Registered CCID %d (%s)\n",
+ ccid_ops->ccid_id, ccid_ops->ccid_name);
+out:
return err;
+out_free_tx_slab:
+ ccid_kmem_cache_destroy(ccid_ops->ccid_hc_tx_slab);
+ ccid_ops->ccid_hc_tx_slab = NULL;
+ goto out;
+out_free_rx_slab:
+ ccid_kmem_cache_destroy(ccid_ops->ccid_hc_rx_slab);
+ ccid_ops->ccid_hc_rx_slab = NULL;
+ goto out;
}
EXPORT_SYMBOL_GPL(ccid_register);
-int ccid_unregister(struct ccid *ccid)
+int ccid_unregister(struct ccid_operations *ccid_ops)
{
ccids_write_lock();
- ccids[ccid->ccid_id] = NULL;
+ ccids[ccid_ops->ccid_id] = NULL;
ccids_write_unlock();
+
+ ccid_kmem_cache_destroy(ccid_ops->ccid_hc_tx_slab);
+ ccid_ops->ccid_hc_tx_slab = NULL;
+ ccid_kmem_cache_destroy(ccid_ops->ccid_hc_rx_slab);
+ ccid_ops->ccid_hc_rx_slab = NULL;
+
pr_info("CCID: Unregistered CCID %d (%s)\n",
- ccid->ccid_id, ccid->ccid_name);
+ ccid_ops->ccid_id, ccid_ops->ccid_name);
return 0;
}
EXPORT_SYMBOL_GPL(ccid_unregister);
-struct ccid *ccid_init(unsigned char id, struct sock *sk)
+struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp)
{
- struct ccid *ccid;
+ struct ccid_operations *ccid_ops;
+ struct ccid *ccid = NULL;
+ ccids_read_lock();
#ifdef CONFIG_KMOD
- if (ccids[id] == NULL)
+ if (ccids[id] == NULL) {
+ /* We only try to load if in process context */
+ ccids_read_unlock();
+ if (gfp & GFP_ATOMIC)
+ goto out;
request_module("net-dccp-ccid-%d", id);
+ ccids_read_lock();
+ }
#endif
- ccids_read_lock();
+ ccid_ops = ccids[id];
+ if (ccid_ops == NULL)
+ goto out_unlock;
- ccid = ccids[id];
- if (ccid == NULL)
- goto out;
+ if (!try_module_get(ccid_ops->ccid_owner))
+ goto out_unlock;
- if (!try_module_get(ccid->ccid_owner))
- goto out_err;
+ ccids_read_unlock();
- if (ccid->ccid_init(sk) != 0)
+ ccid = kmem_cache_alloc(rx ? ccid_ops->ccid_hc_rx_slab :
+ ccid_ops->ccid_hc_tx_slab, gfp);
+ if (ccid == NULL)
goto out_module_put;
+ ccid->ccid_ops = ccid_ops;
+ if (rx) {
+ memset(ccid + 1, 0, ccid_ops->ccid_hc_rx_obj_size);
+ if (ccid->ccid_ops->ccid_hc_rx_init != NULL &&
+ ccid->ccid_ops->ccid_hc_rx_init(ccid, sk) != 0)
+ goto out_free_ccid;
+ } else {
+ memset(ccid + 1, 0, ccid_ops->ccid_hc_tx_obj_size);
+ if (ccid->ccid_ops->ccid_hc_tx_init != NULL &&
+ ccid->ccid_ops->ccid_hc_tx_init(ccid, sk) != 0)
+ goto out_free_ccid;
+ }
out:
- ccids_read_unlock();
return ccid;
-out_module_put:
- module_put(ccid->ccid_owner);
-out_err:
+out_unlock:
+ ccids_read_unlock();
+ goto out;
+out_free_ccid:
+ kmem_cache_free(rx ? ccid_ops->ccid_hc_rx_slab :
+ ccid_ops->ccid_hc_tx_slab, ccid);
ccid = NULL;
+out_module_put:
+ module_put(ccid_ops->ccid_owner);
goto out;
}
-EXPORT_SYMBOL_GPL(ccid_init);
+EXPORT_SYMBOL_GPL(ccid_new);
+
+struct ccid *ccid_hc_rx_new(unsigned char id, struct sock *sk, gfp_t gfp)
+{
+ return ccid_new(id, sk, 1, gfp);
+}
+
+EXPORT_SYMBOL_GPL(ccid_hc_rx_new);
+
+struct ccid *ccid_hc_tx_new(unsigned char id,struct sock *sk, gfp_t gfp)
+{
+ return ccid_new(id, sk, 0, gfp);
+}
+
+EXPORT_SYMBOL_GPL(ccid_hc_tx_new);
-void ccid_exit(struct ccid *ccid, struct sock *sk)
+static void ccid_delete(struct ccid *ccid, struct sock *sk, int rx)
{
+ struct ccid_operations *ccid_ops;
+
if (ccid == NULL)
return;
+ ccid_ops = ccid->ccid_ops;
+ if (rx) {
+ if (ccid_ops->ccid_hc_rx_exit != NULL)
+ ccid_ops->ccid_hc_rx_exit(sk);
+ kmem_cache_free(ccid_ops->ccid_hc_rx_slab, ccid);
+ } else {
+ if (ccid_ops->ccid_hc_tx_exit != NULL)
+ ccid_ops->ccid_hc_tx_exit(sk);
+ kmem_cache_free(ccid_ops->ccid_hc_tx_slab, ccid);
+ }
ccids_read_lock();
+ if (ccids[ccid_ops->ccid_id] != NULL)
+ module_put(ccid_ops->ccid_owner);
+ ccids_read_unlock();
+}
- if (ccids[ccid->ccid_id] != NULL) {
- if (ccid->ccid_exit != NULL)
- ccid->ccid_exit(sk);
- module_put(ccid->ccid_owner);
- }
+void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk)
+{
+ ccid_delete(ccid, sk, 1);
+}
- ccids_read_unlock();
+EXPORT_SYMBOL_GPL(ccid_hc_rx_delete);
+
+void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk)
+{
+ ccid_delete(ccid, sk, 0);
}
-EXPORT_SYMBOL_GPL(ccid_exit);
+EXPORT_SYMBOL_GPL(ccid_hc_tx_delete);
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index de681c6ad081..f7eb6c613414 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -23,14 +23,16 @@
struct tcp_info;
-struct ccid {
+struct ccid_operations {
unsigned char ccid_id;
const char *ccid_name;
struct module *ccid_owner;
- int (*ccid_init)(struct sock *sk);
- void (*ccid_exit)(struct sock *sk);
- int (*ccid_hc_rx_init)(struct sock *sk);
- int (*ccid_hc_tx_init)(struct sock *sk);
+ kmem_cache_t *ccid_hc_rx_slab;
+ __u32 ccid_hc_rx_obj_size;
+ kmem_cache_t *ccid_hc_tx_slab;
+ __u32 ccid_hc_tx_obj_size;
+ int (*ccid_hc_rx_init)(struct ccid *ccid, struct sock *sk);
+ int (*ccid_hc_tx_init)(struct ccid *ccid, struct sock *sk);
void (*ccid_hc_rx_exit)(struct sock *sk);
void (*ccid_hc_tx_exit)(struct sock *sk);
void (*ccid_hc_rx_packet_recv)(struct sock *sk,
@@ -39,9 +41,9 @@ struct ccid {
unsigned char option,
unsigned char len, u16 idx,
unsigned char* value);
- void (*ccid_hc_rx_insert_options)(struct sock *sk,
+ int (*ccid_hc_rx_insert_options)(struct sock *sk,
struct sk_buff *skb);
- void (*ccid_hc_tx_insert_options)(struct sock *sk,
+ int (*ccid_hc_tx_insert_options)(struct sock *sk,
struct sk_buff *skb);
void (*ccid_hc_tx_packet_recv)(struct sock *sk,
struct sk_buff *skb);
@@ -67,75 +69,58 @@ struct ccid {
int __user *optlen);
};
-extern int ccid_register(struct ccid *ccid);
-extern int ccid_unregister(struct ccid *ccid);
+extern int ccid_register(struct ccid_operations *ccid_ops);
+extern int ccid_unregister(struct ccid_operations *ccid_ops);
-extern struct ccid *ccid_init(unsigned char id, struct sock *sk);
-extern void ccid_exit(struct ccid *ccid, struct sock *sk);
+struct ccid {
+ struct ccid_operations *ccid_ops;
+ char ccid_priv[0];
+};
-static inline void __ccid_get(struct ccid *ccid)
+static inline void *ccid_priv(const struct ccid *ccid)
{
- __module_get(ccid->ccid_owner);
+ return (void *)ccid->ccid_priv;
}
+extern struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx,
+ gfp_t gfp);
+
+extern struct ccid *ccid_hc_rx_new(unsigned char id, struct sock *sk,
+ gfp_t gfp);
+extern struct ccid *ccid_hc_tx_new(unsigned char id, struct sock *sk,
+ gfp_t gfp);
+
+extern void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk);
+extern void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk);
+
static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk,
struct sk_buff *skb, int len)
{
int rc = 0;
- if (ccid->ccid_hc_tx_send_packet != NULL)
- rc = ccid->ccid_hc_tx_send_packet(sk, skb, len);
+ if (ccid->ccid_ops->ccid_hc_tx_send_packet != NULL)
+ rc = ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb, len);
return rc;
}
static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk,
int more, int len)
{
- if (ccid->ccid_hc_tx_packet_sent != NULL)
- ccid->ccid_hc_tx_packet_sent(sk, more, len);
-}
-
-static inline int ccid_hc_rx_init(struct ccid *ccid, struct sock *sk)
-{
- int rc = 0;
- if (ccid->ccid_hc_rx_init != NULL)
- rc = ccid->ccid_hc_rx_init(sk);
- return rc;
-}
-
-static inline int ccid_hc_tx_init(struct ccid *ccid, struct sock *sk)
-{
- int rc = 0;
- if (ccid->ccid_hc_tx_init != NULL)
- rc = ccid->ccid_hc_tx_init(sk);
- return rc;
-}
-
-static inline void ccid_hc_rx_exit(struct ccid *ccid, struct sock *sk)
-{
- if (ccid != NULL && ccid->ccid_hc_rx_exit != NULL &&
- dccp_sk(sk)->dccps_hc_rx_ccid_private != NULL)
- ccid->ccid_hc_rx_exit(sk);
-}
-
-static inline void ccid_hc_tx_exit(struct ccid *ccid, struct sock *sk)
-{
- if (ccid != NULL && ccid->ccid_hc_tx_exit != NULL &&
- dccp_sk(sk)->dccps_hc_tx_ccid_private != NULL)
- ccid->ccid_hc_tx_exit(sk);
+ if (ccid->ccid_ops->ccid_hc_tx_packet_sent != NULL)
+ ccid->ccid_ops->ccid_hc_tx_packet_sent(sk, more, len);
}
static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk,
struct sk_buff *skb)
{
- if (ccid->ccid_hc_rx_packet_recv != NULL)
- ccid->ccid_hc_rx_packet_recv(sk, skb);
+ if (ccid->ccid_ops->ccid_hc_rx_packet_recv != NULL)
+ ccid->ccid_ops->ccid_hc_rx_packet_recv(sk, skb);
}
static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk,
struct sk_buff *skb)
{
- if (ccid->ccid_hc_tx_packet_recv != NULL)
- ccid->ccid_hc_tx_packet_recv(sk, skb);
+ if (ccid->ccid_ops->ccid_hc_tx_packet_recv != NULL)
+ ccid->ccid_ops->ccid_hc_tx_packet_recv(sk, skb);
}
static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
@@ -144,8 +129,8 @@ static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
unsigned char* value)
{
int rc = 0;
- if (ccid->ccid_hc_tx_parse_options != NULL)
- rc = ccid->ccid_hc_tx_parse_options(sk, option, len, idx,
+ if (ccid->ccid_ops->ccid_hc_tx_parse_options != NULL)
+ rc = ccid->ccid_ops->ccid_hc_tx_parse_options(sk, option, len, idx,
value);
return rc;
}
@@ -156,37 +141,39 @@ static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk,
unsigned char* value)
{
int rc = 0;
- if (ccid->ccid_hc_rx_parse_options != NULL)
- rc = ccid->ccid_hc_rx_parse_options(sk, option, len, idx, value);
+ if (ccid->ccid_ops->ccid_hc_rx_parse_options != NULL)
+ rc = ccid->ccid_ops->ccid_hc_rx_parse_options(sk, option, len, idx, value);
return rc;
}
-static inline void ccid_hc_tx_insert_options(struct ccid *ccid, struct sock *sk,
- struct sk_buff *skb)
+static inline int ccid_hc_tx_insert_options(struct ccid *ccid, struct sock *sk,
+ struct sk_buff *skb)
{
- if (ccid->ccid_hc_tx_insert_options != NULL)
- ccid->ccid_hc_tx_insert_options(sk, skb);
+ if (ccid->ccid_ops->ccid_hc_tx_insert_options != NULL)
+ return ccid->ccid_ops->ccid_hc_tx_insert_options(sk, skb);
+ return 0;
}
-static inline void ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk,
- struct sk_buff *skb)
+static inline int ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk,
+ struct sk_buff *skb)
{
- if (ccid->ccid_hc_rx_insert_options != NULL)
- ccid->ccid_hc_rx_insert_options(sk, skb);
+ if (ccid->ccid_ops->ccid_hc_rx_insert_options != NULL)
+ return ccid->ccid_ops->ccid_hc_rx_insert_options(sk, skb);
+ return 0;
}
static inline void ccid_hc_rx_get_info(struct ccid *ccid, struct sock *sk,
struct tcp_info *info)
{
- if (ccid->ccid_hc_rx_get_info != NULL)
- ccid->ccid_hc_rx_get_info(sk, info);
+ if (ccid->ccid_ops->ccid_hc_rx_get_info != NULL)
+ ccid->ccid_ops->ccid_hc_rx_get_info(sk, info);
}
static inline void ccid_hc_tx_get_info(struct ccid *ccid, struct sock *sk,
struct tcp_info *info)
{
- if (ccid->ccid_hc_tx_get_info != NULL)
- ccid->ccid_hc_tx_get_info(sk, info);
+ if (ccid->ccid_ops->ccid_hc_tx_get_info != NULL)
+ ccid->ccid_ops->ccid_hc_tx_get_info(sk, info);
}
static inline int ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk,
@@ -194,8 +181,8 @@ static inline int ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk,
u32 __user *optval, int __user *optlen)
{
int rc = -ENOPROTOOPT;
- if (ccid->ccid_hc_rx_getsockopt != NULL)
- rc = ccid->ccid_hc_rx_getsockopt(sk, optname, len,
+ if (ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL)
+ rc = ccid->ccid_ops->ccid_hc_rx_getsockopt(sk, optname, len,
optval, optlen);
return rc;
}
@@ -205,8 +192,8 @@ static inline int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk,
u32 __user *optval, int __user *optlen)
{
int rc = -ENOPROTOOPT;
- if (ccid->ccid_hc_tx_getsockopt != NULL)
- rc = ccid->ccid_hc_tx_getsockopt(sk, optname, len,
+ if (ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL)
+ rc = ccid->ccid_ops->ccid_hc_tx_getsockopt(sk, optname, len,
optval, optlen);
return rc;
}
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index 7684d83946a4..ca00191628f7 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -1,9 +1,39 @@
menu "DCCP CCIDs Configuration (EXPERIMENTAL)"
depends on IP_DCCP && EXPERIMENTAL
+config IP_DCCP_CCID2
+ tristate "CCID2 (TCP-Like) (EXPERIMENTAL)"
+ depends on IP_DCCP
+ def_tristate IP_DCCP
+ select IP_DCCP_ACKVEC
+ ---help---
+ CCID 2, TCP-like Congestion Control, denotes Additive Increase,
+ Multiplicative Decrease (AIMD) congestion control with behavior
+ modelled directly on TCP, including congestion window, slow start,
+ timeouts, and so forth [RFC 2581]. CCID 2 achieves maximum
+ bandwidth over the long term, consistent with the use of end-to-end
+ congestion control, but halves its congestion window in response to
+ each congestion event. This leads to the abrupt rate changes
+ typical of TCP. Applications should use CCID 2 if they prefer
+ maximum bandwidth utilization to steadiness of rate. This is often
+ the case for applications that are not playing their data directly
+ to the user. For example, a hypothetical application that
+ transferred files over DCCP, using application-level retransmissions
+ for lost packets, would prefer CCID 2 to CCID 3. On-line games may
+ also prefer CCID 2.
+
+ CCID 2 is further described in:
+ http://www.icir.org/kohler/dccp/draft-ietf-dccp-ccid2-10.txt
+
+ This text was extracted from:
+ http://www.icir.org/kohler/dccp/draft-ietf-dccp-spec-13.txt
+
+ If in doubt, say M.
+
config IP_DCCP_CCID3
- tristate "CCID3 (TFRC) (EXPERIMENTAL)"
+ tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)"
depends on IP_DCCP
+ def_tristate IP_DCCP
---help---
CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based
rate-controlled congestion control mechanism. TFRC is designed to
@@ -15,10 +45,15 @@ config IP_DCCP_CCID3
suitable than CCID 2 for applications such streaming media where a
relatively smooth sending rate is of importance.
- CCID 3 is further described in [CCID 3 PROFILE]. The TFRC
- congestion control algorithms were initially described in RFC 3448.
+ CCID 3 is further described in:
+
+ http://www.icir.org/kohler/dccp/draft-ietf-dccp-ccid3-11.txt.
+
+ The TFRC congestion control algorithms were initially described in
+ RFC 3448.
- This text was extracted from draft-ietf-dccp-spec-11.txt.
+ This text was extracted from:
+ http://www.icir.org/kohler/dccp/draft-ietf-dccp-spec-13.txt
If in doubt, say M.
diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile
index 956f79f50743..438f20bccff7 100644
--- a/net/dccp/ccids/Makefile
+++ b/net/dccp/ccids/Makefile
@@ -2,4 +2,8 @@ obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o
dccp_ccid3-y := ccid3.o
+obj-$(CONFIG_IP_DCCP_CCID2) += dccp_ccid2.o
+
+dccp_ccid2-y := ccid2.o
+
obj-y += lib/
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
new file mode 100644
index 000000000000..d4f9e2d33453
--- /dev/null
+++ b/net/dccp/ccids/ccid2.c
@@ -0,0 +1,779 @@
+/*
+ * net/dccp/ccids/ccid2.c
+ *
+ * Copyright (c) 2005, 2006 Andrea Bittau <a.bittau@cs.ucl.ac.uk>
+ *
+ * Changes to meet Linux coding standards, and DCCP infrastructure fixes.
+ *
+ * Copyright (c) 2006 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * This implementation should follow: draft-ietf-dccp-ccid2-10.txt
+ *
+ * BUGS:
+ * - sequence number wrapping
+ * - jiffies wrapping
+ */
+
+#include <linux/config.h>
+#include "../ccid.h"
+#include "../dccp.h"
+#include "ccid2.h"
+
+static int ccid2_debug;
+
+#undef CCID2_DEBUG
+#ifdef CCID2_DEBUG
+#define ccid2_pr_debug(format, a...) \
+ do { if (ccid2_debug) \
+ printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \
+ } while (0)
+#else
+#define ccid2_pr_debug(format, a...)
+#endif
+
+static const int ccid2_seq_len = 128;
+
+#ifdef CCID2_DEBUG
+static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx)
+{
+ int len = 0;
+ int pipe = 0;
+ struct ccid2_seq *seqp = hctx->ccid2hctx_seqh;
+
+ /* there is data in the chain */
+ if (seqp != hctx->ccid2hctx_seqt) {
+ seqp = seqp->ccid2s_prev;
+ len++;
+ if (!seqp->ccid2s_acked)
+ pipe++;
+
+ while (seqp != hctx->ccid2hctx_seqt) {
+ struct ccid2_seq *prev = seqp->ccid2s_prev;
+
+ len++;
+ if (!prev->ccid2s_acked)
+ pipe++;
+
+ /* packets are sent sequentially */
+ BUG_ON(seqp->ccid2s_seq <= prev->ccid2s_seq);
+ BUG_ON(seqp->ccid2s_sent < prev->ccid2s_sent);
+ BUG_ON(len > ccid2_seq_len);
+
+ seqp = prev;
+ }
+ }
+
+ BUG_ON(pipe != hctx->ccid2hctx_pipe);
+ ccid2_pr_debug("len of chain=%d\n", len);
+
+ do {
+ seqp = seqp->ccid2s_prev;
+ len++;
+ BUG_ON(len > ccid2_seq_len);
+ } while (seqp != hctx->ccid2hctx_seqh);
+
+ BUG_ON(len != ccid2_seq_len);
+ ccid2_pr_debug("total len=%d\n", len);
+}
+#else
+#define ccid2_hc_tx_check_sanity(hctx) do {} while (0)
+#endif
+
+static int ccid2_hc_tx_send_packet(struct sock *sk,
+ struct sk_buff *skb, int len)
+{
+ struct ccid2_hc_tx_sock *hctx;
+
+ switch (DCCP_SKB_CB(skb)->dccpd_type) {
+ case 0: /* XXX data packets from userland come through like this */
+ case DCCP_PKT_DATA:
+ case DCCP_PKT_DATAACK:
+ break;
+ /* No congestion control on other packets */
+ default:
+ return 0;
+ }
+
+ hctx = ccid2_hc_tx_sk(sk);
+
+ ccid2_pr_debug("pipe=%d cwnd=%d\n", hctx->ccid2hctx_pipe,
+ hctx->ccid2hctx_cwnd);
+
+ if (hctx->ccid2hctx_pipe < hctx->ccid2hctx_cwnd) {
+ /* OK we can send... make sure previous packet was sent off */
+ if (!hctx->ccid2hctx_sendwait) {
+ hctx->ccid2hctx_sendwait = 1;
+ return 0;
+ }
+ }
+
+ return 100; /* XXX */
+}
+
+static void ccid2_change_l_ack_ratio(struct sock *sk, int val)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
+ /*
+ * XXX I don't really agree with val != 2. If cwnd is 1, ack ratio
+ * should be 1... it shouldn't be allowed to become 2.
+ * -sorbo.
+ */
+ if (val != 2) {
+ const struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
+ int max = hctx->ccid2hctx_cwnd / 2;
+
+ /* round up */
+ if (hctx->ccid2hctx_cwnd & 1)
+ max++;
+
+ if (val > max)
+ val = max;
+ }
+
+ ccid2_pr_debug("changing local ack ratio to %d\n", val);
+ WARN_ON(val <= 0);
+ dp->dccps_l_ack_ratio = val;
+}
+
+static void ccid2_change_cwnd(struct sock *sk, int val)
+{
+ struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
+
+ if (val == 0)
+ val = 1;
+
+ /* XXX do we need to change ack ratio? */
+ ccid2_pr_debug("change cwnd to %d\n", val);
+
+ BUG_ON(val < 1);
+ hctx->ccid2hctx_cwnd = val;
+}
+
+static void ccid2_start_rto_timer(struct sock *sk);
+
+static void ccid2_hc_tx_rto_expire(unsigned long data)
+{
+ struct sock *sk = (struct sock *)data;
+ struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
+ long s;
+
+ bh_lock_sock(sk);
+ if (sock_owned_by_user(sk)) {
+ sk_reset_timer(sk, &hctx->ccid2hctx_rtotimer,
+ jiffies + HZ / 5);
+ goto out;
+ }
+
+ ccid2_pr_debug("RTO_EXPIRE\n");
+
+ ccid2_hc_tx_check_sanity(hctx);
+
+ /* back-off timer */
+ hctx->ccid2hctx_rto <<= 1;
+
+ s = hctx->ccid2hctx_rto / HZ;
+ if (s > 60)
+ hctx->ccid2hctx_rto = 60 * HZ;
+
+ ccid2_start_rto_timer(sk);
+
+ /* adjust pipe, cwnd etc */
+ hctx->ccid2hctx_pipe = 0;
+ hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd >> 1;
+ if (hctx->ccid2hctx_ssthresh < 2)
+ hctx->ccid2hctx_ssthresh = 2;
+ ccid2_change_cwnd(sk, 1);
+
+ /* clear state about stuff we sent */
+ hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh;
+ hctx->ccid2hctx_ssacks = 0;
+ hctx->ccid2hctx_acks = 0;
+ hctx->ccid2hctx_sent = 0;
+
+ /* clear ack ratio state. */
+ hctx->ccid2hctx_arsent = 0;
+ hctx->ccid2hctx_ackloss = 0;
+ hctx->ccid2hctx_rpseq = 0;
+ hctx->ccid2hctx_rpdupack = -1;
+ ccid2_change_l_ack_ratio(sk, 1);
+ ccid2_hc_tx_check_sanity(hctx);
+out:
+ bh_unlock_sock(sk);
+ sock_put(sk);
+}
+
+static void ccid2_start_rto_timer(struct sock *sk)
+{
+ struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
+
+ ccid2_pr_debug("setting RTO timeout=%ld\n", hctx->ccid2hctx_rto);
+
+ BUG_ON(timer_pending(&hctx->ccid2hctx_rtotimer));
+ sk_reset_timer(sk, &hctx->ccid2hctx_rtotimer,
+ jiffies + hctx->ccid2hctx_rto);
+}
+
+static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
+ struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
+ u64 seq;
+
+ ccid2_hc_tx_check_sanity(hctx);
+
+ BUG_ON(!hctx->ccid2hctx_sendwait);
+ hctx->ccid2hctx_sendwait = 0;
+ hctx->ccid2hctx_pipe++;
+ BUG_ON(hctx->ccid2hctx_pipe < 0);
+
+ /* There is an issue. What if another packet is sent between
+ * packet_send() and packet_sent(). Then the sequence number would be
+ * wrong.
+ * -sorbo.
+ */
+ seq = dp->dccps_gss;
+
+ hctx->ccid2hctx_seqh->ccid2s_seq = seq;
+ hctx->ccid2hctx_seqh->ccid2s_acked = 0;
+ hctx->ccid2hctx_seqh->ccid2s_sent = jiffies;
+ hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqh->ccid2s_next;
+
+ ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd,
+ hctx->ccid2hctx_pipe);
+
+ if (hctx->ccid2hctx_seqh == hctx->ccid2hctx_seqt) {
+ /* XXX allocate more space */
+ WARN_ON(1);
+ }
+
+ hctx->ccid2hctx_sent++;
+
+ /* Ack Ratio. Need to maintain a concept of how many windows we sent */
+ hctx->ccid2hctx_arsent++;
+ /* We had an ack loss in this window... */
+ if (hctx->ccid2hctx_ackloss) {
+ if (hctx->ccid2hctx_arsent >= hctx->ccid2hctx_cwnd) {
+ hctx->ccid2hctx_arsent = 0;
+ hctx->ccid2hctx_ackloss = 0;
+ }
+ } else {
+ /* No acks lost up to now... */
+ /* decrease ack ratio if enough packets were sent */
+ if (dp->dccps_l_ack_ratio > 1) {
+ /* XXX don't calculate denominator each time */
+ int denom = dp->dccps_l_ack_ratio * dp->dccps_l_ack_ratio -
+ dp->dccps_l_ack_ratio;
+
+ denom = hctx->ccid2hctx_cwnd * hctx->ccid2hctx_cwnd / denom;
+
+ if (hctx->ccid2hctx_arsent >= denom) {
+ ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio - 1);
+ hctx->ccid2hctx_arsent = 0;
+ }
+ } else {
+ /* we can't increase ack ratio further [1] */
+ hctx->ccid2hctx_arsent = 0; /* or maybe set it to cwnd*/
+ }
+ }
+
+ /* setup RTO timer */
+ if (!timer_pending(&hctx->ccid2hctx_rtotimer))
+ ccid2_start_rto_timer(sk);
+
+#ifdef CCID2_DEBUG
+ ccid2_pr_debug("pipe=%d\n", hctx->ccid2hctx_pipe);
+ ccid2_pr_debug("Sent: seq=%llu\n", seq);
+ do {
+ struct ccid2_seq *seqp = hctx->ccid2hctx_seqt;
+
+ while (seqp != hctx->ccid2hctx_seqh) {
+ ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n",
+ seqp->ccid2s_seq, seqp->ccid2s_acked,
+ seqp->ccid2s_sent);
+ seqp = seqp->ccid2s_next;
+ }
+ } while (0);
+ ccid2_pr_debug("=========\n");
+ ccid2_hc_tx_check_sanity(hctx);
+#endif
+}
+
+/* XXX Lame code duplication!
+ * returns -1 if none was found.
+ * else returns the next offset to use in the function call.
+ */
+static int ccid2_ackvector(struct sock *sk, struct sk_buff *skb, int offset,
+ unsigned char **vec, unsigned char *veclen)
+{
+ const struct dccp_hdr *dh = dccp_hdr(skb);
+ unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
+ unsigned char *opt_ptr;
+ const unsigned char *opt_end = (unsigned char *)dh +
+ (dh->dccph_doff * 4);
+ unsigned char opt, len;
+ unsigned char *value;
+
+ BUG_ON(offset < 0);
+ options += offset;
+ opt_ptr = options;
+ if (opt_ptr >= opt_end)
+ return -1;
+
+ while (opt_ptr != opt_end) {
+ opt = *opt_ptr++;
+ len = 0;
+ value = NULL;
+
+ /* Check if this isn't a single byte option */
+ if (opt > DCCPO_MAX_RESERVED) {
+ if (opt_ptr == opt_end)
+ goto out_invalid_option;
+
+ len = *opt_ptr++;
+ if (len < 3)
+ goto out_invalid_option;
+ /*
+ * Remove the type and len fields, leaving
+ * just the value size
+ */
+ len -= 2;
+ value = opt_ptr;
+ opt_ptr += len;
+
+ if (opt_ptr > opt_end)
+ goto out_invalid_option;
+ }
+
+ switch (opt) {
+ case DCCPO_ACK_VECTOR_0:
+ case DCCPO_ACK_VECTOR_1:
+ *vec = value;
+ *veclen = len;
+ return offset + (opt_ptr - options);
+ }
+ }
+
+ return -1;
+
+out_invalid_option:
+ BUG_ON(1); /* should never happen... options were previously parsed ! */
+ return -1;
+}
+
+static void ccid2_hc_tx_kill_rto_timer(struct sock *sk)
+{
+ struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
+
+ sk_stop_timer(sk, &hctx->ccid2hctx_rtotimer);
+ ccid2_pr_debug("deleted RTO timer\n");
+}
+
+static inline void ccid2_new_ack(struct sock *sk,
+ struct ccid2_seq *seqp,
+ unsigned int *maxincr)
+{
+ struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
+
+ /* slow start */
+ if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh) {
+ hctx->ccid2hctx_acks = 0;
+
+ /* We can increase cwnd at most maxincr [ack_ratio/2] */
+ if (*maxincr) {
+ /* increase every 2 acks */
+ hctx->ccid2hctx_ssacks++;
+ if (hctx->ccid2hctx_ssacks == 2) {
+ ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd + 1);
+ hctx->ccid2hctx_ssacks = 0;
+ *maxincr = *maxincr - 1;
+ }
+ } else {
+ /* increased cwnd enough for this single ack */
+ hctx->ccid2hctx_ssacks = 0;
+ }
+ } else {
+ hctx->ccid2hctx_ssacks = 0;
+ hctx->ccid2hctx_acks++;
+
+ if (hctx->ccid2hctx_acks >= hctx->ccid2hctx_cwnd) {
+ ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd + 1);
+ hctx->ccid2hctx_acks = 0;
+ }
+ }
+
+ /* update RTO */
+ if (hctx->ccid2hctx_srtt == -1 ||
+ (jiffies - hctx->ccid2hctx_lastrtt) >= hctx->ccid2hctx_srtt) {
+ unsigned long r = jiffies - seqp->ccid2s_sent;
+ int s;
+
+ /* first measurement */
+ if (hctx->ccid2hctx_srtt == -1) {
+ ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n",
+ r, jiffies, seqp->ccid2s_seq);
+ hctx->ccid2hctx_srtt = r;
+ hctx->ccid2hctx_rttvar = r >> 1;
+ } else {
+ /* RTTVAR */
+ long tmp = hctx->ccid2hctx_srtt - r;
+ if (tmp < 0)
+ tmp *= -1;
+
+ tmp >>= 2;
+ hctx->ccid2hctx_rttvar *= 3;
+ hctx->ccid2hctx_rttvar >>= 2;
+ hctx->ccid2hctx_rttvar += tmp;
+
+ /* SRTT */
+ hctx->ccid2hctx_srtt *= 7;
+ hctx->ccid2hctx_srtt >>= 3;
+ tmp = r >> 3;
+ hctx->ccid2hctx_srtt += tmp;
+ }
+ s = hctx->ccid2hctx_rttvar << 2;
+ /* clock granularity is 1 when based on jiffies */
+ if (!s)
+ s = 1;
+ hctx->ccid2hctx_rto = hctx->ccid2hctx_srtt + s;
+
+ /* must be at least a second */
+ s = hctx->ccid2hctx_rto / HZ;
+ /* DCCP doesn't require this [but I like it cuz my code sux] */
+#if 1
+ if (s < 1)
+ hctx->ccid2hctx_rto = HZ;
+#endif
+ /* max 60 seconds */
+ if (s > 60)
+ hctx->ccid2hctx_rto = HZ * 60;
+
+ hctx->ccid2hctx_lastrtt = jiffies;
+
+ ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n",
+ hctx->ccid2hctx_srtt, hctx->ccid2hctx_rttvar,
+ hctx->ccid2hctx_rto, HZ, r);
+ hctx->ccid2hctx_sent = 0;
+ }
+
+ /* we got a new ack, so re-start RTO timer */
+ ccid2_hc_tx_kill_rto_timer(sk);
+ ccid2_start_rto_timer(sk);
+}
+
+static void ccid2_hc_tx_dec_pipe(struct sock *sk)
+{
+ struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
+
+ hctx->ccid2hctx_pipe--;
+ BUG_ON(hctx->ccid2hctx_pipe < 0);
+
+ if (hctx->ccid2hctx_pipe == 0)
+ ccid2_hc_tx_kill_rto_timer(sk);
+}
+
+static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
+ struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
+ u64 ackno, seqno;
+ struct ccid2_seq *seqp;
+ unsigned char *vector;
+ unsigned char veclen;
+ int offset = 0;
+ int done = 0;
+ int loss = 0;
+ unsigned int maxincr = 0;
+
+ ccid2_hc_tx_check_sanity(hctx);
+ /* check reverse path congestion */
+ seqno = DCCP_SKB_CB(skb)->dccpd_seq;
+
+ /* XXX this whole "algorithm" is broken. Need to fix it to keep track
+ * of the seqnos of the dupacks so that rpseq and rpdupack are correct
+ * -sorbo.
+ */
+ /* need to bootstrap */
+ if (hctx->ccid2hctx_rpdupack == -1) {
+ hctx->ccid2hctx_rpdupack = 0;
+ hctx->ccid2hctx_rpseq = seqno;
+ } else {
+ /* check if packet is consecutive */
+ if ((hctx->ccid2hctx_rpseq + 1) == seqno)
+ hctx->ccid2hctx_rpseq++;
+ /* it's a later packet */
+ else if (after48(seqno, hctx->ccid2hctx_rpseq)) {
+ hctx->ccid2hctx_rpdupack++;
+
+ /* check if we got enough dupacks */
+ if (hctx->ccid2hctx_rpdupack >=
+ hctx->ccid2hctx_numdupack) {
+ hctx->ccid2hctx_rpdupack = -1; /* XXX lame */
+ hctx->ccid2hctx_rpseq = 0;
+
+ ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio << 1);
+ }
+ }
+ }
+
+ /* check forward path congestion */
+ /* still didn't send out new data packets */
+ if (hctx->ccid2hctx_seqh == hctx->ccid2hctx_seqt)
+ return;
+
+ switch (DCCP_SKB_CB(skb)->dccpd_type) {
+ case DCCP_PKT_ACK:
+ case DCCP_PKT_DATAACK:
+ break;
+ default:
+ return;
+ }
+
+ ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
+ seqp = hctx->ccid2hctx_seqh->ccid2s_prev;
+
+ /* If in slow-start, cwnd can increase at most Ack Ratio / 2 packets for
+ * this single ack. I round up.
+ * -sorbo.
+ */
+ maxincr = dp->dccps_l_ack_ratio >> 1;
+ maxincr++;
+
+ /* go through all ack vectors */
+ while ((offset = ccid2_ackvector(sk, skb, offset,
+ &vector, &veclen)) != -1) {
+ /* go through this ack vector */
+ while (veclen--) {
+ const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
+ u64 ackno_end_rl;
+
+ dccp_set_seqno(&ackno_end_rl, ackno - rl);
+ ccid2_pr_debug("ackvec start:%llu end:%llu\n", ackno,
+ ackno_end_rl);
+ /* if the seqno we are analyzing is larger than the
+ * current ackno, then move towards the tail of our
+ * seqnos.
+ */
+ while (after48(seqp->ccid2s_seq, ackno)) {
+ if (seqp == hctx->ccid2hctx_seqt) {
+ done = 1;
+ break;
+ }
+ seqp = seqp->ccid2s_prev;
+ }
+ if (done)
+ break;
+
+ /* check all seqnos in the range of the vector
+ * run length
+ */
+ while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) {
+ const u8 state = (*vector &
+ DCCP_ACKVEC_STATE_MASK) >> 6;
+
+ /* new packet received or marked */
+ if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED &&
+ !seqp->ccid2s_acked) {
+ if (state ==
+ DCCP_ACKVEC_STATE_ECN_MARKED) {
+ loss = 1;
+ } else
+ ccid2_new_ack(sk, seqp,
+ &maxincr);
+
+ seqp->ccid2s_acked = 1;
+ ccid2_pr_debug("Got ack for %llu\n",
+ seqp->ccid2s_seq);
+ ccid2_hc_tx_dec_pipe(sk);
+ }
+ if (seqp == hctx->ccid2hctx_seqt) {
+ done = 1;
+ break;
+ }
+ seqp = seqp->ccid2s_next;
+ }
+ if (done)
+ break;
+
+
+ dccp_set_seqno(&ackno, ackno_end_rl - 1);
+ vector++;
+ }
+ if (done)
+ break;
+ }
+
+ /* The state about what is acked should be correct now
+ * Check for NUMDUPACK
+ */
+ seqp = hctx->ccid2hctx_seqh->ccid2s_prev;
+ done = 0;
+ while (1) {
+ if (seqp->ccid2s_acked) {
+ done++;
+ if (done == hctx->ccid2hctx_numdupack)
+ break;
+ }
+ if (seqp == hctx->ccid2hctx_seqt)
+ break;
+ seqp = seqp->ccid2s_prev;
+ }
+
+ /* If there are at least 3 acknowledgements, anything unacknowledged
+ * below the last sequence number is considered lost
+ */
+ if (done == hctx->ccid2hctx_numdupack) {
+ struct ccid2_seq *last_acked = seqp;
+
+ /* check for lost packets */
+ while (1) {
+ if (!seqp->ccid2s_acked) {
+ loss = 1;
+ ccid2_hc_tx_dec_pipe(sk);
+ }
+ if (seqp == hctx->ccid2hctx_seqt)
+ break;
+ seqp = seqp->ccid2s_prev;
+ }
+
+ hctx->ccid2hctx_seqt = last_acked;
+ }
+
+ /* trim acked packets in tail */
+ while (hctx->ccid2hctx_seqt != hctx->ccid2hctx_seqh) {
+ if (!hctx->ccid2hctx_seqt->ccid2s_acked)
+ break;
+
+ hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqt->ccid2s_next;
+ }
+
+ if (loss) {
+ /* XXX do bit shifts guarantee a 0 as the new bit? */
+ ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd >> 1);
+ hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd;
+ if (hctx->ccid2hctx_ssthresh < 2)
+ hctx->ccid2hctx_ssthresh = 2;
+ }
+
+ ccid2_hc_tx_check_sanity(hctx);
+}
+
+static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
+{
+ struct ccid2_hc_tx_sock *hctx = ccid_priv(ccid);
+ int seqcount = ccid2_seq_len;
+ int i;
+
+ /* XXX init variables with proper values */
+ hctx->ccid2hctx_cwnd = 1;
+ hctx->ccid2hctx_ssthresh = 10;
+ hctx->ccid2hctx_numdupack = 3;
+
+ /* XXX init ~ to window size... */
+ hctx->ccid2hctx_seqbuf = kmalloc(sizeof(*hctx->ccid2hctx_seqbuf) *
+ seqcount, gfp_any());
+ if (hctx->ccid2hctx_seqbuf == NULL)
+ return -ENOMEM;
+
+ for (i = 0; i < (seqcount - 1); i++) {
+ hctx->ccid2hctx_seqbuf[i].ccid2s_next =
+ &hctx->ccid2hctx_seqbuf[i + 1];
+ hctx->ccid2hctx_seqbuf[i + 1].ccid2s_prev =
+ &hctx->ccid2hctx_seqbuf[i];
+ }
+ hctx->ccid2hctx_seqbuf[seqcount - 1].ccid2s_next =
+ hctx->ccid2hctx_seqbuf;
+ hctx->ccid2hctx_seqbuf->ccid2s_prev =
+ &hctx->ccid2hctx_seqbuf[seqcount - 1];
+
+ hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqbuf;
+ hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh;
+ hctx->ccid2hctx_sent = 0;
+ hctx->ccid2hctx_rto = 3 * HZ;
+ hctx->ccid2hctx_srtt = -1;
+ hctx->ccid2hctx_rttvar = -1;
+ hctx->ccid2hctx_lastrtt = 0;
+ hctx->ccid2hctx_rpdupack = -1;
+
+ hctx->ccid2hctx_rtotimer.function = &ccid2_hc_tx_rto_expire;
+ hctx->ccid2hctx_rtotimer.data = (unsigned long)sk;
+ init_timer(&hctx->ccid2hctx_rtotimer);
+
+ ccid2_hc_tx_check_sanity(hctx);
+ return 0;
+}
+
+static void ccid2_hc_tx_exit(struct sock *sk)
+{
+ struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
+
+ ccid2_hc_tx_kill_rto_timer(sk);
+ kfree(hctx->ccid2hctx_seqbuf);
+ hctx->ccid2hctx_seqbuf = NULL;
+}
+
+static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
+{
+ const struct dccp_sock *dp = dccp_sk(sk);
+ struct ccid2_hc_rx_sock *hcrx = ccid2_hc_rx_sk(sk);
+
+ switch (DCCP_SKB_CB(skb)->dccpd_type) {
+ case DCCP_PKT_DATA:
+ case DCCP_PKT_DATAACK:
+ hcrx->ccid2hcrx_data++;
+ if (hcrx->ccid2hcrx_data >= dp->dccps_r_ack_ratio) {
+ dccp_send_ack(sk);
+ hcrx->ccid2hcrx_data = 0;
+ }
+ break;
+ }
+}
+
+static struct ccid_operations ccid2 = {
+ .ccid_id = 2,
+ .ccid_name = "ccid2",
+ .ccid_owner = THIS_MODULE,
+ .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock),
+ .ccid_hc_tx_init = ccid2_hc_tx_init,
+ .ccid_hc_tx_exit = ccid2_hc_tx_exit,
+ .ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet,
+ .ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent,
+ .ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv,
+ .ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock),
+ .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv,
+};
+
+module_param(ccid2_debug, int, 0444);
+MODULE_PARM_DESC(ccid2_debug, "Enable debug messages");
+
+static __init int ccid2_module_init(void)
+{
+ return ccid_register(&ccid2);
+}
+module_init(ccid2_module_init);
+
+static __exit void ccid2_module_exit(void)
+{
+ ccid_unregister(&ccid2);
+}
+module_exit(ccid2_module_exit);
+
+MODULE_AUTHOR("Andrea Bittau <a.bittau@cs.ucl.ac.uk>");
+MODULE_DESCRIPTION("DCCP TCP-Like (CCID2) CCID");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("net-dccp-ccid-2");
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
new file mode 100644
index 000000000000..451a87464fa5
--- /dev/null
+++ b/net/dccp/ccids/ccid2.h
@@ -0,0 +1,85 @@
+/*
+ * net/dccp/ccids/ccid2.h
+ *
+ * Copyright (c) 2005 Andrea Bittau <a.bittau@cs.ucl.ac.uk>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef _DCCP_CCID2_H_
+#define _DCCP_CCID2_H_
+
+#include <linux/dccp.h>
+#include <linux/timer.h>
+#include <linux/types.h>
+#include "../ccid.h"
+
+struct sock;
+
+struct ccid2_seq {
+ u64 ccid2s_seq;
+ unsigned long ccid2s_sent;
+ int ccid2s_acked;
+ struct ccid2_seq *ccid2s_prev;
+ struct ccid2_seq *ccid2s_next;
+};
+
+/** struct ccid2_hc_tx_sock - CCID2 TX half connection
+ *
+ * @ccid2hctx_ssacks - ACKs recv in slow start
+ * @ccid2hctx_acks - ACKS recv in AI phase
+ * @ccid2hctx_sent - packets sent in this window
+ * @ccid2hctx_lastrtt -time RTT was last measured
+ * @ccid2hctx_arsent - packets sent [ack ratio]
+ * @ccid2hctx_ackloss - ack was lost in this win
+ * @ccid2hctx_rpseq - last consecutive seqno
+ * @ccid2hctx_rpdupack - dupacks since rpseq
+*/
+struct ccid2_hc_tx_sock {
+ int ccid2hctx_cwnd;
+ int ccid2hctx_ssacks;
+ int ccid2hctx_acks;
+ int ccid2hctx_ssthresh;
+ int ccid2hctx_pipe;
+ int ccid2hctx_numdupack;
+ struct ccid2_seq *ccid2hctx_seqbuf;
+ struct ccid2_seq *ccid2hctx_seqh;
+ struct ccid2_seq *ccid2hctx_seqt;
+ long ccid2hctx_rto;
+ long ccid2hctx_srtt;
+ long ccid2hctx_rttvar;
+ int ccid2hctx_sent;
+ unsigned long ccid2hctx_lastrtt;
+ struct timer_list ccid2hctx_rtotimer;
+ unsigned long ccid2hctx_arsent;
+ int ccid2hctx_ackloss;
+ u64 ccid2hctx_rpseq;
+ int ccid2hctx_rpdupack;
+ int ccid2hctx_sendwait;
+};
+
+struct ccid2_hc_rx_sock {
+ int ccid2hcrx_data;
+};
+
+static inline struct ccid2_hc_tx_sock *ccid2_hc_tx_sk(const struct sock *sk)
+{
+ return ccid_priv(dccp_sk(sk)->dccps_hc_tx_ccid);
+}
+
+static inline struct ccid2_hc_rx_sock *ccid2_hc_rx_sk(const struct sock *sk)
+{
+ return ccid_priv(dccp_sk(sk)->dccps_hc_rx_ccid);
+}
+#endif /* _DCCP_CCID2_H_ */
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 35d1d347541c..b4a51d0355a5 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -46,7 +46,7 @@
* Reason for maths here is to avoid 32 bit overflow when a is big.
* With this we get close to the limit.
*/
-static inline u32 usecs_div(const u32 a, const u32 b)
+static u32 usecs_div(const u32 a, const u32 b)
{
const u32 div = a < (UINT_MAX / (USEC_PER_SEC / 10)) ? 10 :
a < (UINT_MAX / (USEC_PER_SEC / 50)) ? 50 :
@@ -76,15 +76,6 @@ static struct dccp_tx_hist *ccid3_tx_hist;
static struct dccp_rx_hist *ccid3_rx_hist;
static struct dccp_li_hist *ccid3_li_hist;
-static int ccid3_init(struct sock *sk)
-{
- return 0;
-}
-
-static void ccid3_exit(struct sock *sk)
-{
-}
-
/* TFRC sender states */
enum ccid3_hc_tx_states {
TFRC_SSTATE_NO_SENT = 1,
@@ -107,8 +98,8 @@ static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
}
#endif
-static inline void ccid3_hc_tx_set_state(struct sock *sk,
- enum ccid3_hc_tx_states state)
+static void ccid3_hc_tx_set_state(struct sock *sk,
+ enum ccid3_hc_tx_states state)
{
struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state;
@@ -316,8 +307,6 @@ static int ccid3_hc_tx_send_packet(struct sock *sk,
switch (hctx->ccid3hctx_state) {
case TFRC_SSTATE_NO_SENT:
- hctx->ccid3hctx_no_feedback_timer.function = ccid3_hc_tx_no_feedback_timer;
- hctx->ccid3hctx_no_feedback_timer.data = (unsigned long)sk;
sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
jiffies + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT));
hctx->ccid3hctx_last_win_count = 0;
@@ -585,16 +574,15 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
}
}
-static void ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb)
+static int ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb)
{
const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
BUG_ON(hctx == NULL);
- if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN))
- return;
-
- DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;
+ if (sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)
+ DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;
+ return 0;
}
static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
@@ -626,7 +614,7 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
__FUNCTION__, dccp_role(sk), sk);
rc = -EINVAL;
} else {
- opt_recv->ccid3or_loss_event_rate = ntohl(*(u32 *)value);
+ opt_recv->ccid3or_loss_event_rate = ntohl(*(__be32 *)value);
ccid3_pr_debug("%s, sk=%p, LOSS_EVENT_RATE=%u\n",
dccp_role(sk), sk,
opt_recv->ccid3or_loss_event_rate);
@@ -647,7 +635,7 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
__FUNCTION__, dccp_role(sk), sk);
rc = -EINVAL;
} else {
- opt_recv->ccid3or_receive_rate = ntohl(*(u32 *)value);
+ opt_recv->ccid3or_receive_rate = ntohl(*(__be32 *)value);
ccid3_pr_debug("%s, sk=%p, RECEIVE_RATE=%u\n",
dccp_role(sk), sk,
opt_recv->ccid3or_receive_rate);
@@ -658,17 +646,10 @@ static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
return rc;
}
-static int ccid3_hc_tx_init(struct sock *sk)
+static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk)
{
struct dccp_sock *dp = dccp_sk(sk);
- struct ccid3_hc_tx_sock *hctx;
-
- dp->dccps_hc_tx_ccid_private = kmalloc(sizeof(*hctx), gfp_any());
- if (dp->dccps_hc_tx_ccid_private == NULL)
- return -ENOMEM;
-
- hctx = ccid3_hc_tx_sk(sk);
- memset(hctx, 0, sizeof(*hctx));
+ struct ccid3_hc_tx_sock *hctx = ccid_priv(ccid);
if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE &&
dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE)
@@ -681,6 +662,9 @@ static int ccid3_hc_tx_init(struct sock *sk)
hctx->ccid3hctx_t_rto = USEC_PER_SEC;
hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT;
INIT_LIST_HEAD(&hctx->ccid3hctx_hist);
+
+ hctx->ccid3hctx_no_feedback_timer.function = ccid3_hc_tx_no_feedback_timer;
+ hctx->ccid3hctx_no_feedback_timer.data = (unsigned long)sk;
init_timer(&hctx->ccid3hctx_no_feedback_timer);
return 0;
@@ -688,7 +672,6 @@ static int ccid3_hc_tx_init(struct sock *sk)
static void ccid3_hc_tx_exit(struct sock *sk)
{
- struct dccp_sock *dp = dccp_sk(sk);
struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
BUG_ON(hctx == NULL);
@@ -698,9 +681,6 @@ static void ccid3_hc_tx_exit(struct sock *sk)
/* Empty packet history */
dccp_tx_hist_purge(ccid3_tx_hist, &hctx->ccid3hctx_hist);
-
- kfree(dp->dccps_hc_tx_ccid_private);
- dp->dccps_hc_tx_ccid_private = NULL;
}
/*
@@ -727,8 +707,8 @@ static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
}
#endif
-static inline void ccid3_hc_rx_set_state(struct sock *sk,
- enum ccid3_hc_rx_states state)
+static void ccid3_hc_rx_set_state(struct sock *sk,
+ enum ccid3_hc_rx_states state)
{
struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state;
@@ -793,31 +773,35 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk)
dccp_send_ack(sk);
}
-static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
+static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
{
const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
- u32 x_recv, pinv;
+ __be32 x_recv, pinv;
BUG_ON(hcrx == NULL);
if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN))
- return;
+ return 0;
DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter;
if (dccp_packet_without_ack(skb))
- return;
-
- if (hcrx->ccid3hcrx_elapsed_time != 0)
- dccp_insert_option_elapsed_time(sk, skb,
- hcrx->ccid3hcrx_elapsed_time);
- dccp_insert_option_timestamp(sk, skb);
+ return 0;
+
x_recv = htonl(hcrx->ccid3hcrx_x_recv);
pinv = htonl(hcrx->ccid3hcrx_pinv);
- dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE,
- &pinv, sizeof(pinv));
- dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE,
- &x_recv, sizeof(x_recv));
+
+ if ((hcrx->ccid3hcrx_elapsed_time != 0 &&
+ dccp_insert_option_elapsed_time(sk, skb,
+ hcrx->ccid3hcrx_elapsed_time)) ||
+ dccp_insert_option_timestamp(sk, skb) ||
+ dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE,
+ &pinv, sizeof(pinv)) ||
+ dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE,
+ &x_recv, sizeof(x_recv)))
+ return -1;
+
+ return 0;
}
/* calculate first loss interval
@@ -1047,20 +1031,13 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
}
}
-static int ccid3_hc_rx_init(struct sock *sk)
+static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk)
{
struct dccp_sock *dp = dccp_sk(sk);
- struct ccid3_hc_rx_sock *hcrx;
+ struct ccid3_hc_rx_sock *hcrx = ccid_priv(ccid);
ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
- dp->dccps_hc_rx_ccid_private = kmalloc(sizeof(*hcrx), gfp_any());
- if (dp->dccps_hc_rx_ccid_private == NULL)
- return -ENOMEM;
-
- hcrx = ccid3_hc_rx_sk(sk);
- memset(hcrx, 0, sizeof(*hcrx));
-
if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE &&
dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE)
hcrx->ccid3hcrx_s = dp->dccps_packet_size;
@@ -1079,7 +1056,6 @@ static int ccid3_hc_rx_init(struct sock *sk)
static void ccid3_hc_rx_exit(struct sock *sk)
{
struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
- struct dccp_sock *dp = dccp_sk(sk);
BUG_ON(hcrx == NULL);
@@ -1090,9 +1066,6 @@ static void ccid3_hc_rx_exit(struct sock *sk)
/* Empty loss interval history */
dccp_li_hist_purge(ccid3_li_hist, &hcrx->ccid3hcrx_li_hist);
-
- kfree(dp->dccps_hc_rx_ccid_private);
- dp->dccps_hc_rx_ccid_private = NULL;
}
static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
@@ -1178,12 +1151,11 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
return 0;
}
-static struct ccid ccid3 = {
+static struct ccid_operations ccid3 = {
.ccid_id = 3,
.ccid_name = "ccid3",
.ccid_owner = THIS_MODULE,
- .ccid_init = ccid3_init,
- .ccid_exit = ccid3_exit,
+ .ccid_hc_tx_obj_size = sizeof(struct ccid3_hc_tx_sock),
.ccid_hc_tx_init = ccid3_hc_tx_init,
.ccid_hc_tx_exit = ccid3_hc_tx_exit,
.ccid_hc_tx_send_packet = ccid3_hc_tx_send_packet,
@@ -1191,6 +1163,7 @@ static struct ccid ccid3 = {
.ccid_hc_tx_packet_recv = ccid3_hc_tx_packet_recv,
.ccid_hc_tx_insert_options = ccid3_hc_tx_insert_options,
.ccid_hc_tx_parse_options = ccid3_hc_tx_parse_options,
+ .ccid_hc_rx_obj_size = sizeof(struct ccid3_hc_rx_sock),
.ccid_hc_rx_init = ccid3_hc_rx_init,
.ccid_hc_rx_exit = ccid3_hc_rx_exit,
.ccid_hc_rx_insert_options = ccid3_hc_rx_insert_options,
@@ -1241,15 +1214,6 @@ module_init(ccid3_module_init);
static __exit void ccid3_module_exit(void)
{
-#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
- /*
- * Hack to use while developing, so that we get rid of the control
- * sock, that is what keeps a refcount on dccp.ko -acme
- */
- extern void dccp_ctl_sock_exit(void);
-
- dccp_ctl_sock_exit();
-#endif
ccid_unregister(&ccid3);
if (ccid3_tx_hist != NULL) {
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 0bde4583d091..f18b96d4e5a2 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -41,6 +41,7 @@
#include <linux/time.h>
#include <linux/types.h>
#include <linux/tfrc.h>
+#include "../ccid.h"
#define TFRC_MIN_PACKET_SIZE 16
#define TFRC_STD_PACKET_SIZE 256
@@ -135,12 +136,12 @@ struct ccid3_hc_rx_sock {
static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
{
- return dccp_sk(sk)->dccps_hc_tx_ccid_private;
+ return ccid_priv(dccp_sk(sk)->dccps_hc_tx_ccid);
}
static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk)
{
- return dccp_sk(sk)->dccps_hc_rx_ccid_private;
+ return ccid_priv(dccp_sk(sk)->dccps_hc_rx_ccid);
}
#endif /* _DCCP_CCID3_H_ */
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 93f26dd6e6cb..1fe509148689 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -59,8 +59,6 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
-extern struct proto dccp_prot;
-
/* is seq1 < seq2 ? */
static inline int before48(const u64 seq1, const u64 seq2)
{
@@ -120,7 +118,6 @@ DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics);
extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb);
-extern int dccp_send_response(struct sock *sk);
extern void dccp_send_ack(struct sock *sk);
extern void dccp_send_delayed_ack(struct sock *sk);
extern void dccp_send_sync(struct sock *sk, const u64 seq,
@@ -140,53 +137,8 @@ extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu);
extern const char *dccp_packet_name(const int type);
extern const char *dccp_state_name(const int state);
-static inline void dccp_set_state(struct sock *sk, const int state)
-{
- const int oldstate = sk->sk_state;
-
- dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
- dccp_role(sk), sk,
- dccp_state_name(oldstate), dccp_state_name(state));
- WARN_ON(state == oldstate);
-
- switch (state) {
- case DCCP_OPEN:
- if (oldstate != DCCP_OPEN)
- DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
- break;
-
- case DCCP_CLOSED:
- if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
- DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
-
- sk->sk_prot->unhash(sk);
- if (inet_csk(sk)->icsk_bind_hash != NULL &&
- !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
- inet_put_port(&dccp_hashinfo, sk);
- /* fall through */
- default:
- if (oldstate == DCCP_OPEN)
- DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
- }
-
- /* Change state AFTER socket is unhashed to avoid closed
- * socket sitting in hash tables.
- */
- sk->sk_state = state;
-}
-
-static inline void dccp_done(struct sock *sk)
-{
- dccp_set_state(sk, DCCP_CLOSED);
- dccp_clear_xmit_timers(sk);
-
- sk->sk_shutdown = SHUTDOWN_MASK;
-
- if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_state_change(sk);
- else
- inet_csk_destroy_sock(sk);
-}
+extern void dccp_set_state(struct sock *sk, const int state);
+extern void dccp_done(struct sock *sk);
static inline void dccp_openreq_init(struct request_sock *req,
struct dccp_sock *dp,
@@ -209,10 +161,6 @@ extern struct sock *dccp_create_openreq_child(struct sock *sk,
extern int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
-extern void dccp_v4_err(struct sk_buff *skb, u32);
-
-extern int dccp_v4_rcv(struct sk_buff *skb);
-
extern struct sock *dccp_v4_request_recv_sock(struct sock *sk,
struct sk_buff *skb,
struct request_sock *req,
@@ -228,24 +176,30 @@ extern int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
const struct dccp_hdr *dh, const unsigned len);
-extern int dccp_v4_init_sock(struct sock *sk);
-extern int dccp_v4_destroy_sock(struct sock *sk);
+extern int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized);
+extern int dccp_destroy_sock(struct sock *sk);
extern void dccp_close(struct sock *sk, long timeout);
extern struct sk_buff *dccp_make_response(struct sock *sk,
struct dst_entry *dst,
struct request_sock *req);
-extern struct sk_buff *dccp_make_reset(struct sock *sk,
- struct dst_entry *dst,
- enum dccp_reset_codes code);
extern int dccp_connect(struct sock *sk);
extern int dccp_disconnect(struct sock *sk, int flags);
+extern void dccp_hash(struct sock *sk);
extern void dccp_unhash(struct sock *sk);
extern int dccp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
extern int dccp_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, int optlen);
+#ifdef CONFIG_COMPAT
+extern int compat_dccp_getsockopt(struct sock *sk,
+ int level, int optname,
+ char __user *optval, int __user *optlen);
+extern int compat_dccp_setsockopt(struct sock *sk,
+ int level, int optname,
+ char __user *optval, int optlen);
+#endif
extern int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg);
extern int dccp_sendmsg(struct kiocb *iocb, struct sock *sk,
struct msghdr *msg, size_t size);
@@ -262,15 +216,14 @@ extern int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len);
extern int dccp_v4_checksum(const struct sk_buff *skb,
- const u32 saddr, const u32 daddr);
+ const __be32 saddr, const __be32 daddr);
-extern int dccp_v4_send_reset(struct sock *sk,
- enum dccp_reset_codes code);
+extern int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code);
extern void dccp_send_close(struct sock *sk, const int active);
extern int dccp_invalid_packet(struct sk_buff *skb);
static inline int dccp_bad_service_code(const struct sock *sk,
- const __u32 service)
+ const __be32 service)
{
const struct dccp_sock *dp = dccp_sk(sk);
@@ -334,41 +287,29 @@ static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss)
{
struct dccp_hdr_ext *dhx = (struct dccp_hdr_ext *)((void *)dh +
sizeof(*dh));
-
-#if defined(__LITTLE_ENDIAN_BITFIELD)
- dh->dccph_seq = htonl((gss >> 32)) >> 8;
-#elif defined(__BIG_ENDIAN_BITFIELD)
- dh->dccph_seq = htonl((gss >> 32));
-#else
-#error "Adjust your <asm/byteorder.h> defines"
-#endif
+ dh->dccph_seq2 = 0;
+ dh->dccph_seq = htons((gss >> 32) & 0xfffff);
dhx->dccph_seq_low = htonl(gss & 0xffffffff);
}
static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack,
const u64 gsr)
{
-#if defined(__LITTLE_ENDIAN_BITFIELD)
- dhack->dccph_ack_nr_high = htonl((gsr >> 32)) >> 8;
-#elif defined(__BIG_ENDIAN_BITFIELD)
- dhack->dccph_ack_nr_high = htonl((gsr >> 32));
-#else
-#error "Adjust your <asm/byteorder.h> defines"
-#endif
+ dhack->dccph_reserved1 = 0;
+ dhack->dccph_ack_nr_high = htons(gsr >> 32);
dhack->dccph_ack_nr_low = htonl(gsr & 0xffffffff);
}
static inline void dccp_update_gsr(struct sock *sk, u64 seq)
{
struct dccp_sock *dp = dccp_sk(sk);
+ const struct dccp_minisock *dmsk = dccp_msk(sk);
dp->dccps_gsr = seq;
dccp_set_seqno(&dp->dccps_swl,
- (dp->dccps_gsr + 1 -
- (dp->dccps_options.dccpo_sequence_window / 4)));
+ dp->dccps_gsr + 1 - (dmsk->dccpms_sequence_window / 4));
dccp_set_seqno(&dp->dccps_swh,
- (dp->dccps_gsr +
- (3 * dp->dccps_options.dccpo_sequence_window) / 4));
+ dp->dccps_gsr + (3 * dmsk->dccpms_sequence_window) / 4);
}
static inline void dccp_update_gss(struct sock *sk, u64 seq)
@@ -378,7 +319,7 @@ static inline void dccp_update_gss(struct sock *sk, u64 seq)
dp->dccps_awh = dp->dccps_gss = seq;
dccp_set_seqno(&dp->dccps_awl,
(dp->dccps_gss -
- dp->dccps_options.dccpo_sequence_window + 1));
+ dccp_msk(sk)->dccpms_sequence_window + 1));
}
static inline int dccp_ack_pending(const struct sock *sk)
@@ -386,24 +327,22 @@ static inline int dccp_ack_pending(const struct sock *sk)
const struct dccp_sock *dp = dccp_sk(sk);
return dp->dccps_timestamp_echo != 0 ||
#ifdef CONFIG_IP_DCCP_ACKVEC
- (dp->dccps_options.dccpo_send_ack_vector &&
+ (dccp_msk(sk)->dccpms_send_ack_vector &&
dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) ||
#endif
inet_csk_ack_scheduled(sk);
}
-extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb);
-extern void dccp_insert_option_elapsed_time(struct sock *sk,
+extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb);
+extern int dccp_insert_option_elapsed_time(struct sock *sk,
struct sk_buff *skb,
u32 elapsed_time);
-extern void dccp_insert_option_timestamp(struct sock *sk,
+extern int dccp_insert_option_timestamp(struct sock *sk,
struct sk_buff *skb);
-extern void dccp_insert_option(struct sock *sk, struct sk_buff *skb,
+extern int dccp_insert_option(struct sock *sk, struct sk_buff *skb,
unsigned char option,
const void *value, unsigned char len);
-extern struct socket *dccp_ctl_socket;
-
extern void dccp_timestamp(const struct sock *sk, struct timeval *tv);
static inline suseconds_t timeval_usecs(const struct timeval *tv)
@@ -444,4 +383,18 @@ static inline void timeval_sub_usecs(struct timeval *tv,
}
}
+#ifdef CONFIG_SYSCTL
+extern int dccp_sysctl_init(void);
+extern void dccp_sysctl_exit(void);
+#else
+static inline int dccp_sysctl_init(void)
+{
+ return 0;
+}
+
+static inline void dccp_sysctl_exit(void)
+{
+}
+#endif
+
#endif /* _DCCP_H */
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
index 3f78c00e3822..0f25dc395967 100644
--- a/net/dccp/diag.c
+++ b/net/dccp/diag.c
@@ -30,7 +30,7 @@ static void dccp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_backoff = icsk->icsk_backoff;
info->tcpi_pmtu = icsk->icsk_pmtu_cookie;
- if (dp->dccps_options.dccpo_send_ack_vector)
+ if (dccp_msk(sk)->dccpms_send_ack_vector)
info->tcpi_options |= TCPI_OPT_SACK;
ccid_hc_rx_get_info(dp->dccps_hc_rx_ccid, sk, info);
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
new file mode 100644
index 000000000000..e3dd30d36c8a
--- /dev/null
+++ b/net/dccp/feat.c
@@ -0,0 +1,586 @@
+/*
+ * net/dccp/feat.c
+ *
+ * An implementation of the DCCP protocol
+ * Andrea Bittau <a.bittau@cs.ucl.ac.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include "dccp.h"
+#include "ccid.h"
+#include "feat.h"
+
+#define DCCP_FEAT_SP_NOAGREE (-123)
+
+int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
+ u8 *val, u8 len, gfp_t gfp)
+{
+ struct dccp_opt_pend *opt;
+
+ dccp_pr_debug("feat change type=%d feat=%d\n", type, feature);
+
+ /* XXX sanity check feat change request */
+
+ /* check if that feature is already being negotiated */
+ list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
+ /* ok we found a negotiation for this option already */
+ if (opt->dccpop_feat == feature && opt->dccpop_type == type) {
+ dccp_pr_debug("Replacing old\n");
+ /* replace */
+ BUG_ON(opt->dccpop_val == NULL);
+ kfree(opt->dccpop_val);
+ opt->dccpop_val = val;
+ opt->dccpop_len = len;
+ opt->dccpop_conf = 0;
+ return 0;
+ }
+ }
+
+ /* negotiation for a new feature */
+ opt = kmalloc(sizeof(*opt), gfp);
+ if (opt == NULL)
+ return -ENOMEM;
+
+ opt->dccpop_type = type;
+ opt->dccpop_feat = feature;
+ opt->dccpop_len = len;
+ opt->dccpop_val = val;
+ opt->dccpop_conf = 0;
+ opt->dccpop_sc = NULL;
+
+ BUG_ON(opt->dccpop_val == NULL);
+
+ list_add_tail(&opt->dccpop_node, &dmsk->dccpms_pending);
+ return 0;
+}
+
+EXPORT_SYMBOL_GPL(dccp_feat_change);
+
+static int dccp_feat_update_ccid(struct sock *sk, u8 type, u8 new_ccid_nr)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
+ struct dccp_minisock *dmsk = dccp_msk(sk);
+ /* figure out if we are changing our CCID or the peer's */
+ const int rx = type == DCCPO_CHANGE_R;
+ const u8 ccid_nr = rx ? dmsk->dccpms_rx_ccid : dmsk->dccpms_tx_ccid;
+ struct ccid *new_ccid;
+
+ /* Check if nothing is being changed. */
+ if (ccid_nr == new_ccid_nr)
+ return 0;
+
+ new_ccid = ccid_new(new_ccid_nr, sk, rx, GFP_ATOMIC);
+ if (new_ccid == NULL)
+ return -ENOMEM;
+
+ if (rx) {
+ ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
+ dp->dccps_hc_rx_ccid = new_ccid;
+ dmsk->dccpms_rx_ccid = new_ccid_nr;
+ } else {
+ ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
+ dp->dccps_hc_tx_ccid = new_ccid;
+ dmsk->dccpms_tx_ccid = new_ccid_nr;
+ }
+
+ return 0;
+}
+
+/* XXX taking only u8 vals */
+static int dccp_feat_update(struct sock *sk, u8 type, u8 feat, u8 val)
+{
+ dccp_pr_debug("changing [%d] feat %d to %d\n", type, feat, val);
+
+ switch (feat) {
+ case DCCPF_CCID:
+ return dccp_feat_update_ccid(sk, type, val);
+ default:
+ dccp_pr_debug("IMPLEMENT changing [%d] feat %d to %d\n",
+ type, feat, val);
+ break;
+ }
+ return 0;
+}
+
+static int dccp_feat_reconcile(struct sock *sk, struct dccp_opt_pend *opt,
+ u8 *rpref, u8 rlen)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
+ u8 *spref, slen, *res = NULL;
+ int i, j, rc, agree = 1;
+
+ BUG_ON(rpref == NULL);
+
+ /* check if we are the black sheep */
+ if (dp->dccps_role == DCCP_ROLE_CLIENT) {
+ spref = rpref;
+ slen = rlen;
+ rpref = opt->dccpop_val;
+ rlen = opt->dccpop_len;
+ } else {
+ spref = opt->dccpop_val;
+ slen = opt->dccpop_len;
+ }
+ /*
+ * Now we have server preference list in spref and client preference in
+ * rpref
+ */
+ BUG_ON(spref == NULL);
+ BUG_ON(rpref == NULL);
+
+ /* FIXME sanity check vals */
+
+ /* Are values in any order? XXX Lame "algorithm" here */
+ /* XXX assume values are 1 byte */
+ for (i = 0; i < slen; i++) {
+ for (j = 0; j < rlen; j++) {
+ if (spref[i] == rpref[j]) {
+ res = &spref[i];
+ break;
+ }
+ }
+ if (res)
+ break;
+ }
+
+ /* we didn't agree on anything */
+ if (res == NULL) {
+ /* confirm previous value */
+ switch (opt->dccpop_feat) {
+ case DCCPF_CCID:
+ /* XXX did i get this right? =P */
+ if (opt->dccpop_type == DCCPO_CHANGE_L)
+ res = &dccp_msk(sk)->dccpms_tx_ccid;
+ else
+ res = &dccp_msk(sk)->dccpms_rx_ccid;
+ break;
+
+ default:
+ WARN_ON(1); /* XXX implement res */
+ return -EFAULT;
+ }
+
+ dccp_pr_debug("Don't agree... reconfirming %d\n", *res);
+ agree = 0; /* this is used for mandatory options... */
+ }
+
+ /* need to put result and our preference list */
+ /* XXX assume 1 byte vals */
+ rlen = 1 + opt->dccpop_len;
+ rpref = kmalloc(rlen, GFP_ATOMIC);
+ if (rpref == NULL)
+ return -ENOMEM;
+
+ *rpref = *res;
+ memcpy(&rpref[1], opt->dccpop_val, opt->dccpop_len);
+
+ /* put it in the "confirm queue" */
+ if (opt->dccpop_sc == NULL) {
+ opt->dccpop_sc = kmalloc(sizeof(*opt->dccpop_sc), GFP_ATOMIC);
+ if (opt->dccpop_sc == NULL) {
+ kfree(rpref);
+ return -ENOMEM;
+ }
+ } else {
+ /* recycle the confirm slot */
+ BUG_ON(opt->dccpop_sc->dccpoc_val == NULL);
+ kfree(opt->dccpop_sc->dccpoc_val);
+ dccp_pr_debug("recycling confirm slot\n");
+ }
+ memset(opt->dccpop_sc, 0, sizeof(*opt->dccpop_sc));
+
+ opt->dccpop_sc->dccpoc_val = rpref;
+ opt->dccpop_sc->dccpoc_len = rlen;
+
+ /* update the option on our side [we are about to send the confirm] */
+ rc = dccp_feat_update(sk, opt->dccpop_type, opt->dccpop_feat, *res);
+ if (rc) {
+ kfree(opt->dccpop_sc->dccpoc_val);
+ kfree(opt->dccpop_sc);
+ opt->dccpop_sc = 0;
+ return rc;
+ }
+
+ dccp_pr_debug("Will confirm %d\n", *rpref);
+
+ /* say we want to change to X but we just got a confirm X, suppress our
+ * change
+ */
+ if (!opt->dccpop_conf) {
+ if (*opt->dccpop_val == *res)
+ opt->dccpop_conf = 1;
+ dccp_pr_debug("won't ask for change of same feature\n");
+ }
+
+ return agree ? 0 : DCCP_FEAT_SP_NOAGREE; /* used for mandatory opts */
+}
+
+static int dccp_feat_sp(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
+{
+ struct dccp_minisock *dmsk = dccp_msk(sk);
+ struct dccp_opt_pend *opt;
+ int rc = 1;
+ u8 t;
+
+ /*
+ * We received a CHANGE. We gotta match it against our own preference
+ * list. If we got a CHANGE_R it means it's a change for us, so we need
+ * to compare our CHANGE_L list.
+ */
+ if (type == DCCPO_CHANGE_L)
+ t = DCCPO_CHANGE_R;
+ else
+ t = DCCPO_CHANGE_L;
+
+ /* find our preference list for this feature */
+ list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
+ if (opt->dccpop_type != t || opt->dccpop_feat != feature)
+ continue;
+
+ /* find the winner from the two preference lists */
+ rc = dccp_feat_reconcile(sk, opt, val, len);
+ break;
+ }
+
+ /* We didn't deal with the change. This can happen if we have no
+ * preference list for the feature. In fact, it just shouldn't
+ * happen---if we understand a feature, we should have a preference list
+ * with at least the default value.
+ */
+ BUG_ON(rc == 1);
+
+ return rc;
+}
+
+static int dccp_feat_nn(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
+{
+ struct dccp_opt_pend *opt;
+ struct dccp_minisock *dmsk = dccp_msk(sk);
+ u8 *copy;
+ int rc;
+
+ /* NN features must be change L */
+ if (type == DCCPO_CHANGE_R) {
+ dccp_pr_debug("received CHANGE_R %d for NN feat %d\n",
+ type, feature);
+ return -EFAULT;
+ }
+
+ /* XXX sanity check opt val */
+
+ /* copy option so we can confirm it */
+ opt = kzalloc(sizeof(*opt), GFP_ATOMIC);
+ if (opt == NULL)
+ return -ENOMEM;
+
+ copy = kmalloc(len, GFP_ATOMIC);
+ if (copy == NULL) {
+ kfree(opt);
+ return -ENOMEM;
+ }
+ memcpy(copy, val, len);
+
+ opt->dccpop_type = DCCPO_CONFIRM_R; /* NN can only confirm R */
+ opt->dccpop_feat = feature;
+ opt->dccpop_val = copy;
+ opt->dccpop_len = len;
+
+ /* change feature */
+ rc = dccp_feat_update(sk, type, feature, *val);
+ if (rc) {
+ kfree(opt->dccpop_val);
+ kfree(opt);
+ return rc;
+ }
+
+ dccp_pr_debug("Confirming NN feature %d (val=%d)\n", feature, *copy);
+ list_add_tail(&opt->dccpop_node, &dmsk->dccpms_conf);
+
+ return 0;
+}
+
+static void dccp_feat_empty_confirm(struct dccp_minisock *dmsk,
+ u8 type, u8 feature)
+{
+ /* XXX check if other confirms for that are queued and recycle slot */
+ struct dccp_opt_pend *opt = kzalloc(sizeof(*opt), GFP_ATOMIC);
+
+ if (opt == NULL) {
+ /* XXX what do we do? Ignoring should be fine. It's a change
+ * after all =P
+ */
+ return;
+ }
+
+ opt->dccpop_type = type == DCCPO_CHANGE_L ? DCCPO_CONFIRM_R :
+ DCCPO_CONFIRM_L;
+ opt->dccpop_feat = feature;
+ opt->dccpop_val = 0;
+ opt->dccpop_len = 0;
+
+ /* change feature */
+ dccp_pr_debug("Empty confirm feature %d type %d\n", feature, type);
+ list_add_tail(&opt->dccpop_node, &dmsk->dccpms_conf);
+}
+
+static void dccp_feat_flush_confirm(struct sock *sk)
+{
+ struct dccp_minisock *dmsk = dccp_msk(sk);
+ /* Check if there is anything to confirm in the first place */
+ int yes = !list_empty(&dmsk->dccpms_conf);
+
+ if (!yes) {
+ struct dccp_opt_pend *opt;
+
+ list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
+ if (opt->dccpop_conf) {
+ yes = 1;
+ break;
+ }
+ }
+ }
+
+ if (!yes)
+ return;
+
+ /* OK there is something to confirm... */
+ /* XXX check if packet is in flight? Send delayed ack?? */
+ if (sk->sk_state == DCCP_OPEN)
+ dccp_send_ack(sk);
+}
+
+int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
+{
+ int rc;
+
+ dccp_pr_debug("got feat change type=%d feat=%d\n", type, feature);
+
+ /* figure out if it's SP or NN feature */
+ switch (feature) {
+ /* deal with SP features */
+ case DCCPF_CCID:
+ rc = dccp_feat_sp(sk, type, feature, val, len);
+ break;
+
+ /* deal with NN features */
+ case DCCPF_ACK_RATIO:
+ rc = dccp_feat_nn(sk, type, feature, val, len);
+ break;
+
+ /* XXX implement other features */
+ default:
+ rc = -EFAULT;
+ break;
+ }
+
+ /* check if there were problems changing features */
+ if (rc) {
+ /* If we don't agree on SP, we sent a confirm for old value.
+ * However we propagate rc to caller in case option was
+ * mandatory
+ */
+ if (rc != DCCP_FEAT_SP_NOAGREE)
+ dccp_feat_empty_confirm(dccp_msk(sk), type, feature);
+ }
+
+ /* generate the confirm [if required] */
+ dccp_feat_flush_confirm(sk);
+
+ return rc;
+}
+
+EXPORT_SYMBOL_GPL(dccp_feat_change_recv);
+
+int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature,
+ u8 *val, u8 len)
+{
+ u8 t;
+ struct dccp_opt_pend *opt;
+ struct dccp_minisock *dmsk = dccp_msk(sk);
+ int rc = 1;
+ int all_confirmed = 1;
+
+ dccp_pr_debug("got feat confirm type=%d feat=%d\n", type, feature);
+
+ /* XXX sanity check type & feat */
+
+ /* locate our change request */
+ t = type == DCCPO_CONFIRM_L ? DCCPO_CHANGE_R : DCCPO_CHANGE_L;
+
+ list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
+ if (!opt->dccpop_conf && opt->dccpop_type == t &&
+ opt->dccpop_feat == feature) {
+ /* we found it */
+ /* XXX do sanity check */
+
+ opt->dccpop_conf = 1;
+
+ /* We got a confirmation---change the option */
+ dccp_feat_update(sk, opt->dccpop_type,
+ opt->dccpop_feat, *val);
+
+ dccp_pr_debug("feat %d type %d confirmed %d\n",
+ feature, type, *val);
+ rc = 0;
+ break;
+ }
+
+ if (!opt->dccpop_conf)
+ all_confirmed = 0;
+ }
+
+ /* fix re-transmit timer */
+ /* XXX gotta make sure that no option negotiation occurs during
+ * connection shutdown. Consider that the CLOSEREQ is sent and timer is
+ * on. if all options are confirmed it might kill timer which should
+ * remain alive until close is received.
+ */
+ if (all_confirmed) {
+ dccp_pr_debug("clear feat negotiation timer %p\n", sk);
+ inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
+ }
+
+ if (rc)
+ dccp_pr_debug("feat %d type %d never requested\n",
+ feature, type);
+ return 0;
+}
+
+EXPORT_SYMBOL_GPL(dccp_feat_confirm_recv);
+
+void dccp_feat_clean(struct dccp_minisock *dmsk)
+{
+ struct dccp_opt_pend *opt, *next;
+
+ list_for_each_entry_safe(opt, next, &dmsk->dccpms_pending,
+ dccpop_node) {
+ BUG_ON(opt->dccpop_val == NULL);
+ kfree(opt->dccpop_val);
+
+ if (opt->dccpop_sc != NULL) {
+ BUG_ON(opt->dccpop_sc->dccpoc_val == NULL);
+ kfree(opt->dccpop_sc->dccpoc_val);
+ kfree(opt->dccpop_sc);
+ }
+
+ kfree(opt);
+ }
+ INIT_LIST_HEAD(&dmsk->dccpms_pending);
+
+ list_for_each_entry_safe(opt, next, &dmsk->dccpms_conf, dccpop_node) {
+ BUG_ON(opt == NULL);
+ if (opt->dccpop_val != NULL)
+ kfree(opt->dccpop_val);
+ kfree(opt);
+ }
+ INIT_LIST_HEAD(&dmsk->dccpms_conf);
+}
+
+EXPORT_SYMBOL_GPL(dccp_feat_clean);
+
+/* this is to be called only when a listening sock creates its child. It is
+ * assumed by the function---the confirm is not duplicated, but rather it is
+ * "passed on".
+ */
+int dccp_feat_clone(struct sock *oldsk, struct sock *newsk)
+{
+ struct dccp_minisock *olddmsk = dccp_msk(oldsk);
+ struct dccp_minisock *newdmsk = dccp_msk(newsk);
+ struct dccp_opt_pend *opt;
+ int rc = 0;
+
+ INIT_LIST_HEAD(&newdmsk->dccpms_pending);
+ INIT_LIST_HEAD(&newdmsk->dccpms_conf);
+
+ list_for_each_entry(opt, &olddmsk->dccpms_pending, dccpop_node) {
+ struct dccp_opt_pend *newopt;
+ /* copy the value of the option */
+ u8 *val = kmalloc(opt->dccpop_len, GFP_ATOMIC);
+
+ if (val == NULL)
+ goto out_clean;
+ memcpy(val, opt->dccpop_val, opt->dccpop_len);
+
+ newopt = kmalloc(sizeof(*newopt), GFP_ATOMIC);
+ if (newopt == NULL) {
+ kfree(val);
+ goto out_clean;
+ }
+
+ /* insert the option */
+ memcpy(newopt, opt, sizeof(*newopt));
+ newopt->dccpop_val = val;
+ list_add_tail(&newopt->dccpop_node, &newdmsk->dccpms_pending);
+
+ /* XXX what happens with backlogs and multiple connections at
+ * once...
+ */
+ /* the master socket no longer needs to worry about confirms */
+ opt->dccpop_sc = 0; /* it's not a memleak---new socket has it */
+
+ /* reset state for a new socket */
+ opt->dccpop_conf = 0;
+ }
+
+ /* XXX not doing anything about the conf queue */
+
+out:
+ return rc;
+
+out_clean:
+ dccp_feat_clean(newdmsk);
+ rc = -ENOMEM;
+ goto out;
+}
+
+EXPORT_SYMBOL_GPL(dccp_feat_clone);
+
+static int __dccp_feat_init(struct dccp_minisock *dmsk, u8 type, u8 feat,
+ u8 *val, u8 len)
+{
+ int rc = -ENOMEM;
+ u8 *copy = kmalloc(len, GFP_KERNEL);
+
+ if (copy != NULL) {
+ memcpy(copy, val, len);
+ rc = dccp_feat_change(dmsk, type, feat, copy, len, GFP_KERNEL);
+ if (rc)
+ kfree(copy);
+ }
+ return rc;
+}
+
+int dccp_feat_init(struct dccp_minisock *dmsk)
+{
+ int rc;
+
+ INIT_LIST_HEAD(&dmsk->dccpms_pending);
+ INIT_LIST_HEAD(&dmsk->dccpms_conf);
+
+ /* CCID L */
+ rc = __dccp_feat_init(dmsk, DCCPO_CHANGE_L, DCCPF_CCID,
+ &dmsk->dccpms_tx_ccid, 1);
+ if (rc)
+ goto out;
+
+ /* CCID R */
+ rc = __dccp_feat_init(dmsk, DCCPO_CHANGE_R, DCCPF_CCID,
+ &dmsk->dccpms_rx_ccid, 1);
+ if (rc)
+ goto out;
+
+ /* Ack ratio */
+ rc = __dccp_feat_init(dmsk, DCCPO_CHANGE_L, DCCPF_ACK_RATIO,
+ &dmsk->dccpms_ack_ratio, 1);
+out:
+ return rc;
+}
+
+EXPORT_SYMBOL_GPL(dccp_feat_init);
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
new file mode 100644
index 000000000000..6048373c7186
--- /dev/null
+++ b/net/dccp/feat.h
@@ -0,0 +1,29 @@
+#ifndef _DCCP_FEAT_H
+#define _DCCP_FEAT_H
+/*
+ * net/dccp/feat.h
+ *
+ * An implementation of the DCCP protocol
+ * Copyright (c) 2005 Andrea Bittau <a.bittau@cs.ucl.ac.uk>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+
+struct sock;
+struct dccp_minisock;
+
+extern int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
+ u8 *val, u8 len, gfp_t gfp);
+extern int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature,
+ u8 *val, u8 len);
+extern int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature,
+ u8 *val, u8 len);
+extern void dccp_feat_clean(struct dccp_minisock *dmsk);
+extern int dccp_feat_clone(struct sock *oldsk, struct sock *newsk);
+extern int dccp_feat_init(struct dccp_minisock *dmsk);
+
+#endif /* _DCCP_FEAT_H */
diff --git a/net/dccp/input.c b/net/dccp/input.c
index b6cba72b44e8..bfc53665516b 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -32,7 +32,7 @@ static void dccp_fin(struct sock *sk, struct sk_buff *skb)
static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb)
{
- dccp_v4_send_reset(sk, DCCP_RESET_CODE_CLOSED);
+ dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
dccp_fin(sk, skb);
dccp_set_state(sk, DCCP_CLOSED);
sk_wake_async(sk, 1, POLL_HUP);
@@ -56,11 +56,11 @@ static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb)
dccp_send_close(sk, 0);
}
-static inline void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb)
+static void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
- if (dp->dccps_options.dccpo_send_ack_vector)
+ if (dccp_msk(sk)->dccpms_send_ack_vector)
dccp_ackvec_check_rcv_ackno(dp->dccps_hc_rx_ackvec, sk,
DCCP_SKB_CB(skb)->dccpd_ack_seq);
}
@@ -151,9 +151,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
return 0;
}
-static inline int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
- const struct dccp_hdr *dh,
- const unsigned len)
+static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
+ const struct dccp_hdr *dh, const unsigned len)
{
struct dccp_sock *dp = dccp_sk(sk);
@@ -247,7 +246,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
dccp_event_ack_recv(sk, skb);
- if (dp->dccps_options.dccpo_send_ack_vector &&
+ if (dccp_msk(sk)->dccpms_send_ack_vector &&
dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
DCCP_SKB_CB(skb)->dccpd_seq,
DCCP_ACKVEC_STATE_RECEIVED))
@@ -300,7 +299,10 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
goto out_invalid_packet;
}
- if (dp->dccps_options.dccpo_send_ack_vector &&
+ if (dccp_parse_options(sk, skb))
+ goto out_invalid_packet;
+
+ if (dccp_msk(sk)->dccpms_send_ack_vector &&
dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
DCCP_SKB_CB(skb)->dccpd_seq,
DCCP_ACKVEC_STATE_RECEIVED))
@@ -321,14 +323,6 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
dccp_set_seqno(&dp->dccps_swl,
max48(dp->dccps_swl, dp->dccps_isr));
- if (ccid_hc_rx_init(dp->dccps_hc_rx_ccid, sk) != 0 ||
- ccid_hc_tx_init(dp->dccps_hc_tx_ccid, sk) != 0) {
- ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
- ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
- /* FIXME: send appropriate RESET code */
- goto out_invalid_packet;
- }
-
dccp_sync_mss(sk, icsk->icsk_pmtu_cookie);
/*
@@ -492,7 +486,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
dccp_event_ack_recv(sk, skb);
- if (dp->dccps_options.dccpo_send_ack_vector &&
+ if (dccp_msk(sk)->dccpms_send_ack_vector &&
dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
DCCP_SKB_CB(skb)->dccpd_seq,
DCCP_ACKVEC_STATE_RECEIVED))
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index dc0487b5bace..29047995c695 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -18,8 +18,10 @@
#include <linux/random.h>
#include <net/icmp.h>
+#include <net/inet_common.h>
#include <net/inet_hashtables.h>
#include <net/inet_sock.h>
+#include <net/protocol.h>
#include <net/sock.h>
#include <net/timewait_sock.h>
#include <net/tcp_states.h>
@@ -28,14 +30,14 @@
#include "ackvec.h"
#include "ccid.h"
#include "dccp.h"
+#include "feat.h"
-struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
- .lhash_lock = RW_LOCK_UNLOCKED,
- .lhash_users = ATOMIC_INIT(0),
- .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
-};
-
-EXPORT_SYMBOL_GPL(dccp_hashinfo);
+/*
+ * This is the global socket data structure used for responding to
+ * the Out-of-the-blue (OOTB) packets. A control sock will be created
+ * for this socket at the initialization time.
+ */
+static struct socket *dccp_v4_ctl_socket;
static int dccp_v4_get_port(struct sock *sk, const unsigned short snum)
{
@@ -43,18 +45,6 @@ static int dccp_v4_get_port(struct sock *sk, const unsigned short snum)
inet_csk_bind_conflict);
}
-static void dccp_v4_hash(struct sock *sk)
-{
- inet_hash(&dccp_hashinfo, sk);
-}
-
-void dccp_unhash(struct sock *sk)
-{
- inet_unhash(&dccp_hashinfo, sk);
-}
-
-EXPORT_SYMBOL_GPL(dccp_unhash);
-
int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct inet_sock *inet = inet_sk(sk);
@@ -207,11 +197,12 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk,
} /* else let the usual retransmit timer handle it */
}
-static void dccp_v4_ctl_send_ack(struct sk_buff *rxskb)
+static void dccp_v4_reqsk_send_ack(struct sk_buff *rxskb,
+ struct request_sock *req)
{
int err;
struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
- const int dccp_hdr_ack_len = sizeof(struct dccp_hdr) +
+ const u32 dccp_hdr_ack_len = sizeof(struct dccp_hdr) +
sizeof(struct dccp_hdr_ext) +
sizeof(struct dccp_hdr_ack_bits);
struct sk_buff *skb;
@@ -219,12 +210,12 @@ static void dccp_v4_ctl_send_ack(struct sk_buff *rxskb)
if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL)
return;
- skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC);
+ skb = alloc_skb(dccp_v4_ctl_socket->sk->sk_prot->max_header, GFP_ATOMIC);
if (skb == NULL)
return;
/* Reserve space for headers. */
- skb_reserve(skb, MAX_DCCP_HEADER);
+ skb_reserve(skb, dccp_v4_ctl_socket->sk->sk_prot->max_header);
skb->dst = dst_clone(rxskb->dst);
@@ -243,11 +234,11 @@ static void dccp_v4_ctl_send_ack(struct sk_buff *rxskb)
dccp_hdr_set_ack(dccp_hdr_ack_bits(skb),
DCCP_SKB_CB(rxskb)->dccpd_seq);
- bh_lock_sock(dccp_ctl_socket->sk);
- err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk,
+ bh_lock_sock(dccp_v4_ctl_socket->sk);
+ err = ip_build_and_send_pkt(skb, dccp_v4_ctl_socket->sk,
rxskb->nh.iph->daddr,
rxskb->nh.iph->saddr, NULL);
- bh_unlock_sock(dccp_ctl_socket->sk);
+ bh_unlock_sock(dccp_v4_ctl_socket->sk);
if (err == NET_XMIT_CN || err == 0) {
DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
@@ -255,12 +246,6 @@ static void dccp_v4_ctl_send_ack(struct sk_buff *rxskb)
}
}
-static void dccp_v4_reqsk_send_ack(struct sk_buff *skb,
- struct request_sock *req)
-{
- dccp_v4_ctl_send_ack(skb);
-}
-
static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
struct dst_entry *dst)
{
@@ -275,7 +260,10 @@ static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
skb = dccp_make_response(sk, dst, req);
if (skb != NULL) {
const struct inet_request_sock *ireq = inet_rsk(req);
+ struct dccp_hdr *dh = dccp_hdr(skb);
+ dh->dccph_checksum = dccp_v4_checksum(skb, ireq->loc_addr,
+ ireq->rmt_addr);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
ireq->rmt_addr,
@@ -301,7 +289,7 @@ out:
* check at all. A more general error queue to queue errors for later handling
* is probably better.
*/
-void dccp_v4_err(struct sk_buff *skb, u32 info)
+static void dccp_v4_err(struct sk_buff *skb, u32 info)
{
const struct iphdr *iph = (struct iphdr *)skb->data;
const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data +
@@ -456,32 +444,6 @@ void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
EXPORT_SYMBOL_GPL(dccp_v4_send_check);
-int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code)
-{
- struct sk_buff *skb;
- /*
- * FIXME: what if rebuild_header fails?
- * Should we be doing a rebuild_header here?
- */
- int err = inet_sk_rebuild_header(sk);
-
- if (err != 0)
- return err;
-
- skb = dccp_make_reset(sk, sk->sk_dst_cache, code);
- if (skb != NULL) {
- const struct inet_sock *inet = inet_sk(sk);
-
- memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- err = ip_build_and_send_pkt(skb, sk,
- inet->saddr, inet->daddr, NULL);
- if (err == NET_XMIT_CN)
- err = 0;
- }
-
- return err;
-}
-
static inline u64 dccp_v4_init_sequence(const struct sock *sk,
const struct sk_buff *skb)
{
@@ -497,9 +459,9 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
struct dccp_sock dp;
struct request_sock *req;
struct dccp_request_sock *dreq;
- const __u32 saddr = skb->nh.iph->saddr;
- const __u32 daddr = skb->nh.iph->daddr;
- const __u32 service = dccp_hdr_request(skb)->dccph_req_service;
+ const __be32 saddr = skb->nh.iph->saddr;
+ const __be32 daddr = skb->nh.iph->daddr;
+ const __be32 service = dccp_hdr_request(skb)->dccph_req_service;
struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
__u8 reset_code = DCCP_RESET_CODE_TOO_BUSY;
@@ -535,7 +497,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
if (req == NULL)
goto drop;
- /* FIXME: process options */
+ if (dccp_parse_options(sk, skb))
+ goto drop;
dccp_openreq_init(req, &dp, skb);
@@ -660,8 +623,8 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
return sk;
}
-int dccp_v4_checksum(const struct sk_buff *skb, const u32 saddr,
- const u32 daddr)
+int dccp_v4_checksum(const struct sk_buff *skb, const __be32 saddr,
+ const __be32 daddr)
{
const struct dccp_hdr* dh = dccp_hdr(skb);
int checksum_len;
@@ -680,8 +643,10 @@ int dccp_v4_checksum(const struct sk_buff *skb, const u32 saddr,
IPPROTO_DCCP, tmp);
}
+EXPORT_SYMBOL_GPL(dccp_v4_checksum);
+
static int dccp_v4_verify_checksum(struct sk_buff *skb,
- const u32 saddr, const u32 daddr)
+ const __be32 saddr, const __be32 daddr)
{
struct dccp_hdr *dh = dccp_hdr(skb);
int checksum_len;
@@ -741,16 +706,17 @@ static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb)
if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL)
return;
- dst = dccp_v4_route_skb(dccp_ctl_socket->sk, rxskb);
+ dst = dccp_v4_route_skb(dccp_v4_ctl_socket->sk, rxskb);
if (dst == NULL)
return;
- skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC);
+ skb = alloc_skb(dccp_v4_ctl_socket->sk->sk_prot->max_header,
+ GFP_ATOMIC);
if (skb == NULL)
goto out;
/* Reserve space for headers. */
- skb_reserve(skb, MAX_DCCP_HEADER);
+ skb_reserve(skb, dccp_v4_ctl_socket->sk->sk_prot->max_header);
skb->dst = dst_clone(dst);
skb->h.raw = skb_push(skb, dccp_hdr_reset_len);
@@ -778,11 +744,11 @@ static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb)
dh->dccph_checksum = dccp_v4_checksum(skb, rxskb->nh.iph->saddr,
rxskb->nh.iph->daddr);
- bh_lock_sock(dccp_ctl_socket->sk);
- err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk,
+ bh_lock_sock(dccp_v4_ctl_socket->sk);
+ err = ip_build_and_send_pkt(skb, dccp_v4_ctl_socket->sk,
rxskb->nh.iph->daddr,
rxskb->nh.iph->saddr, NULL);
- bh_unlock_sock(dccp_ctl_socket->sk);
+ bh_unlock_sock(dccp_v4_ctl_socket->sk);
if (err == NET_XMIT_CN || err == 0) {
DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
@@ -912,7 +878,7 @@ int dccp_invalid_packet(struct sk_buff *skb)
EXPORT_SYMBOL_GPL(dccp_invalid_packet);
/* this is called when real data arrives */
-int dccp_v4_rcv(struct sk_buff *skb)
+static int dccp_v4_rcv(struct sk_buff *skb)
{
const struct dccp_hdr *dh;
struct sock *sk;
@@ -1019,111 +985,37 @@ do_time_wait:
goto no_dccp_socket;
}
-struct inet_connection_sock_af_ops dccp_ipv4_af_ops = {
- .queue_xmit = ip_queue_xmit,
- .send_check = dccp_v4_send_check,
- .rebuild_header = inet_sk_rebuild_header,
- .conn_request = dccp_v4_conn_request,
- .syn_recv_sock = dccp_v4_request_recv_sock,
- .net_header_len = sizeof(struct iphdr),
- .setsockopt = ip_setsockopt,
- .getsockopt = ip_getsockopt,
- .addr2sockaddr = inet_csk_addr2sockaddr,
- .sockaddr_len = sizeof(struct sockaddr_in),
+static struct inet_connection_sock_af_ops dccp_ipv4_af_ops = {
+ .queue_xmit = ip_queue_xmit,
+ .send_check = dccp_v4_send_check,
+ .rebuild_header = inet_sk_rebuild_header,
+ .conn_request = dccp_v4_conn_request,
+ .syn_recv_sock = dccp_v4_request_recv_sock,
+ .net_header_len = sizeof(struct iphdr),
+ .setsockopt = ip_setsockopt,
+ .getsockopt = ip_getsockopt,
+ .addr2sockaddr = inet_csk_addr2sockaddr,
+ .sockaddr_len = sizeof(struct sockaddr_in),
+#ifdef CONFIG_COMPAT
+ .compat_setsockopt = compat_ip_setsockopt,
+ .compat_getsockopt = compat_ip_getsockopt,
+#endif
};
-int dccp_v4_init_sock(struct sock *sk)
-{
- struct dccp_sock *dp = dccp_sk(sk);
- struct inet_connection_sock *icsk = inet_csk(sk);
- static int dccp_ctl_socket_init = 1;
-
- dccp_options_init(&dp->dccps_options);
- do_gettimeofday(&dp->dccps_epoch);
-
- if (dp->dccps_options.dccpo_send_ack_vector) {
- dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(DCCP_MAX_ACKVEC_LEN,
- GFP_KERNEL);
- if (dp->dccps_hc_rx_ackvec == NULL)
- return -ENOMEM;
- }
-
- /*
- * FIXME: We're hardcoding the CCID, and doing this at this point makes
- * the listening (master) sock get CCID control blocks, which is not
- * necessary, but for now, to not mess with the test userspace apps,
- * lets leave it here, later the real solution is to do this in a
- * setsockopt(CCIDs-I-want/accept). -acme
- */
- if (likely(!dccp_ctl_socket_init)) {
- dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_rx_ccid,
- sk);
- dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_tx_ccid,
- sk);
- if (dp->dccps_hc_rx_ccid == NULL ||
- dp->dccps_hc_tx_ccid == NULL) {
- ccid_exit(dp->dccps_hc_rx_ccid, sk);
- ccid_exit(dp->dccps_hc_tx_ccid, sk);
- if (dp->dccps_options.dccpo_send_ack_vector) {
- dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
- dp->dccps_hc_rx_ackvec = NULL;
- }
- dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
- return -ENOMEM;
- }
- } else
- dccp_ctl_socket_init = 0;
-
- dccp_init_xmit_timers(sk);
- icsk->icsk_rto = DCCP_TIMEOUT_INIT;
- sk->sk_state = DCCP_CLOSED;
- sk->sk_write_space = dccp_write_space;
- icsk->icsk_af_ops = &dccp_ipv4_af_ops;
- icsk->icsk_sync_mss = dccp_sync_mss;
- dp->dccps_mss_cache = 536;
- dp->dccps_role = DCCP_ROLE_UNDEFINED;
- dp->dccps_service = DCCP_SERVICE_INVALID_VALUE;
-
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(dccp_v4_init_sock);
-
-int dccp_v4_destroy_sock(struct sock *sk)
+static int dccp_v4_init_sock(struct sock *sk)
{
- struct dccp_sock *dp = dccp_sk(sk);
+ static __u8 dccp_v4_ctl_sock_initialized;
+ int err = dccp_init_sock(sk, dccp_v4_ctl_sock_initialized);
- /*
- * DCCP doesn't use sk_write_queue, just sk_send_head
- * for retransmissions
- */
- if (sk->sk_send_head != NULL) {
- kfree_skb(sk->sk_send_head);
- sk->sk_send_head = NULL;
+ if (err == 0) {
+ if (unlikely(!dccp_v4_ctl_sock_initialized))
+ dccp_v4_ctl_sock_initialized = 1;
+ inet_csk(sk)->icsk_af_ops = &dccp_ipv4_af_ops;
}
- /* Clean up a referenced DCCP bind bucket. */
- if (inet_csk(sk)->icsk_bind_hash != NULL)
- inet_put_port(&dccp_hashinfo, sk);
-
- kfree(dp->dccps_service_list);
- dp->dccps_service_list = NULL;
-
- ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
- ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
- if (dp->dccps_options.dccpo_send_ack_vector) {
- dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
- dp->dccps_hc_rx_ackvec = NULL;
- }
- ccid_exit(dp->dccps_hc_rx_ccid, sk);
- ccid_exit(dp->dccps_hc_tx_ccid, sk);
- dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
-
- return 0;
+ return err;
}
-EXPORT_SYMBOL_GPL(dccp_v4_destroy_sock);
-
static void dccp_v4_reqsk_destructor(struct request_sock *req)
{
kfree(inet_rsk(req)->opt);
@@ -1142,7 +1034,7 @@ static struct timewait_sock_ops dccp_timewait_sock_ops = {
.twsk_obj_size = sizeof(struct inet_timewait_sock),
};
-struct proto dccp_prot = {
+static struct proto dccp_v4_prot = {
.name = "DCCP",
.owner = THIS_MODULE,
.close = dccp_close,
@@ -1155,17 +1047,110 @@ struct proto dccp_prot = {
.sendmsg = dccp_sendmsg,
.recvmsg = dccp_recvmsg,
.backlog_rcv = dccp_v4_do_rcv,
- .hash = dccp_v4_hash,
+ .hash = dccp_hash,
.unhash = dccp_unhash,
.accept = inet_csk_accept,
.get_port = dccp_v4_get_port,
.shutdown = dccp_shutdown,
- .destroy = dccp_v4_destroy_sock,
+ .destroy = dccp_destroy_sock,
.orphan_count = &dccp_orphan_count,
.max_header = MAX_DCCP_HEADER,
.obj_size = sizeof(struct dccp_sock),
.rsk_prot = &dccp_request_sock_ops,
.twsk_prot = &dccp_timewait_sock_ops,
+#ifdef CONFIG_COMPAT
+ .compat_setsockopt = compat_dccp_setsockopt,
+ .compat_getsockopt = compat_dccp_getsockopt,
+#endif
+};
+
+static struct net_protocol dccp_v4_protocol = {
+ .handler = dccp_v4_rcv,
+ .err_handler = dccp_v4_err,
+ .no_policy = 1,
+};
+
+static const struct proto_ops inet_dccp_ops = {
+ .family = PF_INET,
+ .owner = THIS_MODULE,
+ .release = inet_release,
+ .bind = inet_bind,
+ .connect = inet_stream_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = inet_accept,
+ .getname = inet_getname,
+ /* FIXME: work on tcp_poll to rename it to inet_csk_poll */
+ .poll = dccp_poll,
+ .ioctl = inet_ioctl,
+ /* FIXME: work on inet_listen to rename it to sock_common_listen */
+ .listen = inet_dccp_listen,
+ .shutdown = inet_shutdown,
+ .setsockopt = sock_common_setsockopt,
+ .getsockopt = sock_common_getsockopt,
+ .sendmsg = inet_sendmsg,
+ .recvmsg = sock_common_recvmsg,
+ .mmap = sock_no_mmap,
+ .sendpage = sock_no_sendpage,
+#ifdef CONFIG_COMPAT
+ .compat_setsockopt = compat_sock_common_setsockopt,
+ .compat_getsockopt = compat_sock_common_getsockopt,
+#endif
};
-EXPORT_SYMBOL_GPL(dccp_prot);
+static struct inet_protosw dccp_v4_protosw = {
+ .type = SOCK_DCCP,
+ .protocol = IPPROTO_DCCP,
+ .prot = &dccp_v4_prot,
+ .ops = &inet_dccp_ops,
+ .capability = -1,
+ .no_check = 0,
+ .flags = INET_PROTOSW_ICSK,
+};
+
+static int __init dccp_v4_init(void)
+{
+ int err = proto_register(&dccp_v4_prot, 1);
+
+ if (err != 0)
+ goto out;
+
+ err = inet_add_protocol(&dccp_v4_protocol, IPPROTO_DCCP);
+ if (err != 0)
+ goto out_proto_unregister;
+
+ inet_register_protosw(&dccp_v4_protosw);
+
+ err = inet_csk_ctl_sock_create(&dccp_v4_ctl_socket, PF_INET,
+ SOCK_DCCP, IPPROTO_DCCP);
+ if (err)
+ goto out_unregister_protosw;
+out:
+ return err;
+out_unregister_protosw:
+ inet_unregister_protosw(&dccp_v4_protosw);
+ inet_del_protocol(&dccp_v4_protocol, IPPROTO_DCCP);
+out_proto_unregister:
+ proto_unregister(&dccp_v4_prot);
+ goto out;
+}
+
+static void __exit dccp_v4_exit(void)
+{
+ inet_unregister_protosw(&dccp_v4_protosw);
+ inet_del_protocol(&dccp_v4_protocol, IPPROTO_DCCP);
+ proto_unregister(&dccp_v4_prot);
+}
+
+module_init(dccp_v4_init);
+module_exit(dccp_v4_exit);
+
+/*
+ * __stringify doesn't likes enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33)
+ * values directly, Also cover the case where the protocol is not specified,
+ * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP
+ */
+MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
+MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>");
+MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 80c4d048869e..65e2ab0886e6 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1,6 +1,6 @@
/*
* DCCP over IPv6
- * Linux INET6 implementation
+ * Linux INET6 implementation
*
* Based on net/dccp6/ipv6.c
*
@@ -33,6 +33,9 @@
#include "dccp.h"
#include "ipv6.h"
+/* Socket used for sending RSTs and ACKs */
+static struct socket *dccp_v6_ctl_socket;
+
static void dccp_v6_ctl_send_reset(struct sk_buff *skb);
static void dccp_v6_reqsk_send_ack(struct sk_buff *skb,
struct request_sock *req);
@@ -53,7 +56,7 @@ static void dccp_v6_hash(struct sock *sk)
{
if (sk->sk_state != DCCP_CLOSED) {
if (inet_csk(sk)->icsk_af_ops == &dccp_ipv6_mapped) {
- dccp_prot.hash(sk);
+ dccp_hash(sk);
return;
}
local_bh_disable();
@@ -63,8 +66,8 @@ static void dccp_v6_hash(struct sock *sk)
}
static inline u16 dccp_v6_check(struct dccp_hdr *dh, int len,
- struct in6_addr *saddr,
- struct in6_addr *daddr,
+ struct in6_addr *saddr,
+ struct in6_addr *daddr,
unsigned long base)
{
return csum_ipv6_magic(saddr, daddr, len, IPPROTO_DCCP, base);
@@ -79,17 +82,17 @@ static __u32 dccp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
skb->nh.ipv6h->saddr.s6_addr32,
dh->dccph_dport,
dh->dccph_sport);
- else
- return secure_dccp_sequence_number(skb->nh.iph->daddr,
- skb->nh.iph->saddr,
- dh->dccph_dport,
- dh->dccph_sport);
+
+ return secure_dccp_sequence_number(skb->nh.iph->daddr,
+ skb->nh.iph->saddr,
+ dh->dccph_dport,
+ dh->dccph_sport);
}
-static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
+static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len)
{
- struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
+ struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr;
struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -102,10 +105,10 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
dp->dccps_role = DCCP_ROLE_CLIENT;
- if (addr_len < SIN6_LEN_RFC2133)
+ if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
- if (usin->sin6_family != AF_INET6)
+ if (usin->sin6_family != AF_INET6)
return -EAFNOSUPPORT;
memset(&fl, 0, sizeof(fl));
@@ -122,17 +125,15 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
fl6_sock_release(flowlabel);
}
}
-
/*
- * connect() to INADDR_ANY means loopback (BSD'ism).
- */
-
- if (ipv6_addr_any(&usin->sin6_addr))
- usin->sin6_addr.s6_addr[15] = 0x1;
+ * connect() to INADDR_ANY means loopback (BSD'ism).
+ */
+ if (ipv6_addr_any(&usin->sin6_addr))
+ usin->sin6_addr.s6_addr[15] = 1;
addr_type = ipv6_addr_type(&usin->sin6_addr);
- if(addr_type & IPV6_ADDR_MULTICAST)
+ if (addr_type & IPV6_ADDR_MULTICAST)
return -ENETUNREACH;
if (addr_type & IPV6_ADDR_LINKLOCAL) {
@@ -157,9 +158,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
np->flow_label = fl.fl6_flowlabel;
/*
- * DCCP over IPv4
+ * DCCP over IPv4
*/
-
if (addr_type == IPV6_ADDR_MAPPED) {
u32 exthdrlen = icsk->icsk_ext_hdr_len;
struct sockaddr_in sin;
@@ -177,7 +177,6 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
sk->sk_backlog_rcv = dccp_v4_do_rcv;
err = dccp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
-
if (err) {
icsk->icsk_ext_hdr_len = exthdrlen;
icsk->icsk_af_ops = &dccp_ipv6_af_ops;
@@ -203,8 +202,9 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
fl.fl_ip_dport = usin->sin6_port;
fl.fl_ip_sport = inet->sport;
- if (np->opt && np->opt->srcrt) {
- struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
+ if (np->opt != NULL && np->opt->srcrt != NULL) {
+ const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
+
ipv6_addr_copy(&final, &fl.fl6_dst);
ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
final_p = &final;
@@ -213,10 +213,12 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
err = ip6_dst_lookup(sk, &dst, &fl);
if (err)
goto failure;
+
if (final_p)
ipv6_addr_copy(&fl.fl6_dst, final_p);
- if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+ err = xfrm_lookup(&dst, &fl, sk, 0);
+ if (err < 0)
goto failure;
if (saddr == NULL) {
@@ -231,7 +233,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
ip6_dst_store(sk, dst, NULL);
icsk->icsk_ext_hdr_len = 0;
- if (np->opt)
+ if (np->opt != NULL)
icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
np->opt->opt_nflen);
@@ -264,7 +266,7 @@ failure:
}
static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- int type, int code, int offset, __u32 info)
+ int type, int code, int offset, __be32 info)
{
struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data;
const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
@@ -305,7 +307,6 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
/* icmp should have updated the destination cache entry */
dst = __sk_dst_check(sk, np->dst_cookie);
-
if (dst == NULL) {
struct inet_sock *inet = inet_sk(sk);
struct flowi fl;
@@ -322,16 +323,17 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
fl.fl_ip_dport = inet->dport;
fl.fl_ip_sport = inet->sport;
- if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
+ err = ip6_dst_lookup(sk, &dst, &fl);
+ if (err) {
sk->sk_err_soft = -err;
goto out;
}
- if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
+ err = xfrm_lookup(&dst, &fl, sk, 0);
+ if (err < 0) {
sk->sk_err_soft = -err;
goto out;
}
-
} else
dst_hold(dst);
@@ -355,11 +357,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
req = inet6_csk_search_req(sk, &prev, dh->dccph_dport,
&hdr->daddr, &hdr->saddr,
inet6_iif(skb));
- if (!req)
+ if (req == NULL)
goto out;
- /* ICMPs are not backlogged, hence we cannot get
- * an established socket here.
+ /*
+ * ICMPs are not backlogged, hence we cannot get an established
+ * socket here.
*/
BUG_TRAP(req->sk == NULL);
@@ -373,7 +376,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
case DCCP_REQUESTING:
case DCCP_RESPOND: /* Cannot happen.
- It can, it SYNs are crossed. --ANK */
+ It can, it SYNs are crossed. --ANK */
if (!sock_owned_by_user(sk)) {
DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
sk->sk_err = err;
@@ -382,7 +385,6 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
* (see connect in sock.c)
*/
sk->sk_error_report(sk);
-
dccp_done(sk);
} else
sk->sk_err_soft = err;
@@ -428,14 +430,16 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
ireq6->pktopts) {
struct sk_buff *pktopts = ireq6->pktopts;
struct inet6_skb_parm *rxopt = IP6CB(pktopts);
+
if (rxopt->srcrt)
opt = ipv6_invert_rthdr(sk,
(struct ipv6_rt_hdr *)(pktopts->nh.raw +
rxopt->srcrt));
}
- if (opt && opt->srcrt) {
- struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt;
+ if (opt != NULL && opt->srcrt != NULL) {
+ const struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt;
+
ipv6_addr_copy(&final, &fl.fl6_dst);
ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
final_p = &final;
@@ -444,15 +448,19 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
err = ip6_dst_lookup(sk, &dst, &fl);
if (err)
goto done;
+
if (final_p)
ipv6_addr_copy(&fl.fl6_dst, final_p);
- if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+
+ err = xfrm_lookup(&dst, &fl, sk, 0);
+ if (err < 0)
goto done;
}
skb = dccp_make_response(sk, dst, req);
if (skb != NULL) {
struct dccp_hdr *dh = dccp_hdr(skb);
+
dh->dccph_checksum = dccp_v6_check(dh, skb->len,
&ireq6->loc_addr,
&ireq6->rmt_addr,
@@ -466,7 +474,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
}
done:
- if (opt && opt != np->opt)
+ if (opt != NULL && opt != np->opt)
sock_kfree_s(sk, opt, opt->tot_len);
dst_release(dst);
return err;
@@ -497,7 +505,7 @@ static void dccp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
struct dccp_hdr *dh = dccp_hdr(skb);
dh->dccph_checksum = csum_ipv6_magic(&np->saddr, &np->daddr,
- len, IPPROTO_DCCP,
+ len, IPPROTO_DCCP,
csum_partial((char *)dh,
dh->dccph_doff << 2,
skb->csum));
@@ -505,8 +513,8 @@ static void dccp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb)
{
- struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
- const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) +
+ struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
+ const u32 dccp_hdr_reset_len = sizeof(struct dccp_hdr) +
sizeof(struct dccp_hdr_ext) +
sizeof(struct dccp_hdr_reset);
struct sk_buff *skb;
@@ -517,20 +525,14 @@ static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb)
return;
if (!ipv6_unicast_destination(rxskb))
- return;
-
- /*
- * We need to grab some memory, and put together an RST,
- * and then put it into the queue to be sent.
- */
+ return;
- skb = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) +
- dccp_hdr_reset_len, GFP_ATOMIC);
- if (skb == NULL)
+ skb = alloc_skb(dccp_v6_ctl_socket->sk->sk_prot->max_header,
+ GFP_ATOMIC);
+ if (skb == NULL)
return;
- skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr) +
- dccp_hdr_reset_len);
+ skb_reserve(skb, dccp_v6_ctl_socket->sk->sk_prot->max_header);
skb->h.raw = skb_push(skb, dccp_hdr_reset_len);
dh = dccp_hdr(skb);
@@ -568,7 +570,7 @@ static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb)
/* sk = NULL, but it is safe for now. RST socket required. */
if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) {
if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) {
- ip6_xmit(NULL, skb, &fl, NULL, 0);
+ ip6_xmit(dccp_v6_ctl_socket->sk, skb, &fl, NULL, 0);
DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
return;
@@ -578,22 +580,22 @@ static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb)
kfree_skb(skb);
}
-static void dccp_v6_ctl_send_ack(struct sk_buff *rxskb)
+static void dccp_v6_reqsk_send_ack(struct sk_buff *rxskb,
+ struct request_sock *req)
{
struct flowi fl;
struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
- const int dccp_hdr_ack_len = sizeof(struct dccp_hdr) +
+ const u32 dccp_hdr_ack_len = sizeof(struct dccp_hdr) +
sizeof(struct dccp_hdr_ext) +
sizeof(struct dccp_hdr_ack_bits);
struct sk_buff *skb;
- skb = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) +
- dccp_hdr_ack_len, GFP_ATOMIC);
+ skb = alloc_skb(dccp_v6_ctl_socket->sk->sk_prot->max_header,
+ GFP_ATOMIC);
if (skb == NULL)
return;
- skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr) +
- dccp_hdr_ack_len);
+ skb_reserve(skb, dccp_v6_ctl_socket->sk->sk_prot->max_header);
skb->h.raw = skb_push(skb, dccp_hdr_ack_len);
dh = dccp_hdr(skb);
@@ -605,7 +607,7 @@ static void dccp_v6_ctl_send_ack(struct sk_buff *rxskb)
dh->dccph_dport = rxdh->dccph_sport;
dh->dccph_doff = dccp_hdr_ack_len / 4;
dh->dccph_x = 1;
-
+
dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq);
dccp_hdr_set_ack(dccp_hdr_ack_bits(skb),
DCCP_SKB_CB(rxskb)->dccpd_seq);
@@ -623,7 +625,7 @@ static void dccp_v6_ctl_send_ack(struct sk_buff *rxskb)
if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) {
if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) {
- ip6_xmit(NULL, skb, &fl, NULL, 0);
+ ip6_xmit(dccp_v6_ctl_socket->sk, skb, &fl, NULL, 0);
DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
return;
}
@@ -632,12 +634,6 @@ static void dccp_v6_ctl_send_ack(struct sk_buff *rxskb)
kfree_skb(skb);
}
-static void dccp_v6_reqsk_send_ack(struct sk_buff *skb,
- struct request_sock *req)
-{
- dccp_v6_ctl_send_ack(skb);
-}
-
static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
{
const struct dccp_hdr *dh = dccp_hdr(skb);
@@ -657,7 +653,6 @@ static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
&iph->saddr, dh->dccph_sport,
&iph->daddr, ntohs(dh->dccph_dport),
inet6_iif(skb));
-
if (nsk != NULL) {
if (nsk->sk_state != DCCP_TIME_WAIT) {
bh_lock_sock(nsk);
@@ -678,7 +673,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
struct dccp_request_sock *dreq;
struct inet6_request_sock *ireq6;
struct ipv6_pinfo *np = inet6_sk(sk);
- const __u32 service = dccp_hdr_request(skb)->dccph_req_service;
+ const __be32 service = dccp_hdr_request(skb)->dccph_req_service;
struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
__u8 reset_code = DCCP_RESET_CODE_TOO_BUSY;
@@ -686,17 +681,17 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
return dccp_v4_conn_request(sk, skb);
if (!ipv6_unicast_destination(skb))
- goto drop;
+ goto drop;
if (dccp_bad_service_code(sk, service)) {
reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
goto drop;
}
/*
- * There are no SYN attacks on IPv6, yet...
+ * There are no SYN attacks on IPv6, yet...
*/
if (inet_csk_reqsk_queue_is_full(sk))
- goto drop;
+ goto drop;
if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
goto drop;
@@ -730,7 +725,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL)
ireq6->iif = inet6_iif(skb);
- /*
+ /*
* Step 3: Process LISTEN state
*
* Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
@@ -774,9 +769,8 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
/*
* v6 mapped
*/
-
newsk = dccp_v4_request_recv_sock(sk, skb, req, dst);
- if (newsk == NULL)
+ if (newsk == NULL)
return NULL;
newdp6 = (struct dccp6_sock *)newsk;
@@ -822,9 +816,9 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
if (sk_acceptq_is_full(sk))
goto out_overflow;
- if (np->rxopt.bits.osrcrt == 2 &&
- opt == NULL && ireq6->pktopts) {
- struct inet6_skb_parm *rxopt = IP6CB(ireq6->pktopts);
+ if (np->rxopt.bits.osrcrt == 2 && opt == NULL && ireq6->pktopts) {
+ const struct inet6_skb_parm *rxopt = IP6CB(ireq6->pktopts);
+
if (rxopt->srcrt)
opt = ipv6_invert_rthdr(sk,
(struct ipv6_rt_hdr *)(ireq6->pktopts->nh.raw +
@@ -838,8 +832,9 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
memset(&fl, 0, sizeof(fl));
fl.proto = IPPROTO_DCCP;
ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
- if (opt && opt->srcrt) {
- struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
+ if (opt != NULL && opt->srcrt != NULL) {
+ const struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt;
+
ipv6_addr_copy(&final, &fl.fl6_dst);
ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
final_p = &final;
@@ -857,7 +852,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
goto out;
- }
+ }
newsk = dccp_create_openreq_child(sk, req, skb);
if (newsk == NULL)
@@ -870,9 +865,8 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
*/
ip6_dst_store(newsk, dst, NULL);
- newsk->sk_route_caps = dst->dev->features &
- ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
-
+ newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM |
+ NETIF_F_TSO);
newdp6 = (struct dccp6_sock *)newsk;
newinet = inet_sk(newsk);
newinet->pinet6 = &newdp6->inet6;
@@ -886,7 +880,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
ipv6_addr_copy(&newnp->rcv_saddr, &ireq6->loc_addr);
newsk->sk_bound_dev_if = ireq6->iif;
- /* Now IPv6 options...
+ /* Now IPv6 options...
First: no IPv4 options.
*/
@@ -908,20 +902,20 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
newnp->mcast_oif = inet6_iif(skb);
newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
- /* Clone native IPv6 options from listening socket (if any)
-
- Yes, keeping reference count would be much more clever,
- but we make one more one thing there: reattach optmem
- to newsk.
+ /*
+ * Clone native IPv6 options from listening socket (if any)
+ *
+ * Yes, keeping reference count would be much more clever, but we make
+ * one more one thing there: reattach optmem to newsk.
*/
- if (opt) {
+ if (opt != NULL) {
newnp->opt = ipv6_dup_options(newsk, opt);
if (opt != np->opt)
sock_kfree_s(sk, opt, opt->tot_len);
}
inet_csk(newsk)->icsk_ext_hdr_len = 0;
- if (newnp->opt)
+ if (newnp->opt != NULL)
inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
newnp->opt->opt_flen);
@@ -938,7 +932,7 @@ out_overflow:
NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
out:
NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
- if (opt && opt != np->opt)
+ if (opt != NULL && opt != np->opt)
sock_kfree_s(sk, opt, opt->tot_len);
dst_release(dst);
return NULL;
@@ -972,8 +966,8 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
goto discard;
/*
- * socket locking is here for SMP purposes as backlog rcv
- * is currently called with bh processing disabled.
+ * socket locking is here for SMP purposes as backlog rcv is currently
+ * called with bh processing disabled.
*/
/* Do Stevens' IPV6_PKTOPTIONS.
@@ -998,20 +992,20 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
}
- if (sk->sk_state == DCCP_LISTEN) {
+ if (sk->sk_state == DCCP_LISTEN) {
struct sock *nsk = dccp_v6_hnd_req(sk, skb);
- if (!nsk)
- goto discard;
+ if (nsk == NULL)
+ goto discard;
/*
* Queue it on the new socket if the new socket is active,
* otherwise we just shortcircuit this and continue with
* the new socket..
*/
- if(nsk != sk) {
+ if (nsk != sk) {
if (dccp_child_process(sk, nsk, skb))
goto reset;
- if (opt_skb)
+ if (opt_skb != NULL)
__kfree_skb(opt_skb);
return 0;
}
@@ -1024,7 +1018,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
reset:
dccp_v6_ctl_send_reset(skb);
discard:
- if (opt_skb)
+ if (opt_skb != NULL)
__kfree_skb(opt_skb);
kfree_skb(skb);
return 0;
@@ -1057,7 +1051,7 @@ static int dccp_v6_rcv(struct sk_buff **pskb)
dh->dccph_sport,
&skb->nh.ipv6h->daddr, ntohs(dh->dccph_dport),
inet6_iif(skb));
- /*
+ /*
* Step 2:
* If no socket ...
* Generate Reset(No Connection) unless P.type == Reset
@@ -1066,15 +1060,14 @@ static int dccp_v6_rcv(struct sk_buff **pskb)
if (sk == NULL)
goto no_dccp_socket;
- /*
+ /*
* Step 2:
* ... or S.state == TIMEWAIT,
* Generate Reset(No Connection) unless P.type == Reset
* Drop packet and return
*/
-
if (sk->sk_state == DCCP_TIME_WAIT)
- goto do_time_wait;
+ goto do_time_wait;
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
@@ -1113,32 +1106,40 @@ do_time_wait:
}
static struct inet_connection_sock_af_ops dccp_ipv6_af_ops = {
- .queue_xmit = inet6_csk_xmit,
- .send_check = dccp_v6_send_check,
- .rebuild_header = inet6_sk_rebuild_header,
- .conn_request = dccp_v6_conn_request,
- .syn_recv_sock = dccp_v6_request_recv_sock,
- .net_header_len = sizeof(struct ipv6hdr),
- .setsockopt = ipv6_setsockopt,
- .getsockopt = ipv6_getsockopt,
- .addr2sockaddr = inet6_csk_addr2sockaddr,
- .sockaddr_len = sizeof(struct sockaddr_in6)
+ .queue_xmit = inet6_csk_xmit,
+ .send_check = dccp_v6_send_check,
+ .rebuild_header = inet6_sk_rebuild_header,
+ .conn_request = dccp_v6_conn_request,
+ .syn_recv_sock = dccp_v6_request_recv_sock,
+ .net_header_len = sizeof(struct ipv6hdr),
+ .setsockopt = ipv6_setsockopt,
+ .getsockopt = ipv6_getsockopt,
+ .addr2sockaddr = inet6_csk_addr2sockaddr,
+ .sockaddr_len = sizeof(struct sockaddr_in6),
+#ifdef CONFIG_COMPAT
+ .compat_setsockopt = compat_ipv6_setsockopt,
+ .compat_getsockopt = compat_ipv6_getsockopt,
+#endif
};
/*
* DCCP over IPv4 via INET6 API
*/
static struct inet_connection_sock_af_ops dccp_ipv6_mapped = {
- .queue_xmit = ip_queue_xmit,
- .send_check = dccp_v4_send_check,
- .rebuild_header = inet_sk_rebuild_header,
- .conn_request = dccp_v6_conn_request,
- .syn_recv_sock = dccp_v6_request_recv_sock,
- .net_header_len = sizeof(struct iphdr),
- .setsockopt = ipv6_setsockopt,
- .getsockopt = ipv6_getsockopt,
- .addr2sockaddr = inet6_csk_addr2sockaddr,
- .sockaddr_len = sizeof(struct sockaddr_in6)
+ .queue_xmit = ip_queue_xmit,
+ .send_check = dccp_v4_send_check,
+ .rebuild_header = inet_sk_rebuild_header,
+ .conn_request = dccp_v6_conn_request,
+ .syn_recv_sock = dccp_v6_request_recv_sock,
+ .net_header_len = sizeof(struct iphdr),
+ .setsockopt = ipv6_setsockopt,
+ .getsockopt = ipv6_getsockopt,
+ .addr2sockaddr = inet6_csk_addr2sockaddr,
+ .sockaddr_len = sizeof(struct sockaddr_in6),
+#ifdef CONFIG_COMPAT
+ .compat_setsockopt = compat_ipv6_setsockopt,
+ .compat_getsockopt = compat_ipv6_getsockopt,
+#endif
};
/* NOTE: A lot of things set to zero explicitly by call to
@@ -1146,71 +1147,83 @@ static struct inet_connection_sock_af_ops dccp_ipv6_mapped = {
*/
static int dccp_v6_init_sock(struct sock *sk)
{
- int err = dccp_v4_init_sock(sk);
+ static __u8 dccp_v6_ctl_sock_initialized;
+ int err = dccp_init_sock(sk, dccp_v6_ctl_sock_initialized);
- if (err == 0)
+ if (err == 0) {
+ if (unlikely(!dccp_v6_ctl_sock_initialized))
+ dccp_v6_ctl_sock_initialized = 1;
inet_csk(sk)->icsk_af_ops = &dccp_ipv6_af_ops;
+ }
return err;
}
static int dccp_v6_destroy_sock(struct sock *sk)
{
- dccp_v4_destroy_sock(sk);
+ dccp_destroy_sock(sk);
return inet6_destroy_sock(sk);
}
static struct proto dccp_v6_prot = {
- .name = "DCCPv6",
- .owner = THIS_MODULE,
- .close = dccp_close,
- .connect = dccp_v6_connect,
- .disconnect = dccp_disconnect,
- .ioctl = dccp_ioctl,
- .init = dccp_v6_init_sock,
- .setsockopt = dccp_setsockopt,
- .getsockopt = dccp_getsockopt,
- .sendmsg = dccp_sendmsg,
- .recvmsg = dccp_recvmsg,
- .backlog_rcv = dccp_v6_do_rcv,
- .hash = dccp_v6_hash,
- .unhash = dccp_unhash,
- .accept = inet_csk_accept,
- .get_port = dccp_v6_get_port,
- .shutdown = dccp_shutdown,
- .destroy = dccp_v6_destroy_sock,
- .orphan_count = &dccp_orphan_count,
- .max_header = MAX_DCCP_HEADER,
- .obj_size = sizeof(struct dccp6_sock),
- .rsk_prot = &dccp6_request_sock_ops,
- .twsk_prot = &dccp6_timewait_sock_ops,
+ .name = "DCCPv6",
+ .owner = THIS_MODULE,
+ .close = dccp_close,
+ .connect = dccp_v6_connect,
+ .disconnect = dccp_disconnect,
+ .ioctl = dccp_ioctl,
+ .init = dccp_v6_init_sock,
+ .setsockopt = dccp_setsockopt,
+ .getsockopt = dccp_getsockopt,
+ .sendmsg = dccp_sendmsg,
+ .recvmsg = dccp_recvmsg,
+ .backlog_rcv = dccp_v6_do_rcv,
+ .hash = dccp_v6_hash,
+ .unhash = dccp_unhash,
+ .accept = inet_csk_accept,
+ .get_port = dccp_v6_get_port,
+ .shutdown = dccp_shutdown,
+ .destroy = dccp_v6_destroy_sock,
+ .orphan_count = &dccp_orphan_count,
+ .max_header = MAX_DCCP_HEADER,
+ .obj_size = sizeof(struct dccp6_sock),
+ .rsk_prot = &dccp6_request_sock_ops,
+ .twsk_prot = &dccp6_timewait_sock_ops,
+#ifdef CONFIG_COMPAT
+ .compat_setsockopt = compat_dccp_setsockopt,
+ .compat_getsockopt = compat_dccp_getsockopt,
+#endif
};
static struct inet6_protocol dccp_v6_protocol = {
- .handler = dccp_v6_rcv,
- .err_handler = dccp_v6_err,
- .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
+ .handler = dccp_v6_rcv,
+ .err_handler = dccp_v6_err,
+ .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
};
static struct proto_ops inet6_dccp_ops = {
- .family = PF_INET6,
- .owner = THIS_MODULE,
- .release = inet6_release,
- .bind = inet6_bind,
- .connect = inet_stream_connect,
- .socketpair = sock_no_socketpair,
- .accept = inet_accept,
- .getname = inet6_getname,
- .poll = dccp_poll,
- .ioctl = inet6_ioctl,
- .listen = inet_dccp_listen,
- .shutdown = inet_shutdown,
- .setsockopt = sock_common_setsockopt,
- .getsockopt = sock_common_getsockopt,
- .sendmsg = inet_sendmsg,
- .recvmsg = sock_common_recvmsg,
- .mmap = sock_no_mmap,
- .sendpage = sock_no_sendpage,
+ .family = PF_INET6,
+ .owner = THIS_MODULE,
+ .release = inet6_release,
+ .bind = inet6_bind,
+ .connect = inet_stream_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = inet_accept,
+ .getname = inet6_getname,
+ .poll = dccp_poll,
+ .ioctl = inet6_ioctl,
+ .listen = inet_dccp_listen,
+ .shutdown = inet_shutdown,
+ .setsockopt = sock_common_setsockopt,
+ .getsockopt = sock_common_getsockopt,
+ .sendmsg = inet_sendmsg,
+ .recvmsg = sock_common_recvmsg,
+ .mmap = sock_no_mmap,
+ .sendpage = sock_no_sendpage,
+#ifdef CONFIG_COMPAT
+ .compat_setsockopt = compat_sock_common_setsockopt,
+ .compat_getsockopt = compat_sock_common_getsockopt,
+#endif
};
static struct inet_protosw dccp_v6_protosw = {
@@ -1234,8 +1247,16 @@ static int __init dccp_v6_init(void)
goto out_unregister_proto;
inet6_register_protosw(&dccp_v6_protosw);
+
+ err = inet_csk_ctl_sock_create(&dccp_v6_ctl_socket, PF_INET6,
+ SOCK_DCCP, IPPROTO_DCCP);
+ if (err != 0)
+ goto out_unregister_protosw;
out:
return err;
+out_unregister_protosw:
+ inet6_del_protocol(&dccp_v6_protocol, IPPROTO_DCCP);
+ inet6_unregister_protosw(&dccp_v6_protosw);
out_unregister_proto:
proto_unregister(&dccp_v6_prot);
goto out;
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 29261fc198e7..c0349e5b0551 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -22,6 +22,7 @@
#include "ackvec.h"
#include "ccid.h"
#include "dccp.h"
+#include "feat.h"
struct inet_timewait_death_row dccp_death_row = {
.sysctl_max_tw_buckets = NR_FILE * 2,
@@ -106,6 +107,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
const struct dccp_request_sock *dreq = dccp_rsk(req);
struct inet_connection_sock *newicsk = inet_csk(sk);
struct dccp_sock *newdp = dccp_sk(newsk);
+ struct dccp_minisock *newdmsk = dccp_msk(newsk);
newdp->dccps_role = DCCP_ROLE_SERVER;
newdp->dccps_hc_rx_ackvec = NULL;
@@ -114,27 +116,27 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
newicsk->icsk_rto = DCCP_TIMEOUT_INIT;
do_gettimeofday(&newdp->dccps_epoch);
- if (newdp->dccps_options.dccpo_send_ack_vector) {
+ if (dccp_feat_clone(sk, newsk))
+ goto out_free;
+
+ if (newdmsk->dccpms_send_ack_vector) {
newdp->dccps_hc_rx_ackvec =
- dccp_ackvec_alloc(DCCP_MAX_ACKVEC_LEN,
- GFP_ATOMIC);
- /*
- * XXX: We're using the same CCIDs set on the parent,
- * i.e. sk_clone copied the master sock and left the
- * CCID pointers for this child, that is why we do the
- * __ccid_get calls.
- */
+ dccp_ackvec_alloc(GFP_ATOMIC);
if (unlikely(newdp->dccps_hc_rx_ackvec == NULL))
goto out_free;
}
- if (unlikely(ccid_hc_rx_init(newdp->dccps_hc_rx_ccid,
- newsk) != 0 ||
- ccid_hc_tx_init(newdp->dccps_hc_tx_ccid,
- newsk) != 0)) {
+ newdp->dccps_hc_rx_ccid =
+ ccid_hc_rx_new(newdmsk->dccpms_rx_ccid,
+ newsk, GFP_ATOMIC);
+ newdp->dccps_hc_tx_ccid =
+ ccid_hc_tx_new(newdmsk->dccpms_tx_ccid,
+ newsk, GFP_ATOMIC);
+ if (unlikely(newdp->dccps_hc_rx_ccid == NULL ||
+ newdp->dccps_hc_tx_ccid == NULL)) {
dccp_ackvec_free(newdp->dccps_hc_rx_ackvec);
- ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk);
- ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk);
+ ccid_hc_rx_delete(newdp->dccps_hc_rx_ccid, newsk);
+ ccid_hc_tx_delete(newdp->dccps_hc_tx_ccid, newsk);
out_free:
/* It is still raw copy of parent, so invalidate
* destructor and make plain sk_free() */
@@ -143,9 +145,6 @@ out_free:
return NULL;
}
- __ccid_get(newdp->dccps_hc_rx_ccid);
- __ccid_get(newdp->dccps_hc_tx_ccid);
-
/*
* Step 3: Process LISTEN state
*
@@ -155,7 +154,7 @@ out_free:
*/
/* See dccp_v4_conn_request */
- newdp->dccps_options.dccpo_sequence_window = req->rcv_wnd;
+ newdmsk->dccpms_sequence_window = req->rcv_wnd;
newdp->dccps_gar = newdp->dccps_isr = dreq->dreq_isr;
dccp_update_gsr(newsk, dreq->dreq_isr);
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 0a76426c9aea..e9feb2a0c770 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -21,19 +21,23 @@
#include "ackvec.h"
#include "ccid.h"
#include "dccp.h"
+#include "feat.h"
-/* stores the default values for new connection. may be changed with sysctl */
-static const struct dccp_options dccpo_default_values = {
- .dccpo_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW,
- .dccpo_rx_ccid = DCCPF_INITIAL_CCID,
- .dccpo_tx_ccid = DCCPF_INITIAL_CCID,
- .dccpo_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR,
- .dccpo_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT,
-};
+int dccp_feat_default_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW;
+int dccp_feat_default_rx_ccid = DCCPF_INITIAL_CCID;
+int dccp_feat_default_tx_ccid = DCCPF_INITIAL_CCID;
+int dccp_feat_default_ack_ratio = DCCPF_INITIAL_ACK_RATIO;
+int dccp_feat_default_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR;
+int dccp_feat_default_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT;
-void dccp_options_init(struct dccp_options *dccpo)
+void dccp_minisock_init(struct dccp_minisock *dmsk)
{
- memcpy(dccpo, &dccpo_default_values, sizeof(*dccpo));
+ dmsk->dccpms_sequence_window = dccp_feat_default_sequence_window;
+ dmsk->dccpms_rx_ccid = dccp_feat_default_rx_ccid;
+ dmsk->dccpms_tx_ccid = dccp_feat_default_tx_ccid;
+ dmsk->dccpms_ack_ratio = dccp_feat_default_ack_ratio;
+ dmsk->dccpms_send_ack_vector = dccp_feat_default_send_ack_vector;
+ dmsk->dccpms_send_ndp_count = dccp_feat_default_send_ndp_count;
}
static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len)
@@ -69,9 +73,12 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
unsigned char opt, len;
unsigned char *value;
u32 elapsed_time;
+ int rc;
+ int mandatory = 0;
memset(opt_recv, 0, sizeof(*opt_recv));
+ opt = len = 0;
while (opt_ptr != opt_end) {
opt = *opt_ptr++;
len = 0;
@@ -100,6 +107,12 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
switch (opt) {
case DCCPO_PADDING:
break;
+ case DCCPO_MANDATORY:
+ if (mandatory)
+ goto out_invalid_option;
+ if (pkt_type != DCCP_PKT_DATA)
+ mandatory = 1;
+ break;
case DCCPO_NDP_COUNT:
if (len > 3)
goto out_invalid_option;
@@ -108,12 +121,37 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
dccp_pr_debug("%sNDP count=%d\n", debug_prefix,
opt_recv->dccpor_ndp);
break;
+ case DCCPO_CHANGE_L:
+ /* fall through */
+ case DCCPO_CHANGE_R:
+ if (len < 2)
+ goto out_invalid_option;
+ rc = dccp_feat_change_recv(sk, opt, *value, value + 1,
+ len - 1);
+ /*
+ * When there is a change error, change_recv is
+ * responsible for dealing with it. i.e. reply with an
+ * empty confirm.
+ * If the change was mandatory, then we need to die.
+ */
+ if (rc && mandatory)
+ goto out_invalid_option;
+ break;
+ case DCCPO_CONFIRM_L:
+ /* fall through */
+ case DCCPO_CONFIRM_R:
+ if (len < 2)
+ goto out_invalid_option;
+ if (dccp_feat_confirm_recv(sk, opt, *value,
+ value + 1, len - 1))
+ goto out_invalid_option;
+ break;
case DCCPO_ACK_VECTOR_0:
case DCCPO_ACK_VECTOR_1:
if (pkt_type == DCCP_PKT_DATA)
- continue;
+ break;
- if (dp->dccps_options.dccpo_send_ack_vector &&
+ if (dccp_msk(sk)->dccpms_send_ack_vector &&
dccp_ackvec_parse(sk, skb, opt, value, len))
goto out_invalid_option;
break;
@@ -121,7 +159,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
if (len != 4)
goto out_invalid_option;
- opt_recv->dccpor_timestamp = ntohl(*(u32 *)value);
+ opt_recv->dccpor_timestamp = ntohl(*(__be32 *)value);
dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp;
dccp_timestamp(sk, &dp->dccps_timestamp_time);
@@ -135,7 +173,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
if (len != 4 && len != 6 && len != 8)
goto out_invalid_option;
- opt_recv->dccpor_timestamp_echo = ntohl(*(u32 *)value);
+ opt_recv->dccpor_timestamp_echo = ntohl(*(__be32 *)value);
dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, ",
debug_prefix,
@@ -149,9 +187,9 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
break;
if (len == 6)
- elapsed_time = ntohs(*(u16 *)(value + 4));
+ elapsed_time = ntohs(*(__be16 *)(value + 4));
else
- elapsed_time = ntohl(*(u32 *)(value + 4));
+ elapsed_time = ntohl(*(__be32 *)(value + 4));
/* Give precedence to the biggest ELAPSED_TIME */
if (elapsed_time > opt_recv->dccpor_elapsed_time)
@@ -165,9 +203,9 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
continue;
if (len == 2)
- elapsed_time = ntohs(*(u16 *)value);
+ elapsed_time = ntohs(*(__be16 *)value);
else
- elapsed_time = ntohl(*(u32 *)value);
+ elapsed_time = ntohl(*(__be32 *)value);
if (elapsed_time > opt_recv->dccpor_elapsed_time)
opt_recv->dccpor_elapsed_time = elapsed_time;
@@ -208,8 +246,15 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
sk, opt, len);
break;
}
+
+ if (opt != DCCPO_MANDATORY)
+ mandatory = 0;
}
+ /* mandatory was the last byte in option list -> reset connection */
+ if (mandatory)
+ goto out_invalid_option;
+
return 0;
out_invalid_option:
@@ -219,6 +264,8 @@ out_invalid_option:
return -1;
}
+EXPORT_SYMBOL_GPL(dccp_parse_options);
+
static void dccp_encode_value_var(const u32 value, unsigned char *to,
const unsigned int len)
{
@@ -237,17 +284,14 @@ static inline int dccp_ndp_len(const int ndp)
return likely(ndp <= 0xFF) ? 1 : ndp <= 0xFFFF ? 2 : 3;
}
-void dccp_insert_option(struct sock *sk, struct sk_buff *skb,
+int dccp_insert_option(struct sock *sk, struct sk_buff *skb,
const unsigned char option,
const void *value, const unsigned char len)
{
unsigned char *to;
- if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 2 > DCCP_MAX_OPT_LEN) {
- LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert "
- "%d option!\n", option);
- return;
- }
+ if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 2 > DCCP_MAX_OPT_LEN)
+ return -1;
DCCP_SKB_CB(skb)->dccpd_opt_len += len + 2;
@@ -256,11 +300,12 @@ void dccp_insert_option(struct sock *sk, struct sk_buff *skb,
*to++ = len + 2;
memcpy(to, value, len);
+ return 0;
}
EXPORT_SYMBOL_GPL(dccp_insert_option);
-static void dccp_insert_option_ndp(struct sock *sk, struct sk_buff *skb)
+static int dccp_insert_option_ndp(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
int ndp = dp->dccps_ndp_count;
@@ -276,7 +321,7 @@ static void dccp_insert_option_ndp(struct sock *sk, struct sk_buff *skb)
const int len = ndp_len + 2;
if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
- return;
+ return -1;
DCCP_SKB_CB(skb)->dccpd_opt_len += len;
@@ -285,6 +330,8 @@ static void dccp_insert_option_ndp(struct sock *sk, struct sk_buff *skb)
*ptr++ = len;
dccp_encode_value_var(ndp, ptr, ndp_len);
}
+
+ return 0;
}
static inline int dccp_elapsed_time_len(const u32 elapsed_time)
@@ -292,27 +339,18 @@ static inline int dccp_elapsed_time_len(const u32 elapsed_time)
return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 2 : 4;
}
-void dccp_insert_option_elapsed_time(struct sock *sk,
- struct sk_buff *skb,
- u32 elapsed_time)
+int dccp_insert_option_elapsed_time(struct sock *sk, struct sk_buff *skb,
+ u32 elapsed_time)
{
-#ifdef CONFIG_IP_DCCP_DEBUG
- struct dccp_sock *dp = dccp_sk(sk);
- const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
- "CLIENT TX opt: " : "server TX opt: ";
-#endif
const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
const int len = 2 + elapsed_time_len;
unsigned char *to;
if (elapsed_time_len == 0)
- return;
+ return 0;
- if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
- LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to "
- "insert elapsed time!\n");
- return;
- }
+ if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
+ return -1;
DCCP_SKB_CB(skb)->dccpd_opt_len += len;
@@ -321,17 +359,14 @@ void dccp_insert_option_elapsed_time(struct sock *sk,
*to++ = len;
if (elapsed_time_len == 2) {
- const u16 var16 = htons((u16)elapsed_time);
+ const __be16 var16 = htons((u16)elapsed_time);
memcpy(to, &var16, 2);
} else {
- const u32 var32 = htonl(elapsed_time);
+ const __be32 var32 = htonl(elapsed_time);
memcpy(to, &var32, 4);
}
- dccp_pr_debug("%sELAPSED_TIME=%u, len=%d, seqno=%llu\n",
- debug_prefix, elapsed_time,
- len,
- (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq);
+ return 0;
}
EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time);
@@ -352,32 +387,27 @@ void dccp_timestamp(const struct sock *sk, struct timeval *tv)
EXPORT_SYMBOL_GPL(dccp_timestamp);
-void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb)
+int dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb)
{
struct timeval tv;
- u32 now;
-
+ __be32 now;
+
dccp_timestamp(sk, &tv);
- now = timeval_usecs(&tv) / 10;
+ now = htonl(timeval_usecs(&tv) / 10);
/* yes this will overflow but that is the point as we want a
* 10 usec 32 bit timer which mean it wraps every 11.9 hours */
- now = htonl(now);
- dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now));
+ return dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now));
}
EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp);
-static void dccp_insert_option_timestamp_echo(struct sock *sk,
- struct sk_buff *skb)
+static int dccp_insert_option_timestamp_echo(struct sock *sk,
+ struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
-#ifdef CONFIG_IP_DCCP_DEBUG
- const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
- "CLIENT TX opt: " : "server TX opt: ";
-#endif
struct timeval now;
- u32 tstamp_echo;
+ __be32 tstamp_echo;
u32 elapsed_time;
int len, elapsed_time_len;
unsigned char *to;
@@ -387,11 +417,8 @@ static void dccp_insert_option_timestamp_echo(struct sock *sk,
elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
len = 6 + elapsed_time_len;
- if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
- LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert "
- "timestamp echo!\n");
- return;
- }
+ if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
+ return -1;
DCCP_SKB_CB(skb)->dccpd_opt_len += len;
@@ -402,51 +429,149 @@ static void dccp_insert_option_timestamp_echo(struct sock *sk,
tstamp_echo = htonl(dp->dccps_timestamp_echo);
memcpy(to, &tstamp_echo, 4);
to += 4;
-
+
if (elapsed_time_len == 2) {
- const u16 var16 = htons((u16)elapsed_time);
+ const __be16 var16 = htons((u16)elapsed_time);
memcpy(to, &var16, 2);
} else if (elapsed_time_len == 4) {
- const u32 var32 = htonl(elapsed_time);
+ const __be32 var32 = htonl(elapsed_time);
memcpy(to, &var32, 4);
}
- dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, seqno=%llu\n",
- debug_prefix, dp->dccps_timestamp_echo,
- len,
- (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq);
-
dp->dccps_timestamp_echo = 0;
dp->dccps_timestamp_time.tv_sec = 0;
dp->dccps_timestamp_time.tv_usec = 0;
+ return 0;
}
-void dccp_insert_options(struct sock *sk, struct sk_buff *skb)
+static int dccp_insert_feat_opt(struct sk_buff *skb, u8 type, u8 feat,
+ u8 *val, u8 len)
+{
+ u8 *to;
+
+ if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 3 > DCCP_MAX_OPT_LEN) {
+ LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small"
+ " to insert feature %d option!\n", feat);
+ return -1;
+ }
+
+ DCCP_SKB_CB(skb)->dccpd_opt_len += len + 3;
+
+ to = skb_push(skb, len + 3);
+ *to++ = type;
+ *to++ = len + 3;
+ *to++ = feat;
+
+ if (len)
+ memcpy(to, val, len);
+ dccp_pr_debug("option %d feat %d len %d\n", type, feat, len);
+
+ return 0;
+}
+
+static int dccp_insert_options_feat(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
+ struct dccp_minisock *dmsk = dccp_msk(sk);
+ struct dccp_opt_pend *opt, *next;
+ int change = 0;
+
+ /* confirm any options [NN opts] */
+ list_for_each_entry_safe(opt, next, &dmsk->dccpms_conf, dccpop_node) {
+ dccp_insert_feat_opt(skb, opt->dccpop_type,
+ opt->dccpop_feat, opt->dccpop_val,
+ opt->dccpop_len);
+ /* fear empty confirms */
+ if (opt->dccpop_val)
+ kfree(opt->dccpop_val);
+ kfree(opt);
+ }
+ INIT_LIST_HEAD(&dmsk->dccpms_conf);
+
+ /* see which features we need to send */
+ list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
+ /* see if we need to send any confirm */
+ if (opt->dccpop_sc) {
+ dccp_insert_feat_opt(skb, opt->dccpop_type + 1,
+ opt->dccpop_feat,
+ opt->dccpop_sc->dccpoc_val,
+ opt->dccpop_sc->dccpoc_len);
+
+ BUG_ON(!opt->dccpop_sc->dccpoc_val);
+ kfree(opt->dccpop_sc->dccpoc_val);
+ kfree(opt->dccpop_sc);
+ opt->dccpop_sc = NULL;
+ }
+
+ /* any option not confirmed, re-send it */
+ if (!opt->dccpop_conf) {
+ dccp_insert_feat_opt(skb, opt->dccpop_type,
+ opt->dccpop_feat, opt->dccpop_val,
+ opt->dccpop_len);
+ change++;
+ }
+ }
+
+ /* Retransmit timer.
+ * If this is the master listening sock, we don't set a timer on it. It
+ * should be fine because if the dude doesn't receive our RESPONSE
+ * [which will contain the CHANGE] he will send another REQUEST which
+ * will "retrnasmit" the change.
+ */
+ if (change && dp->dccps_role != DCCP_ROLE_LISTEN) {
+ dccp_pr_debug("reset feat negotiation timer %p\n", sk);
+
+ /* XXX don't reset the timer on re-transmissions. I.e. reset it
+ * only when sending new stuff i guess. Currently the timer
+ * never backs off because on re-transmission it just resets it!
+ */
+ inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+ inet_csk(sk)->icsk_rto, DCCP_RTO_MAX);
+ }
+
+ return 0;
+}
+
+int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
+ struct dccp_minisock *dmsk = dccp_msk(sk);
DCCP_SKB_CB(skb)->dccpd_opt_len = 0;
- if (dp->dccps_options.dccpo_send_ndp_count)
- dccp_insert_option_ndp(sk, skb);
+ if (dmsk->dccpms_send_ndp_count &&
+ dccp_insert_option_ndp(sk, skb))
+ return -1;
if (!dccp_packet_without_ack(skb)) {
- if (dp->dccps_options.dccpo_send_ack_vector &&
- dccp_ackvec_pending(dp->dccps_hc_rx_ackvec))
- dccp_insert_option_ackvec(sk, skb);
- if (dp->dccps_timestamp_echo != 0)
- dccp_insert_option_timestamp_echo(sk, skb);
+ if (dmsk->dccpms_send_ack_vector &&
+ dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) &&
+ dccp_insert_option_ackvec(sk, skb))
+ return -1;
+
+ if (dp->dccps_timestamp_echo != 0 &&
+ dccp_insert_option_timestamp_echo(sk, skb))
+ return -1;
}
if (dp->dccps_hc_rx_insert_options) {
- ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb);
+ if (ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb))
+ return -1;
dp->dccps_hc_rx_insert_options = 0;
}
if (dp->dccps_hc_tx_insert_options) {
- ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb);
+ if (ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb))
+ return -1;
dp->dccps_hc_tx_insert_options = 0;
}
+ /* Feature negotiation */
+ /* Data packets can't do feat negotiation */
+ if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA &&
+ DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATAACK &&
+ dccp_insert_options_feat(sk, skb))
+ return -1;
+
/* XXX: insert other options when appropriate */
if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) {
@@ -459,4 +584,6 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb)
DCCP_SKB_CB(skb)->dccpd_opt_len += padding;
}
}
+
+ return 0;
}
diff --git a/net/dccp/output.c b/net/dccp/output.c
index efd7ffb903a1..7409e4a3abdf 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -27,7 +27,7 @@ static inline void dccp_event_ack_sent(struct sock *sk)
inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}
-static inline void dccp_skb_entail(struct sock *sk, struct sk_buff *skb)
+static void dccp_skb_entail(struct sock *sk, struct sk_buff *skb)
{
skb_set_owner_w(skb, sk);
WARN_ON(sk->sk_send_head);
@@ -49,7 +49,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
struct dccp_hdr *dh;
/* XXX For now we're using only 48 bits sequence numbers */
- const int dccp_header_size = sizeof(*dh) +
+ const u32 dccp_header_size = sizeof(*dh) +
sizeof(struct dccp_hdr_ext) +
dccp_packet_hdr_len(dcb->dccpd_type);
int err, set_ack = 1;
@@ -64,6 +64,10 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
case DCCP_PKT_DATAACK:
break;
+ case DCCP_PKT_REQUEST:
+ set_ack = 0;
+ /* fall through */
+
case DCCP_PKT_SYNC:
case DCCP_PKT_SYNCACK:
ackno = dcb->dccpd_seq;
@@ -79,7 +83,11 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
}
dcb->dccpd_seq = dp->dccps_gss;
- dccp_insert_options(sk, skb);
+
+ if (dccp_insert_options(sk, skb)) {
+ kfree_skb(skb);
+ return -EPROTO;
+ }
skb->h.raw = skb_push(skb, dccp_header_size);
dh = dccp_hdr(skb);
@@ -275,17 +283,16 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
{
struct dccp_hdr *dh;
struct dccp_request_sock *dreq;
- const int dccp_header_size = sizeof(struct dccp_hdr) +
+ const u32 dccp_header_size = sizeof(struct dccp_hdr) +
sizeof(struct dccp_hdr_ext) +
sizeof(struct dccp_hdr_response);
- struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN +
- dccp_header_size, 1,
+ struct sk_buff *skb = sock_wmalloc(sk, sk->sk_prot->max_header, 1,
GFP_ATOMIC);
if (skb == NULL)
return NULL;
/* Reserve space for headers. */
- skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size);
+ skb_reserve(skb, sk->sk_prot->max_header);
skb->dst = dst_clone(dst);
skb->csum = 0;
@@ -293,7 +300,11 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
dreq = dccp_rsk(req);
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss;
- dccp_insert_options(sk, skb);
+
+ if (dccp_insert_options(sk, skb)) {
+ kfree_skb(skb);
+ return NULL;
+ }
skb->h.raw = skb_push(skb, dccp_header_size);
@@ -310,32 +321,28 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_isr);
dccp_hdr_response(skb)->dccph_resp_service = dreq->dreq_service;
- dh->dccph_checksum = dccp_v4_checksum(skb, inet_rsk(req)->loc_addr,
- inet_rsk(req)->rmt_addr);
-
DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
return skb;
}
EXPORT_SYMBOL_GPL(dccp_make_response);
-struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
- const enum dccp_reset_codes code)
+static struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
+ const enum dccp_reset_codes code)
{
struct dccp_hdr *dh;
struct dccp_sock *dp = dccp_sk(sk);
- const int dccp_header_size = sizeof(struct dccp_hdr) +
+ const u32 dccp_header_size = sizeof(struct dccp_hdr) +
sizeof(struct dccp_hdr_ext) +
sizeof(struct dccp_hdr_reset);
- struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN +
- dccp_header_size, 1,
+ struct sk_buff *skb = sock_wmalloc(sk, sk->sk_prot->max_header, 1,
GFP_ATOMIC);
if (skb == NULL)
return NULL;
/* Reserve space for headers. */
- skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size);
+ skb_reserve(skb, sk->sk_prot->max_header);
skb->dst = dst_clone(dst);
skb->csum = 0;
@@ -345,7 +352,11 @@ struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
DCCP_SKB_CB(skb)->dccpd_reset_code = code;
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESET;
DCCP_SKB_CB(skb)->dccpd_seq = dp->dccps_gss;
- dccp_insert_options(sk, skb);
+
+ if (dccp_insert_options(sk, skb)) {
+ kfree_skb(skb);
+ return NULL;
+ }
skb->h.raw = skb_push(skb, dccp_header_size);
@@ -362,14 +373,34 @@ struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dp->dccps_gsr);
dccp_hdr_reset(skb)->dccph_reset_code = code;
-
- dh->dccph_checksum = dccp_v4_checksum(skb, inet_sk(sk)->saddr,
- inet_sk(sk)->daddr);
+ inet_csk(sk)->icsk_af_ops->send_check(sk, skb->len, skb);
DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
return skb;
}
+int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code)
+{
+ /*
+ * FIXME: what if rebuild_header fails?
+ * Should we be doing a rebuild_header here?
+ */
+ int err = inet_sk_rebuild_header(sk);
+
+ if (err == 0) {
+ struct sk_buff *skb = dccp_make_reset(sk, sk->sk_dst_cache,
+ code);
+ if (skb != NULL) {
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+ err = inet_csk(sk)->icsk_af_ops->queue_xmit(skb, 0);
+ if (err == NET_XMIT_CN)
+ err = 0;
+ }
+ }
+
+ return err;
+}
+
/*
* Do all connect socket setups that can be done AF independent.
*/
@@ -405,12 +436,12 @@ int dccp_connect(struct sock *sk)
dccp_connect_init(sk);
- skb = alloc_skb(MAX_DCCP_HEADER + 15, sk->sk_allocation);
+ skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation);
if (unlikely(skb == NULL))
return -ENOBUFS;
/* Reserve space for headers. */
- skb_reserve(skb, MAX_DCCP_HEADER);
+ skb_reserve(skb, sk->sk_prot->max_header);
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
skb->csum = 0;
@@ -431,7 +462,8 @@ void dccp_send_ack(struct sock *sk)
{
/* If we have been reset, we may not send again. */
if (sk->sk_state != DCCP_CLOSED) {
- struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC);
+ struct sk_buff *skb = alloc_skb(sk->sk_prot->max_header,
+ GFP_ATOMIC);
if (skb == NULL) {
inet_csk_schedule_ack(sk);
@@ -443,7 +475,7 @@ void dccp_send_ack(struct sock *sk)
}
/* Reserve space for headers */
- skb_reserve(skb, MAX_DCCP_HEADER);
+ skb_reserve(skb, sk->sk_prot->max_header);
skb->csum = 0;
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK;
dccp_transmit_skb(sk, skb);
@@ -490,14 +522,14 @@ void dccp_send_sync(struct sock *sk, const u64 seq,
* dccp_transmit_skb() will set the ownership to this
* sock.
*/
- struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC);
+ struct sk_buff *skb = alloc_skb(sk->sk_prot->max_header, GFP_ATOMIC);
if (skb == NULL)
/* FIXME: how to make sure the sync is sent? */
return;
/* Reserve space for headers and prepare control bits. */
- skb_reserve(skb, MAX_DCCP_HEADER);
+ skb_reserve(skb, sk->sk_prot->max_header);
skb->csum = 0;
DCCP_SKB_CB(skb)->dccpd_type = pkt_type;
DCCP_SKB_CB(skb)->dccpd_seq = seq;
@@ -505,6 +537,8 @@ void dccp_send_sync(struct sock *sk, const u64 seq,
dccp_transmit_skb(sk, skb);
}
+EXPORT_SYMBOL_GPL(dccp_send_sync);
+
/*
* Send a DCCP_PKT_CLOSE/CLOSEREQ. The caller locks the socket for us. This
* cannot be allowed to fail queueing a DCCP_PKT_CLOSE/CLOSEREQ frame under
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 65b11ea90d85..1ff7328b0e17 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -23,9 +23,7 @@
#include <linux/random.h>
#include <net/checksum.h>
-#include <net/inet_common.h>
#include <net/inet_sock.h>
-#include <net/protocol.h>
#include <net/sock.h>
#include <net/xfrm.h>
@@ -37,6 +35,7 @@
#include "ccid.h"
#include "dccp.h"
+#include "feat.h"
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
@@ -46,12 +45,66 @@ atomic_t dccp_orphan_count = ATOMIC_INIT(0);
EXPORT_SYMBOL_GPL(dccp_orphan_count);
-static struct net_protocol dccp_protocol = {
- .handler = dccp_v4_rcv,
- .err_handler = dccp_v4_err,
- .no_policy = 1,
+struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
+ .lhash_lock = RW_LOCK_UNLOCKED,
+ .lhash_users = ATOMIC_INIT(0),
+ .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};
+EXPORT_SYMBOL_GPL(dccp_hashinfo);
+
+void dccp_set_state(struct sock *sk, const int state)
+{
+ const int oldstate = sk->sk_state;
+
+ dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
+ dccp_role(sk), sk,
+ dccp_state_name(oldstate), dccp_state_name(state));
+ WARN_ON(state == oldstate);
+
+ switch (state) {
+ case DCCP_OPEN:
+ if (oldstate != DCCP_OPEN)
+ DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
+ break;
+
+ case DCCP_CLOSED:
+ if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
+ DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
+
+ sk->sk_prot->unhash(sk);
+ if (inet_csk(sk)->icsk_bind_hash != NULL &&
+ !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
+ inet_put_port(&dccp_hashinfo, sk);
+ /* fall through */
+ default:
+ if (oldstate == DCCP_OPEN)
+ DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
+ }
+
+ /* Change state AFTER socket is unhashed to avoid closed
+ * socket sitting in hash tables.
+ */
+ sk->sk_state = state;
+}
+
+EXPORT_SYMBOL_GPL(dccp_set_state);
+
+void dccp_done(struct sock *sk)
+{
+ dccp_set_state(sk, DCCP_CLOSED);
+ dccp_clear_xmit_timers(sk);
+
+ sk->sk_shutdown = SHUTDOWN_MASK;
+
+ if (!sock_flag(sk, SOCK_DEAD))
+ sk->sk_state_change(sk);
+ else
+ inet_csk_destroy_sock(sk);
+}
+
+EXPORT_SYMBOL_GPL(dccp_done);
+
const char *dccp_packet_name(const int type)
{
static const char *dccp_packet_names[] = {
@@ -96,6 +149,120 @@ const char *dccp_state_name(const int state)
EXPORT_SYMBOL_GPL(dccp_state_name);
+void dccp_hash(struct sock *sk)
+{
+ inet_hash(&dccp_hashinfo, sk);
+}
+
+EXPORT_SYMBOL_GPL(dccp_hash);
+
+void dccp_unhash(struct sock *sk)
+{
+ inet_unhash(&dccp_hashinfo, sk);
+}
+
+EXPORT_SYMBOL_GPL(dccp_unhash);
+
+int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
+ struct dccp_minisock *dmsk = dccp_msk(sk);
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ dccp_minisock_init(&dp->dccps_minisock);
+ do_gettimeofday(&dp->dccps_epoch);
+
+ /*
+ * FIXME: We're hardcoding the CCID, and doing this at this point makes
+ * the listening (master) sock get CCID control blocks, which is not
+ * necessary, but for now, to not mess with the test userspace apps,
+ * lets leave it here, later the real solution is to do this in a
+ * setsockopt(CCIDs-I-want/accept). -acme
+ */
+ if (likely(ctl_sock_initialized)) {
+ int rc = dccp_feat_init(dmsk);
+
+ if (rc)
+ return rc;
+
+ if (dmsk->dccpms_send_ack_vector) {
+ dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
+ if (dp->dccps_hc_rx_ackvec == NULL)
+ return -ENOMEM;
+ }
+ dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
+ sk, GFP_KERNEL);
+ dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
+ sk, GFP_KERNEL);
+ if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
+ dp->dccps_hc_tx_ccid == NULL)) {
+ ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
+ ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
+ if (dmsk->dccpms_send_ack_vector) {
+ dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
+ dp->dccps_hc_rx_ackvec = NULL;
+ }
+ dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
+ return -ENOMEM;
+ }
+ } else {
+ /* control socket doesn't need feat nego */
+ INIT_LIST_HEAD(&dmsk->dccpms_pending);
+ INIT_LIST_HEAD(&dmsk->dccpms_conf);
+ }
+
+ dccp_init_xmit_timers(sk);
+ icsk->icsk_rto = DCCP_TIMEOUT_INIT;
+ sk->sk_state = DCCP_CLOSED;
+ sk->sk_write_space = dccp_write_space;
+ icsk->icsk_sync_mss = dccp_sync_mss;
+ dp->dccps_mss_cache = 536;
+ dp->dccps_role = DCCP_ROLE_UNDEFINED;
+ dp->dccps_service = DCCP_SERVICE_INVALID_VALUE;
+ dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
+
+ return 0;
+}
+
+EXPORT_SYMBOL_GPL(dccp_init_sock);
+
+int dccp_destroy_sock(struct sock *sk)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
+ struct dccp_minisock *dmsk = dccp_msk(sk);
+
+ /*
+ * DCCP doesn't use sk_write_queue, just sk_send_head
+ * for retransmissions
+ */
+ if (sk->sk_send_head != NULL) {
+ kfree_skb(sk->sk_send_head);
+ sk->sk_send_head = NULL;
+ }
+
+ /* Clean up a referenced DCCP bind bucket. */
+ if (inet_csk(sk)->icsk_bind_hash != NULL)
+ inet_put_port(&dccp_hashinfo, sk);
+
+ kfree(dp->dccps_service_list);
+ dp->dccps_service_list = NULL;
+
+ if (dmsk->dccpms_send_ack_vector) {
+ dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
+ dp->dccps_hc_rx_ackvec = NULL;
+ }
+ ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
+ ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
+ dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
+
+ /* clean up feature negotiation state */
+ dccp_feat_clean(dmsk);
+
+ return 0;
+}
+
+EXPORT_SYMBOL_GPL(dccp_destroy_sock);
+
static inline int dccp_listen_start(struct sock *sk)
{
struct dccp_sock *dp = dccp_sk(sk);
@@ -183,7 +350,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock,
if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
mask |= POLLHUP;
if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLIN | POLLRDNORM;
+ mask |= POLLIN | POLLRDNORM | POLLRDHUP;
/* Connected? */
if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
@@ -220,7 +387,7 @@ int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
EXPORT_SYMBOL_GPL(dccp_ioctl);
-static int dccp_setsockopt_service(struct sock *sk, const u32 service,
+static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
char __user *optval, int optlen)
{
struct dccp_sock *dp = dccp_sk(sk);
@@ -255,18 +422,46 @@ static int dccp_setsockopt_service(struct sock *sk, const u32 service,
return 0;
}
-int dccp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int optlen)
+/* byte 1 is feature. the rest is the preference list */
+static int dccp_setsockopt_change(struct sock *sk, int type,
+ struct dccp_so_feat __user *optval)
+{
+ struct dccp_so_feat opt;
+ u8 *val;
+ int rc;
+
+ if (copy_from_user(&opt, optval, sizeof(opt)))
+ return -EFAULT;
+
+ val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
+ if (!val)
+ return -ENOMEM;
+
+ if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
+ rc = -EFAULT;
+ goto out_free_val;
+ }
+
+ rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
+ val, opt.dccpsf_len, GFP_KERNEL);
+ if (rc)
+ goto out_free_val;
+
+out:
+ return rc;
+
+out_free_val:
+ kfree(val);
+ goto out;
+}
+
+static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int optlen)
{
struct dccp_sock *dp;
int err;
int val;
- if (level != SOL_DCCP)
- return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
- optname, optval,
- optlen);
-
if (optlen < sizeof(int))
return -EINVAL;
@@ -284,6 +479,25 @@ int dccp_setsockopt(struct sock *sk, int level, int optname,
case DCCP_SOCKOPT_PACKET_SIZE:
dp->dccps_packet_size = val;
break;
+
+ case DCCP_SOCKOPT_CHANGE_L:
+ if (optlen != sizeof(struct dccp_so_feat))
+ err = -EINVAL;
+ else
+ err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
+ (struct dccp_so_feat *)
+ optval);
+ break;
+
+ case DCCP_SOCKOPT_CHANGE_R:
+ if (optlen != sizeof(struct dccp_so_feat))
+ err = -EINVAL;
+ else
+ err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
+ (struct dccp_so_feat *)
+ optval);
+ break;
+
default:
err = -ENOPROTOOPT;
break;
@@ -293,10 +507,33 @@ int dccp_setsockopt(struct sock *sk, int level, int optname,
return err;
}
+int dccp_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int optlen)
+{
+ if (level != SOL_DCCP)
+ return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
+ optname, optval,
+ optlen);
+ return do_dccp_setsockopt(sk, level, optname, optval, optlen);
+}
+
EXPORT_SYMBOL_GPL(dccp_setsockopt);
+#ifdef CONFIG_COMPAT
+int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int optlen)
+{
+ if (level != SOL_DCCP)
+ return inet_csk_compat_setsockopt(sk, level, optname,
+ optval, optlen);
+ return do_dccp_setsockopt(sk, level, optname, optval, optlen);
+}
+
+EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
+#endif
+
static int dccp_getsockopt_service(struct sock *sk, int len,
- u32 __user *optval,
+ __be32 __user *optval,
int __user *optlen)
{
const struct dccp_sock *dp = dccp_sk(sk);
@@ -326,16 +563,12 @@ out:
return err;
}
-int dccp_getsockopt(struct sock *sk, int level, int optname,
+static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
struct dccp_sock *dp;
int val, len;
- if (level != SOL_DCCP)
- return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
- optname, optval,
- optlen);
if (get_user(len, optlen))
return -EFAULT;
@@ -351,7 +584,7 @@ int dccp_getsockopt(struct sock *sk, int level, int optname,
break;
case DCCP_SOCKOPT_SERVICE:
return dccp_getsockopt_service(sk, len,
- (u32 __user *)optval, optlen);
+ (__be32 __user *)optval, optlen);
case 128 ... 191:
return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
len, (u32 __user *)optval, optlen);
@@ -368,8 +601,31 @@ int dccp_getsockopt(struct sock *sk, int level, int optname,
return 0;
}
+int dccp_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen)
+{
+ if (level != SOL_DCCP)
+ return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
+ optname, optval,
+ optlen);
+ return do_dccp_getsockopt(sk, level, optname, optval, optlen);
+}
+
EXPORT_SYMBOL_GPL(dccp_getsockopt);
+#ifdef CONFIG_COMPAT
+int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen)
+{
+ if (level != SOL_DCCP)
+ return inet_csk_compat_getsockopt(sk, level, optname,
+ optval, optlen);
+ return do_dccp_getsockopt(sk, level, optname, optval, optlen);
+}
+
+EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
+#endif
+
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t len)
{
@@ -679,84 +935,7 @@ void dccp_shutdown(struct sock *sk, int how)
EXPORT_SYMBOL_GPL(dccp_shutdown);
-static const struct proto_ops inet_dccp_ops = {
- .family = PF_INET,
- .owner = THIS_MODULE,
- .release = inet_release,
- .bind = inet_bind,
- .connect = inet_stream_connect,
- .socketpair = sock_no_socketpair,
- .accept = inet_accept,
- .getname = inet_getname,
- /* FIXME: work on tcp_poll to rename it to inet_csk_poll */
- .poll = dccp_poll,
- .ioctl = inet_ioctl,
- /* FIXME: work on inet_listen to rename it to sock_common_listen */
- .listen = inet_dccp_listen,
- .shutdown = inet_shutdown,
- .setsockopt = sock_common_setsockopt,
- .getsockopt = sock_common_getsockopt,
- .sendmsg = inet_sendmsg,
- .recvmsg = sock_common_recvmsg,
- .mmap = sock_no_mmap,
- .sendpage = sock_no_sendpage,
-};
-
-extern struct net_proto_family inet_family_ops;
-
-static struct inet_protosw dccp_v4_protosw = {
- .type = SOCK_DCCP,
- .protocol = IPPROTO_DCCP,
- .prot = &dccp_prot,
- .ops = &inet_dccp_ops,
- .capability = -1,
- .no_check = 0,
- .flags = INET_PROTOSW_ICSK,
-};
-
-/*
- * This is the global socket data structure used for responding to
- * the Out-of-the-blue (OOTB) packets. A control sock will be created
- * for this socket at the initialization time.
- */
-struct socket *dccp_ctl_socket;
-
-static char dccp_ctl_socket_err_msg[] __initdata =
- KERN_ERR "DCCP: Failed to create the control socket.\n";
-
-static int __init dccp_ctl_sock_init(void)
-{
- int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
- &dccp_ctl_socket);
- if (rc < 0)
- printk(dccp_ctl_socket_err_msg);
- else {
- dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
- inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1;
-
- /* Unhash it so that IP input processing does not even
- * see it, we do not wish this socket to see incoming
- * packets.
- */
- dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk);
- }
-
- return rc;
-}
-
-#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
-void dccp_ctl_sock_exit(void)
-{
- if (dccp_ctl_socket != NULL) {
- sock_release(dccp_ctl_socket);
- dccp_ctl_socket = NULL;
- }
-}
-
-EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit);
-#endif
-
-static int __init init_dccp_v4_mibs(void)
+static int __init dccp_mib_init(void)
{
int rc = -ENOMEM;
@@ -778,6 +957,13 @@ out_free_one:
}
+static void dccp_mib_exit(void)
+{
+ free_percpu(dccp_statistics[0]);
+ free_percpu(dccp_statistics[1]);
+ dccp_statistics[0] = dccp_statistics[1] = NULL;
+}
+
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
@@ -794,17 +980,14 @@ static int __init dccp_init(void)
{
unsigned long goal;
int ehash_order, bhash_order, i;
- int rc = proto_register(&dccp_prot, 1);
-
- if (rc)
- goto out;
+ int rc = -ENOBUFS;
dccp_hashinfo.bind_bucket_cachep =
kmem_cache_create("dccp_bind_bucket",
sizeof(struct inet_bind_bucket), 0,
SLAB_HWCACHE_ALIGN, NULL, NULL);
if (!dccp_hashinfo.bind_bucket_cachep)
- goto out_proto_unregister;
+ goto out;
/*
* Size and allocate the main established and bind bucket
@@ -866,27 +1049,23 @@ static int __init dccp_init(void)
INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
}
- if (init_dccp_v4_mibs())
+ rc = dccp_mib_init();
+ if (rc)
goto out_free_dccp_bhash;
- rc = -EAGAIN;
- if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
- goto out_free_dccp_v4_mibs;
-
- inet_register_protosw(&dccp_v4_protosw);
+ rc = dccp_ackvec_init();
+ if (rc)
+ goto out_free_dccp_mib;
- rc = dccp_ctl_sock_init();
+ rc = dccp_sysctl_init();
if (rc)
- goto out_unregister_protosw;
+ goto out_ackvec_exit;
out:
return rc;
-out_unregister_protosw:
- inet_unregister_protosw(&dccp_v4_protosw);
- inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
-out_free_dccp_v4_mibs:
- free_percpu(dccp_statistics[0]);
- free_percpu(dccp_statistics[1]);
- dccp_statistics[0] = dccp_statistics[1] = NULL;
+out_ackvec_exit:
+ dccp_ackvec_exit();
+out_free_dccp_mib:
+ dccp_mib_exit();
out_free_dccp_bhash:
free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
dccp_hashinfo.bhash = NULL;
@@ -896,23 +1075,12 @@ out_free_dccp_ehash:
out_free_bind_bucket_cachep:
kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
dccp_hashinfo.bind_bucket_cachep = NULL;
-out_proto_unregister:
- proto_unregister(&dccp_prot);
goto out;
}
-static const char dccp_del_proto_err_msg[] __exitdata =
- KERN_ERR "can't remove dccp net_protocol\n";
-
static void __exit dccp_fini(void)
{
- inet_unregister_protosw(&dccp_v4_protosw);
-
- if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
- printk(dccp_del_proto_err_msg);
-
- free_percpu(dccp_statistics[0]);
- free_percpu(dccp_statistics[1]);
+ dccp_mib_exit();
free_pages((unsigned long)dccp_hashinfo.bhash,
get_order(dccp_hashinfo.bhash_size *
sizeof(struct inet_bind_hashbucket)));
@@ -920,19 +1088,13 @@ static void __exit dccp_fini(void)
get_order(dccp_hashinfo.ehash_size *
sizeof(struct inet_ehash_bucket)));
kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
- proto_unregister(&dccp_prot);
+ dccp_ackvec_exit();
+ dccp_sysctl_exit();
}
module_init(dccp_init);
module_exit(dccp_fini);
-/*
- * __stringify doesn't likes enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33)
- * values directly, Also cover the case where the protocol is not specified,
- * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP
- */
-MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
-MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
new file mode 100644
index 000000000000..64c89e9c229e
--- /dev/null
+++ b/net/dccp/sysctl.c
@@ -0,0 +1,124 @@
+/*
+ * net/dccp/sysctl.c
+ *
+ * An implementation of the DCCP protocol
+ * Arnaldo Carvalho de Melo <acme@mandriva.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License v2
+ * as published by the Free Software Foundation.
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/sysctl.h>
+
+#ifndef CONFIG_SYSCTL
+#error This file should not be compiled without CONFIG_SYSCTL defined
+#endif
+
+extern int dccp_feat_default_sequence_window;
+extern int dccp_feat_default_rx_ccid;
+extern int dccp_feat_default_tx_ccid;
+extern int dccp_feat_default_ack_ratio;
+extern int dccp_feat_default_send_ack_vector;
+extern int dccp_feat_default_send_ndp_count;
+
+static struct ctl_table dccp_default_table[] = {
+ {
+ .ctl_name = NET_DCCP_DEFAULT_SEQ_WINDOW,
+ .procname = "seq_window",
+ .data = &dccp_feat_default_sequence_window,
+ .maxlen = sizeof(dccp_feat_default_sequence_window),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .ctl_name = NET_DCCP_DEFAULT_RX_CCID,
+ .procname = "rx_ccid",
+ .data = &dccp_feat_default_rx_ccid,
+ .maxlen = sizeof(dccp_feat_default_rx_ccid),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .ctl_name = NET_DCCP_DEFAULT_TX_CCID,
+ .procname = "tx_ccid",
+ .data = &dccp_feat_default_tx_ccid,
+ .maxlen = sizeof(dccp_feat_default_tx_ccid),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .ctl_name = NET_DCCP_DEFAULT_ACK_RATIO,
+ .procname = "ack_ratio",
+ .data = &dccp_feat_default_ack_ratio,
+ .maxlen = sizeof(dccp_feat_default_ack_ratio),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .ctl_name = NET_DCCP_DEFAULT_SEND_ACKVEC,
+ .procname = "send_ackvec",
+ .data = &dccp_feat_default_send_ack_vector,
+ .maxlen = sizeof(dccp_feat_default_send_ack_vector),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .ctl_name = NET_DCCP_DEFAULT_SEND_NDP,
+ .procname = "send_ndp",
+ .data = &dccp_feat_default_send_ndp_count,
+ .maxlen = sizeof(dccp_feat_default_send_ndp_count),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ { .ctl_name = 0, }
+};
+
+static struct ctl_table dccp_table[] = {
+ {
+ .ctl_name = NET_DCCP_DEFAULT,
+ .procname = "default",
+ .mode = 0555,
+ .child = dccp_default_table,
+ },
+ { .ctl_name = 0, },
+};
+
+static struct ctl_table dccp_dir_table[] = {
+ {
+ .ctl_name = NET_DCCP,
+ .procname = "dccp",
+ .mode = 0555,
+ .child = dccp_table,
+ },
+ { .ctl_name = 0, },
+};
+
+static struct ctl_table dccp_root_table[] = {
+ {
+ .ctl_name = CTL_NET,
+ .procname = "net",
+ .mode = 0555,
+ .child = dccp_dir_table,
+ },
+ { .ctl_name = 0, },
+};
+
+static struct ctl_table_header *dccp_table_header;
+
+int __init dccp_sysctl_init(void)
+{
+ dccp_table_header = register_sysctl_table(dccp_root_table, 1);
+
+ return dccp_table_header != NULL ? 0 : -ENOMEM;
+}
+
+void dccp_sysctl_exit(void)
+{
+ if (dccp_table_header != NULL) {
+ unregister_sysctl_table(dccp_table_header);
+ dccp_table_header = NULL;
+ }
+}
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index aa34b576e228..5244415e5f18 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -31,7 +31,7 @@ static void dccp_write_err(struct sock *sk)
sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
sk->sk_error_report(sk);
- dccp_v4_send_reset(sk, DCCP_RESET_CODE_ABORTED);
+ dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
dccp_done(sk);
DCCP_INC_STATS_BH(DCCP_MIB_ABORTONTIMEOUT);
}
@@ -141,6 +141,17 @@ static void dccp_retransmit_timer(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
+ /* retransmit timer is used for feature negotiation throughout
+ * connection. In this case, no packet is re-transmitted, but rather an
+ * ack is generated and pending changes are splaced into its options.
+ */
+ if (sk->sk_send_head == NULL) {
+ dccp_pr_debug("feat negotiation retransmit timeout %p\n", sk);
+ if (sk->sk_state == DCCP_OPEN)
+ dccp_send_ack(sk);
+ goto backoff;
+ }
+
/*
* sk->sk_send_head has to have one skb with
* DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP
@@ -177,6 +188,7 @@ static void dccp_retransmit_timer(struct sock *sk)
goto out;
}
+backoff:
icsk->icsk_backoff++;
icsk->icsk_retransmits++;