From a5f4cea74f1397bb29d0bbdabeb05bd05a23a741 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 30 Apr 2010 21:42:42 -0400 Subject: sctp: Use correct address family in sctp_getsockopt_peer_addrs() The function should use the address family of the address when trying to determine the length of the structure. Signed-off-by: Vlad Yasevich diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 13d8229..1282a0e 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4384,7 +4384,7 @@ static int sctp_getsockopt_peer_addrs(struct sock *sk, int len, transports) { memcpy(&temp, &from->ipaddr, sizeof(temp)); sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp); - addrlen = sctp_get_af_specific(sk->sk_family)->sockaddr_len; + addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; if (space_left < addrlen) return -ENOMEM; if (copy_to_user(to, &temp, addrlen)) -- cgit v0.10.2 From c17b02b38aa99ef806c7066ef19a6f51122304f1 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 30 Apr 2010 21:42:43 -0400 Subject: sctp: send SHUTDOWN-ACK chunk back to the source. SHUTDOWN-ACK is alaways sent to the primary path at the first time, but should better transmit SHUTDOWN-ACK chunk to the same destination transport address from which it received the SHUTDOWN chunk. Based on the work from Wei Yongjun . Signed-off-by: Vlad Yasevich diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 4c5bed9..49fb9ac 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -697,11 +697,15 @@ static void sctp_cmd_setup_t2(sctp_cmd_seq_t *cmds, { struct sctp_transport *t; - t = sctp_assoc_choose_alter_transport(asoc, + if (chunk->transport) + t = chunk->transport; + else { + t = sctp_assoc_choose_alter_transport(asoc, asoc->shutdown_last_sent_to); + chunk->transport = t; + } asoc->shutdown_last_sent_to = t; asoc->timeouts[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = t->rto; - chunk->transport = t; } /* Helper function to change the state of an association. */ -- cgit v0.10.2 From bd69b981a354be40cc709f3046f0c56f00da6163 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 30 Apr 2010 21:42:43 -0400 Subject: sctp: assure at least one T3-rtx timer is running if a FORWARD TSN is sent PR-SCTP extension section 3.5 Sender Side Implementation of PR-SCTP: C5) If a FORWARD TSN is sent, the sender MUST assure that at least one T3-rtx timer is running. So this patch fix to assure at least one T3-rtx timer is running if a FORWARD TSN is or will to sent. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index abfc0b8..16d451a 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -854,6 +854,12 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) if (status != SCTP_XMIT_OK) { /* put the chunk back */ list_add(&chunk->list, &q->control_chunk_list); + } else if (chunk->chunk_hdr->type == SCTP_CID_FWD_TSN) { + /* PR-SCTP C5) If a FORWARD TSN is sent, the + * sender MUST assure that at least one T3-rtx + * timer is running. + */ + sctp_transport_reset_timers(transport, 0); } break; -- cgit v0.10.2 From 52688d6ec977e69b164e0bd3de51d43cf6d4b7b3 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 30 Apr 2010 21:42:44 -0400 Subject: sctp: discard ABORT chunk with zero verification tag in COOKIE-WAIT state In current implementation if ABORT chunk is received with T flag is set and zero verification tag in COOKIE-WAIT state, the ABORT chunk will be always accepted. This is because in COOKIE-WAIT state, the endpoint does not know the peer's verification tag, and it's zero in the endpoint. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 851c813..273a8bb 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -437,7 +437,7 @@ sctp_vtag_verify_either(const struct sctp_chunk *chunk, */ if ((!sctp_test_T_bit(chunk) && (ntohl(chunk->sctp_hdr->vtag) == asoc->c.my_vtag)) || - (sctp_test_T_bit(chunk) && + (sctp_test_T_bit(chunk) && asoc->c.peer_vtag && (ntohl(chunk->sctp_hdr->vtag) == asoc->c.peer_vtag))) { return 1; } -- cgit v0.10.2 From 6429d3dc4bd6251b01c11b851e23a4d60f079e06 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 30 Apr 2010 21:42:44 -0400 Subject: sctp: missing set src and dest port while lookup output route While lookup the output route, we do not set the src and dest port. This will cause we got a wrong route if we had set the outbund transport to IPsec with src or dst port. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 704298f..1827498 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -474,13 +474,17 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, memset(&fl, 0x0, sizeof(struct flowi)); fl.fl4_dst = daddr->v4.sin_addr.s_addr; + fl.fl_ip_dport = daddr->v4.sin_port; fl.proto = IPPROTO_SCTP; if (asoc) { fl.fl4_tos = RT_CONN_FLAGS(asoc->base.sk); fl.oif = asoc->base.sk->sk_bound_dev_if; + fl.fl_ip_sport = htons(asoc->base.bind_addr.port); } - if (saddr) + if (saddr) { fl.fl4_src = saddr->v4.sin_addr.s_addr; + fl.fl_ip_sport = saddr->v4.sin_port; + } SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ", __func__, &fl.fl4_dst, &fl.fl4_src); @@ -528,6 +532,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, if ((laddr->state == SCTP_ADDR_SRC) && (AF_INET == laddr->a.sa.sa_family)) { fl.fl4_src = laddr->a.v4.sin_addr.s_addr; + fl.fl_ip_sport = laddr->a.v4.sin_port; if (!ip_route_output_key(&init_net, &rt, &fl)) { dst = &rt->u.dst; goto out_unlock; -- cgit v0.10.2 From bc4f841a05364b2572bcc266e9fd7e9cf5f06d5b Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 30 Apr 2010 22:38:53 -0400 Subject: sctp: fix to retranmit at least one DATA chunk While doing retranmit, if control chunk exists, such as FORWARD TSN chunk, and the DATA chunk can not be bundled with this control chunk because of PMTU limit, no DATA chunk will be retranmitted in the current implementation. This patch makes sure to retranmit at least one DATA chunk in this case. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 16d451a..e333d58 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -598,11 +598,23 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, if (fast_rtx && !chunk->fast_retransmit) continue; +redo: /* Attempt to append this chunk to the packet. */ status = sctp_packet_append_chunk(pkt, chunk); switch (status) { case SCTP_XMIT_PMTU_FULL: + if (!pkt->has_data && !pkt->has_cookie_echo) { + /* If this packet did not contain DATA then + * retransmission did not happen, so do it + * again. We'll ignore the error here since + * control chunks are already freed so there + * is nothing we can do. + */ + sctp_packet_transmit(pkt); + goto redo; + } + /* Send this packet. */ error = sctp_packet_transmit(pkt); -- cgit v0.10.2 From fbdf501c9374966a56829ecca3a7f25d2b49a305 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 30 Apr 2010 22:39:26 -0400 Subject: sctp: Do no select unconfirmed transports for retransmissions An unconfirmed transport is one that we have not been able to reach since the beginning. There is no point in trying to retrasnmit data on those transports. Also, the specification forbids it due to security issues. Reported-by: Frank Schuster Signed-off-by: Vlad Yasevich diff --git a/net/sctp/associola.c b/net/sctp/associola.c index df5abbf..de830c2 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -762,7 +762,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, asoc->peer.retran_path = peer; } - if (asoc->peer.active_path == asoc->peer.retran_path) { + if (asoc->peer.active_path == asoc->peer.retran_path && + peer->state != SCTP_UNCONFIRMED) { asoc->peer.retran_path = peer; } @@ -1318,7 +1319,7 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) /* Keep track of the next transport in case * we don't find any active transport. */ - if (!next) + if (t->state != SCTP_UNCONFIRMED && !next) next = t; } } -- cgit v0.10.2 From ec7b9519509061bbc09a43284c3570aa492e07f0 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Fri, 30 Apr 2010 22:41:09 -0400 Subject: sctp: use sctp_chunk_is_data macro to decide a chunk is data chunk sctp_chunk_is_data macro is defined to decide that whether a chunk is data chunk or not. Signed-off-by: Shan Wei Signed-off-by: Vlad Yasevich diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index e333d58..a4fe7de 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -308,7 +308,7 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk) /* If it is data, queue it up, otherwise, send it * immediately. */ - if (SCTP_CID_DATA == chunk->chunk_hdr->type) { + if (sctp_chunk_is_data(chunk)) { /* Is it OK to queue data chunks? */ /* From 9. Termination of Association * -- cgit v0.10.2 From 787a51a0878f7bee3a9a83040077301e1556b69a Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 30 Apr 2010 22:41:09 -0400 Subject: sctp: implement sctp association probing module This patch implement sctp association probing module, the module will be called sctp_probe. This module allows for capturing the changes to SCTP association state in response to incoming packets. It is used for debugging SCTP congestion control algorithms. Usage: $ modprobe sctp_probe [full=n] [port=n] [bufsize=n] $ cat /proc/net/sctpprobe The output format is: TIME ASSOC LPORT RPORT MTU RWND UNACK ... The output will be like this: 9.226086 c4064c48 9000 8000 1500 53352 1 *192.168.0.19 1 4380 54784 1252 0 1500 9.287195 c4064c48 9000 8000 1500 45144 5 *192.168.0.19 1 5880 54784 6500 0 1500 9.289130 c4064c48 9000 8000 1500 42724 5 *192.168.0.19 1 7380 54784 6500 0 1500 9.620332 c4064c48 9000 8000 1500 48284 4 *192.168.0.19 1 8880 54784 5200 0 1500 ...... Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index 58b3e88..126b014 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -37,6 +37,18 @@ menuconfig IP_SCTP if IP_SCTP +config NET_SCTPPROBE + tristate "SCTP: Association probing" + depends on PROC_FS && KPROBES + ---help--- + This module allows for capturing the changes to SCTP association + state in response to incoming packets. It is used for debugging + SCTP congestion control algorithms. If you don't understand + what was just said, you don't need it: say N. + + To compile this code as a module, choose M here: the + module will be called sctp_probe. + config SCTP_DBG_MSG bool "SCTP: Debug messages" help diff --git a/net/sctp/Makefile b/net/sctp/Makefile index 6b79473..5c30b7a 100644 --- a/net/sctp/Makefile +++ b/net/sctp/Makefile @@ -3,6 +3,7 @@ # obj-$(CONFIG_IP_SCTP) += sctp.o +obj-$(CONFIG_NET_SCTPPROBE) += sctp_probe.o sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ protocol.o endpointola.o associola.o \ @@ -11,6 +12,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ tsnmap.o bind_addr.o socket.o primitive.o \ output.o input.o debug.o ssnmap.o auth.o +sctp_probe-y := probe.o + sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o sctp-$(CONFIG_PROC_FS) += proc.o sctp-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/sctp/probe.c b/net/sctp/probe.c new file mode 100644 index 0000000..8f025d5 --- /dev/null +++ b/net/sctp/probe.c @@ -0,0 +1,213 @@ +/* + * sctp_probe - Observe the SCTP flow with kprobes. + * + * The idea for this came from Werner Almesberger's umlsim + * Copyright (C) 2004, Stephen Hemminger + * + * Modified for SCTP from Stephen Hemminger's code + * Copyright (C) 2010, Wei Yongjun + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +MODULE_AUTHOR("Wei Yongjun "); +MODULE_DESCRIPTION("SCTP snooper"); +MODULE_LICENSE("GPL"); + +static int port __read_mostly = 0; +MODULE_PARM_DESC(port, "Port to match (0=all)"); +module_param(port, int, 0); + +static int bufsize __read_mostly = 64 * 1024; +MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)"); +module_param(bufsize, int, 0); + +static int full __read_mostly = 1; +MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)"); +module_param(full, int, 0); + +static const char procname[] = "sctpprobe"; + +static struct { + struct kfifo fifo; + spinlock_t lock; + wait_queue_head_t wait; + struct timespec tstart; +} sctpw; + +static void printl(const char *fmt, ...) +{ + va_list args; + int len; + char tbuf[256]; + + va_start(args, fmt); + len = vscnprintf(tbuf, sizeof(tbuf), fmt, args); + va_end(args); + + kfifo_in_locked(&sctpw.fifo, tbuf, len, &sctpw.lock); + wake_up(&sctpw.wait); +} + +static int sctpprobe_open(struct inode *inode, struct file *file) +{ + kfifo_reset(&sctpw.fifo); + getnstimeofday(&sctpw.tstart); + + return 0; +} + +static ssize_t sctpprobe_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + int error = 0, cnt = 0; + unsigned char *tbuf; + + if (!buf) + return -EINVAL; + + if (len == 0) + return 0; + + tbuf = vmalloc(len); + if (!tbuf) + return -ENOMEM; + + error = wait_event_interruptible(sctpw.wait, + kfifo_len(&sctpw.fifo) != 0); + if (error) + goto out_free; + + cnt = kfifo_out_locked(&sctpw.fifo, tbuf, len, &sctpw.lock); + error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0; + +out_free: + vfree(tbuf); + + return error ? error : cnt; +} + +static const struct file_operations sctpprobe_fops = { + .owner = THIS_MODULE, + .open = sctpprobe_open, + .read = sctpprobe_read, +}; + +sctp_disposition_t jsctp_sf_eat_sack(const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const sctp_subtype_t type, + void *arg, + sctp_cmd_seq_t *commands) +{ + struct sctp_transport *sp; + static __u32 lcwnd = 0; + struct timespec now; + + sp = asoc->peer.primary_path; + + if ((full || sp->cwnd != lcwnd) && + (!port || asoc->peer.port == port || + ep->base.bind_addr.port == port)) { + lcwnd = sp->cwnd; + + getnstimeofday(&now); + now = timespec_sub(now, sctpw.tstart); + + printl("%lu.%06lu ", (unsigned long) now.tv_sec, + (unsigned long) now.tv_nsec / NSEC_PER_USEC); + + printl("%p %5d %5d %5d %8d %5d ", asoc, + ep->base.bind_addr.port, asoc->peer.port, + asoc->pathmtu, asoc->peer.rwnd, asoc->unack_data); + + list_for_each_entry(sp, &asoc->peer.transport_addr_list, + transports) { + if (sp == asoc->peer.primary_path) + printl("*"); + + if (sp->ipaddr.sa.sa_family == AF_INET) + printl("%pI4 ", &sp->ipaddr.v4.sin_addr); + else + printl("%pI6 ", &sp->ipaddr.v6.sin6_addr); + + printl("%2u %8u %8u %8u %8u %8u ", + sp->state, sp->cwnd, sp->ssthresh, + sp->flight_size, sp->partial_bytes_acked, + sp->pathmtu); + } + printl("\n"); + } + + jprobe_return(); + return 0; +} + +static struct jprobe sctp_recv_probe = { + .kp = { + .symbol_name = "sctp_sf_eat_sack_6_2", + }, + .entry = jsctp_sf_eat_sack, +}; + +static __init int sctpprobe_init(void) +{ + int ret = -ENOMEM; + + init_waitqueue_head(&sctpw.wait); + spin_lock_init(&sctpw.lock); + if (kfifo_alloc(&sctpw.fifo, bufsize, GFP_KERNEL)) + return ret; + + if (!proc_net_fops_create(&init_net, procname, S_IRUSR, + &sctpprobe_fops)) + goto free_kfifo; + + ret = register_jprobe(&sctp_recv_probe); + if (ret) + goto remove_proc; + + pr_info("SCTP probe registered (port=%d)\n", port); + + return 0; + +remove_proc: + proc_net_remove(&init_net, procname); +free_kfifo: + kfifo_free(&sctpw.fifo); + return ret; +} + +static __exit void sctpprobe_exit(void) +{ + kfifo_free(&sctpw.fifo); + proc_net_remove(&init_net, procname); + unregister_jprobe(&sctp_recv_probe); +} + +module_init(sctpprobe_init); +module_exit(sctpprobe_exit); -- cgit v0.10.2 From b99a4d53a74ac25eb4b930eef6c745579149c571 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 30 Apr 2010 22:41:09 -0400 Subject: sctp: cleanup: remove duplicate assignment This assignment isn't needed because we did it earlier already. Also another reason to delete the assignment is because it triggers a Smatch warning about checking for NULL pointers after a dereference. Reported-by: Vlad Yasevich Signed-off-by: Dan Carpenter Signed-off-by: Vlad Yasevich diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 17cb400e..33aed1c 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -419,10 +419,17 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, if (!retval) goto nomem_chunk; - /* Per the advice in RFC 2960 6.4, send this reply to - * the source of the INIT packet. + /* RFC 2960 6.4 Multi-homed SCTP Endpoints + * + * An endpoint SHOULD transmit reply chunks (e.g., SACK, + * HEARTBEAT ACK, * etc.) to the same destination transport + * address from which it received the DATA or control chunk + * to which it is replying. + * + * [INIT ACK back to where the INIT came from.] */ retval->transport = chunk->transport; + retval->subh.init_hdr = sctp_addto_chunk(retval, sizeof(initack), &initack); retval->param_hdr.v = sctp_addto_chunk(retval, addrs_len, addrs.v); @@ -461,18 +468,6 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, /* We need to remove the const qualifier at this point. */ retval->asoc = (struct sctp_association *) asoc; - /* RFC 2960 6.4 Multi-homed SCTP Endpoints - * - * An endpoint SHOULD transmit reply chunks (e.g., SACK, - * HEARTBEAT ACK, * etc.) to the same destination transport - * address from which it received the DATA or control chunk - * to which it is replying. - * - * [INIT ACK back to where the INIT came from.] - */ - if (chunk) - retval->transport = chunk->transport; - nomem_chunk: kfree(cookie); nomem_cookie: -- cgit v0.10.2 From d598b166ced20d9b9281ea3527c0e18405ddb803 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 30 Apr 2010 22:41:09 -0400 Subject: sctp: Make sure we always return valid retransmit path commit 4951feda0c60d1ef681f1a270afdd617924ab041 sctp: Do no select unconfirmed transports for retransmissions added code to make sure that we do not select unconfirmed paths for data transmission. This caused a problem when there are only 2 paths, 1 unconfirmed and 1 unreachable. In that case, the next retransmit path returned is NULL and that causes a kernel crash. The solution is to only change retransmit paths if we found one to use. Reported-by: Frank Schuster Signed-off-b: Vlad Yasevich diff --git a/net/sctp/associola.c b/net/sctp/associola.c index de830c2..fab9cb2 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1324,7 +1324,8 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) } } - asoc->peer.retran_path = t; + if (t) + asoc->peer.retran_path = t; SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_update_retran_path:association" " %p addr: ", -- cgit v0.10.2 From ae19c54866450f6c6f79223ca7d37965859a54e1 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 30 Apr 2010 22:41:09 -0400 Subject: sctp: remove 'resent' bit from the chunk The 'resent' bit is used to make sure that we don't update rto estimate based on retransmitted chunks. However, we already have the 'rto_pending' bit that we test when need to update rto, so 'resent' bit is just extra. Additionally, we currently have a bug in that we always set a 'resent' bit and thus rto estimate is only updated by Heartbeats. Signed-off-by: Vlad Yasevich diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index ff30177..03deffb 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -757,7 +757,6 @@ struct sctp_chunk { #define SCTP_NEED_FRTX 0x1 #define SCTP_DONT_FRTX 0x2 __u16 rtt_in_progress:1, /* This chunk used for RTT calc? */ - resent:1, /* Has this chunk ever been resent. */ has_tsn:1, /* Does this chunk have a TSN yet? */ has_ssn:1, /* Does this chunk have a SSN yet? */ singleton:1, /* Only chunk in the packet? */ @@ -1065,7 +1064,7 @@ void sctp_transport_route(struct sctp_transport *, union sctp_addr *, struct sctp_sock *); void sctp_transport_pmtu(struct sctp_transport *); void sctp_transport_free(struct sctp_transport *); -void sctp_transport_reset_timers(struct sctp_transport *, int); +void sctp_transport_reset_timers(struct sctp_transport *); void sctp_transport_hold(struct sctp_transport *); void sctp_transport_put(struct sctp_transport *); void sctp_transport_update_rto(struct sctp_transport *, __u32); diff --git a/net/sctp/output.c b/net/sctp/output.c index fad261d..35e49b9 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -429,24 +429,17 @@ int sctp_packet_transmit(struct sctp_packet *packet) list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { list_del_init(&chunk->list); if (sctp_chunk_is_data(chunk)) { + /* 6.3.1 C4) When data is in flight and when allowed + * by rule C5, a new RTT measurement MUST be made each + * round trip. Furthermore, new RTT measurements + * SHOULD be made no more than once per round-trip + * for a given destination transport address. + */ - if (!chunk->resent) { - - /* 6.3.1 C4) When data is in flight and when allowed - * by rule C5, a new RTT measurement MUST be made each - * round trip. Furthermore, new RTT measurements - * SHOULD be made no more than once per round-trip - * for a given destination transport address. - */ - - if (!tp->rto_pending) { - chunk->rtt_in_progress = 1; - tp->rto_pending = 1; - } + if (!tp->rto_pending) { + chunk->rtt_in_progress = 1; + tp->rto_pending = 1; } - - chunk->resent = 1; - has_data = 1; } diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index a4fe7de..4e551ba 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1405,7 +1405,6 @@ static void sctp_check_transmitted(struct sctp_outq *q, * instance). */ if (!tchunk->tsn_gap_acked && - !tchunk->resent && tchunk->rtt_in_progress) { tchunk->rtt_in_progress = 0; rtt = jiffies - tchunk->sent_at; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 33aed1c..24effdf 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1205,7 +1205,6 @@ struct sctp_chunk *sctp_chunkify(struct sk_buff *skb, INIT_LIST_HEAD(&retval->list); retval->skb = skb; retval->asoc = (struct sctp_association *)asoc; - retval->resent = 0; retval->has_tsn = 0; retval->has_ssn = 0; retval->rtt_in_progress = 0; -- cgit v0.10.2 From d9efc2231b28bc199f9de4dd594248b7341188e5 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 30 Apr 2010 22:41:09 -0400 Subject: sctp: Do not force T3 timer on fast retransmissions. We don't need to force the T3 timer any more and it's actually wrong to do as it causes too long of a delay. The timer will be started if one is not running, but if one is running, we leave it alone. Signed-off-by: Vlad Yasevich diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 4e551ba..786c4ff 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -659,14 +659,6 @@ redo: if (chunk->fast_retransmit == SCTP_NEED_FRTX) chunk->fast_retransmit = SCTP_DONT_FRTX; - /* Force start T3-rtx timer when fast retransmitting - * the earliest outstanding TSN - */ - if (!timer && fast_rtx && - ntohl(chunk->subh.data_hdr->tsn) == - asoc->ctsn_ack_point + 1) - timer = 2; - q->empty = 0; break; } @@ -871,7 +863,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) * sender MUST assure that at least one T3-rtx * timer is running. */ - sctp_transport_reset_timers(transport, 0); + sctp_transport_reset_timers(transport); } break; @@ -924,8 +916,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) rtx_timeout, &start_timer); if (start_timer) - sctp_transport_reset_timers(transport, - start_timer-1); + sctp_transport_reset_timers(transport); /* This can happen on COOKIE-ECHO resend. Only * one chunk can get bundled with a COOKIE-ECHO. @@ -1058,7 +1049,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) list_add_tail(&chunk->transmitted_list, &transport->transmitted); - sctp_transport_reset_timers(transport, 0); + sctp_transport_reset_timers(transport); q->empty = 0; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index be4d63d..0ebb97f 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -195,7 +195,7 @@ static void sctp_transport_destroy(struct sctp_transport *transport) /* Start T3_rtx timer if it is not already running and update the heartbeat * timer. This routine is called every time a DATA chunk is sent. */ -void sctp_transport_reset_timers(struct sctp_transport *transport, int force) +void sctp_transport_reset_timers(struct sctp_transport *transport) { /* RFC 2960 6.3.2 Retransmission Timer Rules * @@ -205,7 +205,7 @@ void sctp_transport_reset_timers(struct sctp_transport *transport, int force) * address. */ - if (force || !timer_pending(&transport->T3_rtx_timer)) + if (!timer_pending(&transport->T3_rtx_timer)) if (!mod_timer(&transport->T3_rtx_timer, jiffies + transport->rto)) sctp_transport_hold(transport); -- cgit v0.10.2 From c0058a35aacc79406e867ec33c5cb75624fd5860 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 30 Apr 2010 22:41:09 -0400 Subject: sctp: Save some room in the sctp_transport by using bitfields Saves some room in the sctp_transport structure. Signed-off-by: Vlad Yasevich diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 03deffb..9072dd6 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -877,7 +877,33 @@ struct sctp_transport { /* Reference counting. */ atomic_t refcnt; - int dead; + int dead:1, + /* RTO-Pending : A flag used to track if one of the DATA + * chunks sent to this address is currently being + * used to compute a RTT. If this flag is 0, + * the next DATA chunk sent to this destination + * should be used to compute a RTT and this flag + * should be set. Every time the RTT + * calculation completes (i.e. the DATA chunk + * is SACK'd) clear this flag. + */ + rto_pending:1, + + /* + * hb_sent : a flag that signals that we have a pending + * heartbeat. + */ + hb_sent:1, + + /* Flag to track the current fast recovery state */ + fast_recovery:1, + + /* Is the Path MTU update pending on this tranport */ + pmtu_pending:1, + + /* Is this structure kfree()able? */ + malloced:1; + /* This is the peer's IP address and port. */ union sctp_addr ipaddr; @@ -907,22 +933,6 @@ struct sctp_transport { /* SRTT : The current smoothed round trip time. */ __u32 srtt; - /* RTO-Pending : A flag used to track if one of the DATA - * chunks sent to this address is currently being - * used to compute a RTT. If this flag is 0, - * the next DATA chunk sent to this destination - * should be used to compute a RTT and this flag - * should be set. Every time the RTT - * calculation completes (i.e. the DATA chunk - * is SACK'd) clear this flag. - * hb_sent : a flag that signals that we have a pending heartbeat. - */ - __u8 rto_pending; - __u8 hb_sent; - - /* Flag to track the current fast recovery state */ - __u8 fast_recovery; - /* * These are the congestion stats. */ @@ -975,9 +985,6 @@ struct sctp_transport { */ __u16 pathmaxrxt; - /* is the Path MTU update pending on this tranport */ - __u8 pmtu_pending; - /* PMTU : The current known path MTU. */ __u32 pathmtu; @@ -1021,8 +1028,6 @@ struct sctp_transport { /* This is the list of transports that have chunks to send. */ struct list_head send_ready; - int malloced; /* Is this structure kfree()able? */ - /* State information saved for SFR_CACC algorithm. The key * idea in SFR_CACC is to maintain state at the sender on a * per-destination basis when a changeover happens. -- cgit v0.10.2 From b2cf9b6bd93af1cc047d3356f1c6cc9367fe3731 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 30 Apr 2010 22:41:10 -0400 Subject: sctp: update transport initializations Right now, sctp transports are not fully initialized and when adding any new fields, they have to be explicitely initialized. This is prone to mistakes. So we switch to calling kzalloc() which makes things much simpler. Signed-off-by: Vlad Yasevich diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 5915155..289241d 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -546,7 +546,7 @@ for (pos = chunk->subh.fwdtsn_hdr->skip;\ #define WORD_ROUND(s) (((s)+3)&~3) /* Make a new instance of type. */ -#define t_new(type, flags) (type *)kmalloc(sizeof(type), flags) +#define t_new(type, flags) (type *)kzalloc(sizeof(type), flags) /* Compare two timevals. */ #define tv_lt(s, t) \ diff --git a/net/sctp/associola.c b/net/sctp/associola.c index fab9cb2..37753cd 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -87,9 +87,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* Retrieve the SCTP per socket area. */ sp = sctp_sk((struct sock *)sk); - /* Init all variables to a known value. */ - memset(asoc, 0, sizeof(struct sctp_association)); - /* Discarding const is appropriate here. */ asoc->ep = (struct sctp_endpoint *)ep; sctp_endpoint_hold(asoc->ep); diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 905fda5..2f8763b 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -70,8 +70,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, struct sctp_shared_key *null_key; int err; - memset(ep, 0, sizeof(struct sctp_endpoint)); - ep->digest = kzalloc(SCTP_SIGNATURE_SIZE, gfp); if (!ep->digest) return NULL; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 0ebb97f..854228b 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -64,9 +64,6 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, /* Copy in the address. */ peer->ipaddr = *addr; peer->af_specific = sctp_get_af_specific(addr->sa.sa_family); - peer->asoc = NULL; - - peer->dst = NULL; memset(&peer->saddr, 0, sizeof(union sctp_addr)); /* From 6.3.1 RTO Calculation: @@ -76,34 +73,21 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, * parameter 'RTO.Initial'. */ peer->rto = msecs_to_jiffies(sctp_rto_initial); - peer->rtt = 0; - peer->rttvar = 0; - peer->srtt = 0; - peer->rto_pending = 0; - peer->hb_sent = 0; - peer->fast_recovery = 0; peer->last_time_heard = jiffies; peer->last_time_ecne_reduced = jiffies; - peer->init_sent_count = 0; - peer->param_flags = SPP_HB_DISABLE | SPP_PMTUD_ENABLE | SPP_SACKDELAY_ENABLE; - peer->hbinterval = 0; /* Initialize the default path max_retrans. */ peer->pathmaxrxt = sctp_max_retrans_path; - peer->error_count = 0; INIT_LIST_HEAD(&peer->transmitted); INIT_LIST_HEAD(&peer->send_ready); INIT_LIST_HEAD(&peer->transports); - peer->T3_rtx_timer.expires = 0; - peer->hb_timer.expires = 0; - setup_timer(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event, (unsigned long)peer); setup_timer(&peer->hb_timer, sctp_generate_heartbeat_event, @@ -113,15 +97,6 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce)); atomic_set(&peer->refcnt, 1); - peer->dead = 0; - - peer->malloced = 0; - - /* Initialize the state information for SFR-CACC */ - peer->cacc.changeover_active = 0; - peer->cacc.cycling_changeover = 0; - peer->cacc.next_tsn_at_change = 0; - peer->cacc.cacc_saw_newack = 0; return peer; } -- cgit v0.10.2 From cf9b4812e18aab6f86ff998bd7425a9e823269c3 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 30 Apr 2010 22:41:10 -0400 Subject: sctp: fast recovery algorithm is per association. SCTP fast recovery algorithm really applies per association and impacts all transports. Signed-off-by: Vlad Yasevich diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 9072dd6..d463296 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -895,9 +895,6 @@ struct sctp_transport { */ hb_sent:1, - /* Flag to track the current fast recovery state */ - fast_recovery:1, - /* Is the Path MTU update pending on this tranport */ pmtu_pending:1, @@ -952,9 +949,6 @@ struct sctp_transport { __u32 burst_limited; /* Holds old cwnd when max.burst is applied */ - /* TSN marking the fast recovery exit point */ - __u32 fast_recovery_exit; - /* Destination */ struct dst_entry *dst; /* Source address. */ @@ -1723,6 +1717,12 @@ struct sctp_association { /* Highest TSN that is acknowledged by incoming SACKs. */ __u32 highest_sacked; + /* TSN marking the fast recovery exit point */ + __u32 fast_recovery_exit; + + /* Flag to track the current fast recovery state */ + __u8 fast_recovery; + /* The number of unacknowledged data chunks. Reported through * the SCTP_STATUS sockopt. */ diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 854228b..fccf494 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -378,15 +378,16 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) void sctp_transport_raise_cwnd(struct sctp_transport *transport, __u32 sack_ctsn, __u32 bytes_acked) { + struct sctp_association *asoc = transport->asoc; __u32 cwnd, ssthresh, flight_size, pba, pmtu; cwnd = transport->cwnd; flight_size = transport->flight_size; /* See if we need to exit Fast Recovery first */ - if (transport->fast_recovery && - TSN_lte(transport->fast_recovery_exit, sack_ctsn)) - transport->fast_recovery = 0; + if (asoc->fast_recovery && + TSN_lte(asoc->fast_recovery_exit, sack_ctsn)) + asoc->fast_recovery = 0; /* The appropriate cwnd increase algorithm is performed if, and only * if the cumulative TSN whould advanced and the congestion window is @@ -415,7 +416,7 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport, * 2) the destination's path MTU. This upper bound protects * against the ACK-Splitting attack outlined in [SAVAGE99]. */ - if (transport->fast_recovery) + if (asoc->fast_recovery) return; if (bytes_acked > pmtu) @@ -466,6 +467,8 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport, void sctp_transport_lower_cwnd(struct sctp_transport *transport, sctp_lower_cwnd_t reason) { + struct sctp_association *asoc = transport->asoc; + switch (reason) { case SCTP_LOWER_CWND_T3_RTX: /* RFC 2960 Section 7.2.3, sctpimpguide @@ -476,11 +479,11 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, * partial_bytes_acked = 0 */ transport->ssthresh = max(transport->cwnd/2, - 4*transport->asoc->pathmtu); - transport->cwnd = transport->asoc->pathmtu; + 4*asoc->pathmtu); + transport->cwnd = asoc->pathmtu; - /* T3-rtx also clears fast recovery on the transport */ - transport->fast_recovery = 0; + /* T3-rtx also clears fast recovery */ + asoc->fast_recovery = 0; break; case SCTP_LOWER_CWND_FAST_RTX: @@ -496,15 +499,15 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, * cwnd = ssthresh * partial_bytes_acked = 0 */ - if (transport->fast_recovery) + if (asoc->fast_recovery) return; /* Mark Fast recovery */ - transport->fast_recovery = 1; - transport->fast_recovery_exit = transport->asoc->next_tsn - 1; + asoc->fast_recovery = 1; + asoc->fast_recovery_exit = asoc->next_tsn - 1; transport->ssthresh = max(transport->cwnd/2, - 4*transport->asoc->pathmtu); + 4*asoc->pathmtu); transport->cwnd = transport->ssthresh; break; @@ -524,7 +527,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, if (time_after(jiffies, transport->last_time_ecne_reduced + transport->rtt)) { transport->ssthresh = max(transport->cwnd/2, - 4*transport->asoc->pathmtu); + 4*asoc->pathmtu); transport->cwnd = transport->ssthresh; transport->last_time_ecne_reduced = jiffies; } @@ -540,7 +543,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, * interval. */ transport->cwnd = max(transport->cwnd/2, - 4*transport->asoc->pathmtu); + 4*asoc->pathmtu); break; } @@ -625,7 +628,6 @@ void sctp_transport_reset(struct sctp_transport *t) t->error_count = 0; t->rto_pending = 0; t->hb_sent = 0; - t->fast_recovery = 0; /* Initialize the state information for SFR-CACC */ t->cacc.changeover_active = 0; -- cgit v0.10.2 From 65883371894be2631603d5d412f90f8c09290fef Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 30 Apr 2010 22:41:10 -0400 Subject: sctp: rwnd_press should be cumulative rwnd_press tracks the pressure on the recieve window. Every timer the receive buffer overlows, we truncate the receive window and then grow it back. However, if we don't track the cumulative presser, it's possible to reach a situation when receive buffer is empty, but rwnd stays truncated. Signed-off-by: Vlad Yasevich diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 37753cd..65f9a7c 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1482,7 +1482,7 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned len) if (asoc->rwnd >= len) { asoc->rwnd -= len; if (over) { - asoc->rwnd_press = asoc->rwnd; + asoc->rwnd_press += asoc->rwnd; asoc->rwnd = 0; } } else { -- cgit v0.10.2 From ea862c8d1f4a0d193979c7412c3b946f600721ce Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 30 Apr 2010 22:41:10 -0400 Subject: sctp: correctly mark missing chunks in fast recovery According to RFC 4960 Section 7.2.4: If an endpoint is in Fast Recovery and a SACK arrives that advances the Cumulative TSN Ack Point, the miss indications are incremented for all TSNs reported missing in the SACK. Signed-off-by: Vlad Yasevich diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 786c4ff..b491a1a 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1154,6 +1154,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) struct sctp_transport *primary = asoc->peer.primary_path; int count_of_newacks = 0; int gap_ack_blocks; + u8 accum_moved = 0; /* Grab the association's destination address list. */ transport_list = &asoc->peer.transport_addr_list; @@ -1232,16 +1233,22 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) count_of_newacks ++; } + /* Move the Cumulative TSN Ack Point if appropriate. */ + if (TSN_lt(asoc->ctsn_ack_point, sack_ctsn)) { + asoc->ctsn_ack_point = sack_ctsn; + accum_moved = 1; + } + if (gap_ack_blocks) { + + if (asoc->fast_recovery && accum_moved) + highest_new_tsn = highest_tsn; + list_for_each_entry(transport, transport_list, transports) sctp_mark_missing(q, &transport->transmitted, transport, highest_new_tsn, count_of_newacks); } - /* Move the Cumulative TSN Ack Point if appropriate. */ - if (TSN_lt(asoc->ctsn_ack_point, sack_ctsn)) - asoc->ctsn_ack_point = sack_ctsn; - /* Update unack_data field in the assoc. */ sctp_sack_update_unack_data(asoc, sack); @@ -1685,7 +1692,8 @@ static void sctp_mark_missing(struct sctp_outq *q, struct sctp_chunk *chunk; __u32 tsn; char do_fast_retransmit = 0; - struct sctp_transport *primary = q->asoc->peer.primary_path; + struct sctp_association *asoc = q->asoc; + struct sctp_transport *primary = asoc->peer.primary_path; list_for_each_entry(chunk, transmitted_queue, transmitted_list) { -- cgit v0.10.2 From bfa0d9843ac5feb9667990706b4524390fee4df9 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 30 Apr 2010 22:41:10 -0400 Subject: sctp: Optimize computation of highest new tsn in SACK. Right now, if the highest tsn in the SACK doesn't change, we'll end up scanning the transmitted lists on the transports twice: once for locating the highest _new_ tsn, and once for actually tagging chunks as acked. This is a waste, since we can record the highest _new_ tsn at the same time as tagging chunks. Long ago this was not possible because we would try to mark chunks as missing at the same time as tagging them acked and this approach didn't work. Now that the two steps are separate, we can re-use the old approach. Signed-off-by: Vlad Yasevich diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index b491a1a..5d05717 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -62,7 +62,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, struct list_head *transmitted_queue, struct sctp_transport *transport, struct sctp_sackhdr *sack, - __u32 highest_new_tsn); + __u32 *highest_new_tsn); static void sctp_mark_missing(struct sctp_outq *q, struct list_head *transmitted_queue, @@ -1109,32 +1109,6 @@ static void sctp_sack_update_unack_data(struct sctp_association *assoc, assoc->unack_data = unack_data; } -/* Return the highest new tsn that is acknowledged by the given SACK chunk. */ -static __u32 sctp_highest_new_tsn(struct sctp_sackhdr *sack, - struct sctp_association *asoc) -{ - struct sctp_transport *transport; - struct sctp_chunk *chunk; - __u32 highest_new_tsn, tsn; - struct list_head *transport_list = &asoc->peer.transport_addr_list; - - highest_new_tsn = ntohl(sack->cum_tsn_ack); - - list_for_each_entry(transport, transport_list, transports) { - list_for_each_entry(chunk, &transport->transmitted, - transmitted_list) { - tsn = ntohl(chunk->subh.data_hdr->tsn); - - if (!chunk->tsn_gap_acked && - TSN_lt(highest_new_tsn, tsn) && - sctp_acked(sack, tsn)) - highest_new_tsn = tsn; - } - } - - return highest_new_tsn; -} - /* This is where we REALLY process a SACK. * * Process the SACK against the outqueue. Mostly, this just frees @@ -1203,18 +1177,15 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) if (gap_ack_blocks) highest_tsn += ntohs(frags[gap_ack_blocks - 1].gab.end); - if (TSN_lt(asoc->highest_sacked, highest_tsn)) { - highest_new_tsn = highest_tsn; + if (TSN_lt(asoc->highest_sacked, highest_tsn)) asoc->highest_sacked = highest_tsn; - } else { - highest_new_tsn = sctp_highest_new_tsn(sack, asoc); - } + highest_new_tsn = sack_ctsn; /* Run through the retransmit queue. Credit bytes received * and free those chunks that we can. */ - sctp_check_transmitted(q, &q->retransmit, NULL, sack, highest_new_tsn); + sctp_check_transmitted(q, &q->retransmit, NULL, sack, &highest_new_tsn); /* Run through the transmitted queue. * Credit bytes received and free those chunks which we can. @@ -1223,7 +1194,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) */ list_for_each_entry(transport, transport_list, transports) { sctp_check_transmitted(q, &transport->transmitted, - transport, sack, highest_new_tsn); + transport, sack, &highest_new_tsn); /* * SFR-CACC algorithm: * C) Let count_of_newacks be the number of @@ -1331,7 +1302,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, struct list_head *transmitted_queue, struct sctp_transport *transport, struct sctp_sackhdr *sack, - __u32 highest_new_tsn_in_sack) + __u32 *highest_new_tsn_in_sack) { struct list_head *lchunk; struct sctp_chunk *tchunk; @@ -1419,6 +1390,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, */ if (!tchunk->tsn_gap_acked) { tchunk->tsn_gap_acked = 1; + *highest_new_tsn_in_sack = tsn; bytes_acked += sctp_data_size(tchunk); if (!tchunk->transport) migrate_bytes += sctp_data_size(tchunk); -- cgit v0.10.2 From 0e3aef8d09a8c11e3fb83cdcb24b5bc7421b3726 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 30 Apr 2010 22:41:10 -0400 Subject: sctp: Tag messages that can be Nagle delayed at creation. When we create the sctp_datamsg and fragment the user data, we know exactly if we are sending full segments or not and how they might be bundled. During this time, we can mark messages a Nagle capable or not. This makes the check at transmit time much simpler. Signed-off-by: Vlad Yasevich diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index d463296..9d44aef 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -643,17 +643,15 @@ struct sctp_pf { struct sctp_datamsg { /* Chunks waiting to be submitted to lower layer. */ struct list_head chunks; - /* Chunks that have been transmitted. */ - size_t msg_size; /* Reference counting. */ atomic_t refcnt; /* When is this message no longer interesting to the peer? */ unsigned long expires_at; /* Did the messenge fail to send? */ int send_error; - char send_failed; - /* Control whether chunks from this message can be abandoned. */ - char can_abandon; + u8 send_failed:1, + can_abandon:1, /* can chunks from this message can be abandoned. */ + can_delay; /* should this message be Nagle delayed */ }; struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *, diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 3eab6db..476caaf 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -58,9 +58,9 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg) msg->send_failed = 0; msg->send_error = 0; msg->can_abandon = 0; + msg->can_delay = 1; msg->expires_at = 0; INIT_LIST_HEAD(&msg->chunks); - msg->msg_size = 0; } /* Allocate and initialize datamsg. */ @@ -157,7 +157,6 @@ static void sctp_datamsg_assign(struct sctp_datamsg *msg, struct sctp_chunk *chu { sctp_datamsg_hold(msg); chunk->msg = msg; - msg->msg_size += chunk->skb->len; } @@ -247,6 +246,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, if (msg_len >= first_len) { msg_len -= first_len; whole = 1; + msg->can_delay = 0; } /* How many full sized? How many bytes leftover? */ diff --git a/net/sctp/output.c b/net/sctp/output.c index 35e49b9..a646681 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -674,7 +674,7 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, * Don't delay large message writes that may have been * fragmeneted into small peices. */ - if ((len < max) && (chunk->msg->msg_size < max)) { + if ((len < max) && chunk->msg->can_delay) { retval = SCTP_XMIT_NAGLE_DELAY; goto finish; } -- cgit v0.10.2