From 37f7f421cce13435fdc0d870caf51141e5ebf079 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 16 Sep 2005 16:51:01 -0700 Subject: [NET]: Do not leak MSG_CMSG_COMPAT into userspace. Noticed by Sridhar Samudrala. Signed-off-by: David S. Miller diff --git a/net/socket.c b/net/socket.c index c699e93..f926447 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1862,7 +1862,8 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag if (err < 0) goto out_freeiov; } - err = __put_user(msg_sys.msg_flags, COMPAT_FLAGS(msg)); + err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT), + COMPAT_FLAGS(msg)); if (err) goto out_freeiov; if (MSG_CMSG_COMPAT & flags) -- cgit v0.10.2 From 0c10c5d96865ce611d6a780888eff0ef4fab358b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 16 Sep 2005 16:58:33 -0700 Subject: [DCCP]: More precisely set reset_code when sending RESET packets Moving the setting of DCCP_SKB_CB(skb)->dccpd_reset_code to the places where events happen that trigger sending a RESET packet. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller diff --git a/net/dccp/input.c b/net/dccp/input.c index c74034c..062e9f8 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -384,9 +384,9 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, } out_invalid_packet: - return 1; /* dccp_v4_do_rcv will send a reset, but... - FIXME: the reset code should be - DCCP_RESET_CODE_PACKET_ERROR */ + /* dccp_v4_do_rcv will send a reset */ + DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; + return 1; } static int dccp_rcv_respond_partopen_state_process(struct sock *sk, @@ -433,6 +433,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, struct dccp_hdr *dh, unsigned len) { struct dccp_sock *dp = dccp_sk(sk); + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); const int old_state = sk->sk_state; int queued = 0; @@ -473,7 +474,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (dh->dccph_type == DCCP_PKT_RESET) goto discard; - /* Caller (dccp_v4_do_rcv) will send Reset(No Connection)*/ + /* Caller (dccp_v4_do_rcv) will send Reset */ + dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; return 1; } @@ -487,8 +489,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (dccp_parse_options(sk, skb)) goto discard; - if (DCCP_SKB_CB(skb)->dccpd_ack_seq != - DCCP_PKT_WITHOUT_ACK_SEQ) + if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) dccp_event_ack_recv(sk, skb); ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); @@ -500,7 +501,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, */ if (dp->dccps_options.dccpo_send_ack_vector) { if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk, - DCCP_SKB_CB(skb)->dccpd_seq, + dcb->dccpd_seq, DCCP_ACKPKTS_STATE_RECEIVED)) goto discard; /* @@ -551,8 +552,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, dh->dccph_type == DCCP_PKT_REQUEST) || (sk->sk_state == DCCP_RESPOND && dh->dccph_type == DCCP_PKT_DATA)) { - dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, - DCCP_PKT_SYNC); + dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC); goto discard; } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { dccp_rcv_closereq(sk, skb); @@ -563,13 +563,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, } if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) { - dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, - DCCP_PKT_SYNCACK); + dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNCACK); goto discard; } switch (sk->sk_state) { case DCCP_CLOSED: + dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; return 1; case DCCP_REQUESTING: diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 2afaa46..e09907d 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -669,12 +669,16 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) struct dccp_request_sock *dreq; const __u32 saddr = skb->nh.iph->saddr; const __u32 daddr = skb->nh.iph->daddr; + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); + __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; struct dst_entry *dst = NULL; /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ if (((struct rtable *)skb->dst)->rt_flags & - (RTCF_BROADCAST | RTCF_MULTICAST)) + (RTCF_BROADCAST | RTCF_MULTICAST)) { + reset_code = DCCP_RESET_CODE_NO_CONNECTION; goto drop; + } /* * TW buckets are converted to open requests without @@ -718,7 +722,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * dccp_create_openreq_child. */ dreq = dccp_rsk(req); - dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq; + dreq->dreq_isr = dcb->dccpd_seq; dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service; @@ -735,6 +739,7 @@ drop_and_free: __reqsk_free(req); drop: DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); + dcb->dccpd_reset_code = reset_code; return -1; } @@ -1005,7 +1010,6 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; reset: - DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; dccp_v4_ctl_send_reset(skb); discard: kfree_skb(skb); -- cgit v0.10.2 From 67e6b629212fa9ffb7420e8a88a41806af637e28 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 16 Sep 2005 16:58:40 -0700 Subject: [DCCP]: Introduce DCCP_SOCKOPT_SERVICE As discussed in the dccp@vger mailing list: Now applications have to use setsockopt(DCCP_SOCKOPT_SERVICE, service[s]), prior to calling listen() and connect(). An array of unsigned ints can be passed meaning that the listening sock accepts connection requests for several services. With this we can ditch struct sockaddr_dccp and use only sockaddr_in (and sockaddr_in6 in the future). Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 8bf4bac..0e72708 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -4,16 +4,6 @@ #include #include -/* Structure describing an Internet (DCCP) socket address. */ -struct sockaddr_dccp { - __u16 sdccp_family; /* Address family */ - __u16 sdccp_port; /* Port number */ - __u32 sdccp_addr; /* Internet address */ - __u32 sdccp_service; /* Service */ - /* Pad to size of `struct sockaddr': 16 bytes . */ - __u32 sdccp_pad; -}; - /** * struct dccp_hdr - generic part of DCCP packet header * @@ -188,6 +178,9 @@ enum { /* DCCP socket options */ #define DCCP_SOCKOPT_PACKET_SIZE 1 +#define DCCP_SOCKOPT_SERVICE 2 + +#define DCCP_SERVICE_LIST_MAX_LEN 32 #ifdef __KERNEL__ @@ -382,6 +375,25 @@ enum dccp_role { DCCP_ROLE_SERVER, }; +struct dccp_service_list { + __u32 dccpsl_nr; + __u32 dccpsl_list[0]; +}; + +#define DCCP_SERVICE_INVALID_VALUE htonl((__u32)-1) + +static inline int dccp_list_has_service(const struct dccp_service_list *sl, + const u32 service) +{ + if (likely(sl != NULL)) { + u32 i = sl->dccpsl_nr; + while (i--) + if (sl->dccpsl_list[i] == service) + return 1; + } + return 0; +} + /** * struct dccp_sock - DCCP socket state * @@ -417,7 +429,8 @@ struct dccp_sock { __u64 dccps_gss; __u64 dccps_gsr; __u64 dccps_gar; - unsigned long dccps_service; + __u32 dccps_service; + struct dccp_service_list *dccps_service_list; struct timeval dccps_timestamp_time; __u32 dccps_timestamp_echo; __u32 dccps_packet_size; @@ -443,6 +456,11 @@ static inline struct dccp_sock *dccp_sk(const struct sock *sk) return (struct dccp_sock *)sk; } +static inline int dccp_service_not_initialized(const struct sock *sk) +{ + return dccp_sk(sk)->dccps_service == DCCP_SERVICE_INVALID_VALUE; +} + static inline const char *dccp_role(const struct sock *sk) { switch (dccp_sk(sk)->dccps_role) { diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 95c4630..be7a660 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -258,13 +258,12 @@ extern int dccp_v4_send_reset(struct sock *sk, extern void dccp_send_close(struct sock *sk, const int active); struct dccp_skb_cb { - __u8 dccpd_type; - __u8 dccpd_reset_code; - __u8 dccpd_service; - __u8 dccpd_ccval; + __u8 dccpd_type:4; + __u8 dccpd_ccval:4; + __u8 dccpd_reset_code; + __u16 dccpd_opt_len; __u64 dccpd_seq; __u64 dccpd_ack_seq; - int dccpd_opt_len; }; #define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0])) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index e09907d..94a440b 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -246,6 +246,9 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, dp->dccps_role = DCCP_ROLE_CLIENT; + if (dccp_service_not_initialized(sk)) + return -EPROTO; + if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; @@ -661,6 +664,16 @@ static inline u64 dccp_v4_init_sequence(const struct sock *sk, dccp_hdr(skb)->dccph_sport); } +static inline int dccp_bad_service_code(const struct sock *sk, + const __u32 service) +{ + const struct dccp_sock *dp = dccp_sk(sk); + + if (dp->dccps_service == service) + return 0; + return !dccp_list_has_service(dp->dccps_service_list, service); +} + int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { struct inet_request_sock *ireq; @@ -669,6 +682,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) struct dccp_request_sock *dreq; const __u32 saddr = skb->nh.iph->saddr; const __u32 daddr = skb->nh.iph->daddr; + const __u32 service = dccp_hdr_request(skb)->dccph_req_service; struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; struct dst_entry *dst = NULL; @@ -680,6 +694,10 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) goto drop; } + if (dccp_bad_service_code(sk, service)) { + reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; + goto drop; + } /* * TW buckets are converted to open requests without * limitations, they conserve resources and peer is @@ -722,9 +740,9 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * dccp_create_openreq_child. */ dreq = dccp_rsk(req); - dreq->dreq_isr = dcb->dccpd_seq; - dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); - dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service; + dreq->dreq_isr = dcb->dccpd_seq; + dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); + dreq->dreq_service = service; if (dccp_v4_send_response(sk, req, dst)) goto drop_and_free; @@ -1284,6 +1302,7 @@ static int dccp_v4_init_sock(struct sock *sk) sk->sk_write_space = dccp_write_space; dp->dccps_mss_cache = 536; dp->dccps_role = DCCP_ROLE_UNDEFINED; + dp->dccps_service = DCCP_SERVICE_INVALID_VALUE; return 0; } @@ -1305,6 +1324,11 @@ static int dccp_v4_destroy_sock(struct sock *sk) if (inet_csk(sk)->icsk_bind_hash != NULL) inet_put_port(&dccp_hashinfo, sk); + if (dp->dccps_service_list != NULL) { + kfree(dp->dccps_service_list); + dp->dccps_service_list = NULL; + } + ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 18461bc..933e10d 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -93,9 +93,11 @@ struct sock *dccp_create_openreq_child(struct sock *sk, struct inet_connection_sock *newicsk = inet_csk(sk); struct dccp_sock *newdp = dccp_sk(newsk); + newdp->dccps_role = DCCP_ROLE_SERVER; newdp->dccps_hc_rx_ackpkts = NULL; - newdp->dccps_role = DCCP_ROLE_SERVER; - newicsk->icsk_rto = DCCP_TIMEOUT_INIT; + newdp->dccps_service_list = NULL; + newdp->dccps_service = dreq->dreq_service; + newicsk->icsk_rto = DCCP_TIMEOUT_INIT; do_gettimeofday(&newdp->dccps_epoch); if (newdp->dccps_options.dccpo_send_ack_vector) { diff --git a/net/dccp/output.c b/net/dccp/output.c index ea6d0e9..156b1d2 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -85,7 +85,7 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) switch (dcb->dccpd_type) { case DCCP_PKT_REQUEST: dccp_hdr_request(skb)->dccph_req_service = - dcb->dccpd_service; + dp->dccps_service; break; case DCCP_PKT_RESET: dccp_hdr_reset(skb)->dccph_reset_code = @@ -270,6 +270,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, struct request_sock *req) { struct dccp_hdr *dh; + struct dccp_request_sock *dreq; const int dccp_header_size = sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext) + sizeof(struct dccp_hdr_response); @@ -285,8 +286,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, skb->dst = dst_clone(dst); skb->csum = 0; + dreq = dccp_rsk(req); DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; - DCCP_SKB_CB(skb)->dccpd_seq = dccp_rsk(req)->dreq_iss; + DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss; dccp_insert_options(sk, skb); skb->h.raw = skb_push(skb, dccp_header_size); @@ -300,8 +302,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; dh->dccph_type = DCCP_PKT_RESPONSE; dh->dccph_x = 1; - dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr); + dccp_hdr_set_seq(dh, dreq->dreq_iss); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_isr); + dccp_hdr_response(skb)->dccph_resp_service = dreq->dreq_service; dh->dccph_checksum = dccp_v4_checksum(skb, inet_rsk(req)->loc_addr, inet_rsk(req)->rmt_addr); @@ -397,9 +400,6 @@ int dccp_connect(struct sock *sk) skb_reserve(skb, MAX_DCCP_HEADER); DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; - /* FIXME: set service to something meaningful, coming - * from userspace*/ - DCCP_SKB_CB(skb)->dccpd_service = 0; skb->csum = 0; skb_set_owner_w(skb, sk); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 18a0e69..9bda286 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -94,7 +94,15 @@ EXPORT_SYMBOL_GPL(dccp_state_name); static inline int dccp_listen_start(struct sock *sk) { - dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN; + struct dccp_sock *dp = dccp_sk(sk); + + dp->dccps_role = DCCP_ROLE_LISTEN; + /* + * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE) + * before calling listen() + */ + if (dccp_service_not_initialized(sk)) + return -EPROTO; return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); } @@ -202,6 +210,42 @@ int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) return -ENOIOCTLCMD; } +static int dccp_setsockopt_service(struct sock *sk, const u32 service, + char __user *optval, int optlen) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct dccp_service_list *sl = NULL; + + if (service == DCCP_SERVICE_INVALID_VALUE || + optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32)) + return -EINVAL; + + if (optlen > sizeof(service)) { + sl = kmalloc(optlen, GFP_KERNEL); + if (sl == NULL) + return -ENOMEM; + + sl->dccpsl_nr = optlen / sizeof(u32) - 1; + if (copy_from_user(sl->dccpsl_list, + optval + sizeof(service), + optlen - sizeof(service)) || + dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) { + kfree(sl); + return -EFAULT; + } + } + + lock_sock(sk); + dp->dccps_service = service; + + if (dp->dccps_service_list != NULL) + kfree(dp->dccps_service_list); + + dp->dccps_service_list = sl; + release_sock(sk); + return 0; +} + int dccp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { @@ -218,8 +262,10 @@ int dccp_setsockopt(struct sock *sk, int level, int optname, if (get_user(val, (int __user *)optval)) return -EFAULT; - lock_sock(sk); + if (optname == DCCP_SOCKOPT_SERVICE) + return dccp_setsockopt_service(sk, val, optval, optlen); + lock_sock(sk); dp = dccp_sk(sk); err = 0; @@ -236,6 +282,37 @@ int dccp_setsockopt(struct sock *sk, int level, int optname, return err; } +static int dccp_getsockopt_service(struct sock *sk, int len, + u32 __user *optval, + int __user *optlen) +{ + const struct dccp_sock *dp = dccp_sk(sk); + const struct dccp_service_list *sl; + int err = -ENOENT, slen = 0, total_len = sizeof(u32); + + lock_sock(sk); + if (dccp_service_not_initialized(sk)) + goto out; + + if ((sl = dp->dccps_service_list) != NULL) { + slen = sl->dccpsl_nr * sizeof(u32); + total_len += slen; + } + + err = -EINVAL; + if (total_len > len) + goto out; + + err = 0; + if (put_user(total_len, optlen) || + put_user(dp->dccps_service, optval) || + (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen))) + err = -EFAULT; +out: + release_sock(sk); + return err; +} + int dccp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -248,6 +325,10 @@ int dccp_getsockopt(struct sock *sk, int level, int optname, if (get_user(len, optlen)) return -EFAULT; + if (optname == DCCP_SOCKOPT_SERVICE) + return dccp_getsockopt_service(sk, len, + (u32 __user *)optval, optlen); + len = min_t(unsigned int, len, sizeof(int)); if (len < 0) return -EINVAL; -- cgit v0.10.2 From 1cbf07478bbf3e350a2025bc5ea23fedaa95855a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 16 Sep 2005 16:59:20 -0700 Subject: [TG3]: Add AMD K8 to list of write-reorder chipsets. Thanks to Andy Stewart for the report and testing debug patches from Michael Chan. Signed-off-by: David S. Miller diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 7599f52..52eae9f 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -67,8 +67,8 @@ #define DRV_MODULE_NAME "tg3" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "3.39" -#define DRV_MODULE_RELDATE "September 5, 2005" +#define DRV_MODULE_VERSION "3.40" +#define DRV_MODULE_RELDATE "September 15, 2005" #define TG3_DEF_MAC_MODE 0 #define TG3_DEF_RX_MODE 0 @@ -9271,6 +9271,8 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) static struct pci_device_id write_reorder_chipsets[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_FE_GATE_700C) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, + PCI_DEVICE_ID_AMD_K8_NB) }, { }, }; u32 misc_ctrl_reg; @@ -9285,7 +9287,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) tp->tg3_flags2 |= TG3_FLG2_SUN_570X; #endif - /* If we have an AMD 762 chipset, write + /* If we have an AMD 762 or K8 chipset, write * reordering to the mailbox registers done by the host * controller can cause major troubles. We read back from * every mailbox register write to force the writes to be diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 72fe338..3dc1618 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -491,6 +491,7 @@ #define PCI_DEVICE_ID_AMI_MEGARAID2 0x9060 #define PCI_VENDOR_ID_AMD 0x1022 +#define PCI_DEVICE_ID_AMD_K8_NB 0x1100 #define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 #define PCI_DEVICE_ID_AMD_SCSI 0x2020 -- cgit v0.10.2 From 4451362445b2d83886003f1d739b94e4f000eeeb Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Fri, 16 Sep 2005 16:59:46 -0700 Subject: [NETFILTER] CLUSTERIP: introduce reference counting for entries The CLUSTERIP target creates a procfs entry for all different cluster IPs. Although more than one rules can refer to a single cluster IP (and thus a single config structure), removal of the procfs entry is done unconditionally in destroy(). In more complicated situations involving deferred dereferencing of the config structure by procfs and creating a new rule with the same cluster IP it's also possible that no entry will be created for the new rule. This patch fixes the problem by counting the number of entries referencing a given config structure and moving the config list manipulation and procfs entry deletion parts to the clusterip_config_entry_put() function. Signed-off-by: KOVACS Krisztian Signed-off-by: Harald Welte Signed-off-by: David S. Miller diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 7d38913..adbf4d7 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -49,6 +49,8 @@ MODULE_DESCRIPTION("iptables target for CLUSTERIP"); struct clusterip_config { struct list_head list; /* list of all configs */ atomic_t refcount; /* reference count */ + atomic_t entries; /* number of entries/rules + * referencing us */ u_int32_t clusterip; /* the IP address */ u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ @@ -76,23 +78,48 @@ static struct proc_dir_entry *clusterip_procdir; #endif static inline void -clusterip_config_get(struct clusterip_config *c) { +clusterip_config_get(struct clusterip_config *c) +{ atomic_inc(&c->refcount); } static inline void -clusterip_config_put(struct clusterip_config *c) { - if (atomic_dec_and_test(&c->refcount)) { +clusterip_config_put(struct clusterip_config *c) +{ + if (atomic_dec_and_test(&c->refcount)) + kfree(c); +} + +/* increase the count of entries(rules) using/referencing this config */ +static inline void +clusterip_config_entry_get(struct clusterip_config *c) +{ + atomic_inc(&c->entries); +} + +/* decrease the count of entries using/referencing this config. If last + * entry(rule) is removed, remove the config from lists, but don't free it + * yet, since proc-files could still be holding references */ +static inline void +clusterip_config_entry_put(struct clusterip_config *c) +{ + if (atomic_dec_and_test(&c->entries)) { write_lock_bh(&clusterip_lock); list_del(&c->list); write_unlock_bh(&clusterip_lock); + dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0); dev_put(c->dev); - kfree(c); + + /* In case anyone still accesses the file, the open/close + * functions are also incrementing the refcount on their own, + * so it's safe to remove the entry even if it's in use. */ +#ifdef CONFIG_PROC_FS + remove_proc_entry(c->pde->name, c->pde->parent); +#endif } } - static struct clusterip_config * __clusterip_config_find(u_int32_t clusterip) { @@ -111,7 +138,7 @@ __clusterip_config_find(u_int32_t clusterip) } static inline struct clusterip_config * -clusterip_config_find_get(u_int32_t clusterip) +clusterip_config_find_get(u_int32_t clusterip, int entry) { struct clusterip_config *c; @@ -122,6 +149,8 @@ clusterip_config_find_get(u_int32_t clusterip) return NULL; } atomic_inc(&c->refcount); + if (entry) + atomic_inc(&c->entries); read_unlock_bh(&clusterip_lock); return c; @@ -148,6 +177,7 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, c->hash_mode = i->hash_mode; c->hash_initval = i->hash_initval; atomic_set(&c->refcount, 1); + atomic_set(&c->entries, 1); #ifdef CONFIG_PROC_FS /* create proc dir entry */ @@ -415,8 +445,26 @@ checkentry(const char *tablename, /* FIXME: further sanity checks */ - config = clusterip_config_find_get(e->ip.dst.s_addr); - if (!config) { + config = clusterip_config_find_get(e->ip.dst.s_addr, 1); + if (config) { + if (cipinfo->config != NULL) { + /* Case A: This is an entry that gets reloaded, since + * it still has a cipinfo->config pointer. Simply + * increase the entry refcount and return */ + if (cipinfo->config != config) { + printk(KERN_ERR "CLUSTERIP: Reloaded entry " + "has invalid config pointer!\n"); + return 0; + } + clusterip_config_entry_get(cipinfo->config); + } else { + /* Case B: This is a new rule referring to an existing + * clusterip config. */ + cipinfo->config = config; + clusterip_config_entry_get(cipinfo->config); + } + } else { + /* Case C: This is a completely new clusterip config */ if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr)); return 0; @@ -443,10 +491,9 @@ checkentry(const char *tablename, } dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0); } + cipinfo->config = config; } - cipinfo->config = config; - return 1; } @@ -455,13 +502,10 @@ static void destroy(void *matchinfo, unsigned int matchinfosize) { struct ipt_clusterip_tgt_info *cipinfo = matchinfo; - /* we first remove the proc entry and then drop the reference - * count. In case anyone still accesses the file, the open/close - * functions are also incrementing the refcount on their own */ -#ifdef CONFIG_PROC_FS - remove_proc_entry(cipinfo->config->pde->name, - cipinfo->config->pde->parent); -#endif + /* if no more entries are referencing the config, remove it + * from the list and destroy the proc entry */ + clusterip_config_entry_put(cipinfo->config); + clusterip_config_put(cipinfo->config); } @@ -533,7 +577,7 @@ arp_mangle(unsigned int hook, /* if there is no clusterip configuration for the arp reply's * source ip, we don't want to mangle it */ - c = clusterip_config_find_get(payload->src_ip); + c = clusterip_config_find_get(payload->src_ip, 0); if (!c) return NF_ACCEPT; -- cgit v0.10.2 From 136e92bbec0a6d4c2dd1e5b5ac869ab5470547a4 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Fri, 16 Sep 2005 17:00:04 -0700 Subject: [NETFILTER] CLUSTERIP: use a bitmap to store node responsibility data Instead of maintaining an array containing a list of nodes this instance is responsible for let's use a simple bitmap. This provides the following features: * clusterip_responsible() and the add_node()/delete_node() operations become very simple and don't need locking * the config structure is much smaller In spite of the completely different internal data representation the user-space interface remains almost unchanged; the only difference is that the proc file does not list nodes in the order they were added. (The target info structure remains the same.) Signed-off-by: KOVACS Krisztian Signed-off-by: Harald Welte Signed-off-by: David S. Miller diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index adbf4d7..9bcb398 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -30,7 +31,7 @@ #include #include -#define CLUSTERIP_VERSION "0.7" +#define CLUSTERIP_VERSION "0.8" #define DEBUG_CLUSTERIP @@ -56,8 +57,7 @@ struct clusterip_config { u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ struct net_device *dev; /* device */ u_int16_t num_total_nodes; /* total number of nodes */ - u_int16_t num_local_nodes; /* number of local nodes */ - u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; /* node number array */ + unsigned long local_nodes; /* node number array */ #ifdef CONFIG_PROC_FS struct proc_dir_entry *pde; /* proc dir entry */ @@ -68,8 +68,7 @@ struct clusterip_config { static LIST_HEAD(clusterip_configs); -/* clusterip_lock protects the clusterip_configs list _AND_ the configurable - * data within all structurses (num_local_nodes, local_nodes[]) */ +/* clusterip_lock protects the clusterip_configs list */ static DEFINE_RWLOCK(clusterip_lock); #ifdef CONFIG_PROC_FS @@ -156,6 +155,17 @@ clusterip_config_find_get(u_int32_t clusterip, int entry) return c; } +static void +clusterip_config_init_nodelist(struct clusterip_config *c, + const struct ipt_clusterip_tgt_info *i) +{ + int n; + + for (n = 0; n < i->num_local_nodes; n++) { + set_bit(i->local_nodes[n] - 1, &c->local_nodes); + } +} + static struct clusterip_config * clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, struct net_device *dev) @@ -172,8 +182,7 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, c->clusterip = ip; memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); c->num_total_nodes = i->num_total_nodes; - c->num_local_nodes = i->num_local_nodes; - memcpy(&c->local_nodes, &i->local_nodes, sizeof(c->local_nodes)); + clusterip_config_init_nodelist(c, i); c->hash_mode = i->hash_mode; c->hash_initval = i->hash_initval; atomic_set(&c->refcount, 1); @@ -201,53 +210,28 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, static int clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) { - int i; - - write_lock_bh(&clusterip_lock); - if (c->num_local_nodes >= CLUSTERIP_MAX_NODES - || nodenum > CLUSTERIP_MAX_NODES) { - write_unlock_bh(&clusterip_lock); + if (nodenum == 0 || + nodenum > c->num_total_nodes) return 1; - } - - /* check if we alrady have this number in our array */ - for (i = 0; i < c->num_local_nodes; i++) { - if (c->local_nodes[i] == nodenum) { - write_unlock_bh(&clusterip_lock); - return 1; - } - } - c->local_nodes[c->num_local_nodes++] = nodenum; + /* check if we already have this number in our bitfield */ + if (test_and_set_bit(nodenum - 1, &c->local_nodes)) + return 1; - write_unlock_bh(&clusterip_lock); return 0; } static int clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) { - int i; - - write_lock_bh(&clusterip_lock); - - if (c->num_local_nodes <= 1 || nodenum > CLUSTERIP_MAX_NODES) { - write_unlock_bh(&clusterip_lock); + if (nodenum == 0 || + nodenum > c->num_total_nodes) return 1; - } - for (i = 0; i < c->num_local_nodes; i++) { - if (c->local_nodes[i] == nodenum) { - int size = sizeof(u_int16_t)*(c->num_local_nodes-(i+1)); - memmove(&c->local_nodes[i], &c->local_nodes[i+1], size); - c->num_local_nodes--; - write_unlock_bh(&clusterip_lock); - return 0; - } - } + if (test_and_clear_bit(nodenum - 1, &c->local_nodes)) + return 0; - write_unlock_bh(&clusterip_lock); return 1; } @@ -315,25 +299,7 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) static inline int clusterip_responsible(struct clusterip_config *config, u_int32_t hash) { - int i; - - read_lock_bh(&clusterip_lock); - - if (config->num_local_nodes == 0) { - read_unlock_bh(&clusterip_lock); - return 0; - } - - for (i = 0; i < config->num_local_nodes; i++) { - if (config->local_nodes[i] == hash) { - read_unlock_bh(&clusterip_lock); - return 1; - } - } - - read_unlock_bh(&clusterip_lock); - - return 0; + return test_bit(hash - 1, &config->local_nodes); } /*********************************************************************** @@ -618,56 +584,69 @@ static struct nf_hook_ops cip_arp_ops = { #ifdef CONFIG_PROC_FS +struct clusterip_seq_position { + unsigned int pos; /* position */ + unsigned int weight; /* number of bits set == size */ + unsigned int bit; /* current bit */ + unsigned long val; /* current value */ +}; + static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) { struct proc_dir_entry *pde = s->private; struct clusterip_config *c = pde->data; - unsigned int *nodeidx; - - read_lock_bh(&clusterip_lock); - if (*pos >= c->num_local_nodes) + unsigned int weight; + u_int32_t local_nodes; + struct clusterip_seq_position *idx; + + /* FIXME: possible race */ + local_nodes = c->local_nodes; + weight = hweight32(local_nodes); + if (*pos >= weight) return NULL; - nodeidx = kmalloc(sizeof(unsigned int), GFP_KERNEL); - if (!nodeidx) + idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL); + if (!idx) return ERR_PTR(-ENOMEM); - *nodeidx = *pos; - return nodeidx; + idx->pos = *pos; + idx->weight = weight; + idx->bit = ffs(local_nodes); + idx->val = local_nodes; + clear_bit(idx->bit - 1, &idx->val); + + return idx; } static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) { - struct proc_dir_entry *pde = s->private; - struct clusterip_config *c = pde->data; - unsigned int *nodeidx = (unsigned int *)v; + struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v; - *pos = ++(*nodeidx); - if (*pos >= c->num_local_nodes) { + *pos = ++idx->pos; + if (*pos >= idx->weight) { kfree(v); return NULL; } - return nodeidx; + idx->bit = ffs(idx->val); + clear_bit(idx->bit - 1, &idx->val); + return idx; } static void clusterip_seq_stop(struct seq_file *s, void *v) { kfree(v); - - read_unlock_bh(&clusterip_lock); } static int clusterip_seq_show(struct seq_file *s, void *v) { - struct proc_dir_entry *pde = s->private; - struct clusterip_config *c = pde->data; - unsigned int *nodeidx = (unsigned int *)v; + struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v; - if (*nodeidx != 0) + if (idx->pos != 0) seq_putc(s, ','); - seq_printf(s, "%u", c->local_nodes[*nodeidx]); - if (*nodeidx == c->num_local_nodes-1) + seq_printf(s, "%u", idx->bit); + + if (idx->pos == idx->weight - 1) seq_putc(s, '\n'); return 0; -- cgit v0.10.2 From a8f39143ac67ffa2e26ce48aaac6bf5dc7dae95f Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Fri, 16 Sep 2005 17:00:38 -0700 Subject: [NETFILTER]: Fix oops in conntrack event cache ip_ct_refresh_acct() can be called without a valid "skb" pointer. This used to work, since ct_add_counters() deals with that fact. However, the recently-added event cache doesn't handle this at all. This patch is a quick fix that is supposed to be replaced soon by a cleaner solution during the pending redesign of the event cache. Signed-off-by: Harald Welte Signed-off-by: David S. Miller diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 19cba16..f8cd8e4 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -1143,7 +1143,10 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct, if (del_timer(&ct->timeout)) { ct->timeout.expires = jiffies + extra_jiffies; add_timer(&ct->timeout); - ip_conntrack_event_cache(IPCT_REFRESH, skb); + /* FIXME: We loose some REFRESH events if this function + * is called without an skb. I'll fix this later -HW */ + if (skb) + ip_conntrack_event_cache(IPCT_REFRESH, skb); } ct_add_counters(ct, ctinfo, skb); write_unlock_bh(&ip_conntrack_lock); -- cgit v0.10.2 From 22abe310bc4b0c684fd3716af6b6116ff1011707 Mon Sep 17 00:00:00 2001 From: Peter Hagervall Date: Fri, 16 Sep 2005 17:01:03 -0700 Subject: [TG3]: Sparse fixes for tg3 Change 0 to NULL where appropriate. Signed-off-by: Peter Hagervall Acked-by: Jeff Garzik Signed-off-by: David S. Miller diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 52eae9f..06b02c5 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -9534,7 +9534,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) tp->write32_rx_mbox = tg3_write_indirect_mbox; iounmap(tp->regs); - tp->regs = 0; + tp->regs = NULL; pci_read_config_word(tp->pdev, PCI_COMMAND, &pci_cmd); pci_cmd &= ~PCI_COMMAND_MEMORY; @@ -10682,7 +10682,7 @@ static int __devinit tg3_init_one(struct pci_dev *pdev, err_out_iounmap: if (tp->regs) { iounmap(tp->regs); - tp->regs = 0; + tp->regs = NULL; } err_out_free_dev: @@ -10707,7 +10707,7 @@ static void __devexit tg3_remove_one(struct pci_dev *pdev) unregister_netdev(dev); if (tp->regs) { iounmap(tp->regs); - tp->regs = 0; + tp->regs = NULL; } free_netdev(dev); pci_release_regions(pdev); -- cgit v0.10.2 From 777ed97f3e549832845d70dcae96cb36c41a543a Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 17 Sep 2005 00:41:02 -0700 Subject: [NETFILTER] Fix Kconfig dependencies for nfnetlink/ctnetlink Signed-off-by: Harald Welte Signed-off-by: David S. Miller diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 30aa8e2..85190de 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -8,6 +8,7 @@ menu "IP: Netfilter Configuration" # connection tracking, helpers and protocols config IP_NF_CONNTRACK tristate "Connection tracking (required for masq/NAT)" + select NETFILTER_NETLINK if IP_NF_CONNTRACK_NETLINK!=n ---help--- Connection tracking keeps a record of what packets have passed through your machine, in order to figure out how they are related @@ -51,6 +52,15 @@ config IP_NF_CONNTRACK_EVENTS IF unsure, say `N'. +config IP_NF_CONNTRACK_NETLINK + tristate 'Connection tracking netlink interface' + depends on IP_NF_CONNTRACK && NETFILTER_NETLINK + default IP_NF_CONNTRACK if NETFILTER_NETLINK=y + default m if NETFILTER_NETLINK=m + help + This option enables support for a netlink-based userspace interface + + config IP_NF_CT_PROTO_SCTP tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' depends on IP_NF_CONNTRACK && EXPERIMENTAL @@ -774,11 +784,5 @@ config IP_NF_ARP_MANGLE Allows altering the ARP packet payload: source and destination hardware and network addresses. -config IP_NF_CONNTRACK_NETLINK - tristate 'Connection tracking netlink interface' - depends on IP_NF_CONNTRACK && NETFILTER_NETLINK - help - This option enables support for a netlink-based userspace interface - endmenu -- cgit v0.10.2 From 9eb0eec74d9425f4376df653304783339a027d79 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 17 Sep 2005 00:41:21 -0700 Subject: [NETFILTER] move nfnetlink options to right location in kconfig menu Signed-off-by: Harald Welte Signed-off-by: David S. Miller diff --git a/net/Kconfig b/net/Kconfig index 2bdd562..60f6f32 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -140,6 +140,7 @@ config BRIDGE_NETFILTER If unsure, say N. +source "net/netfilter/Kconfig" source "net/ipv4/netfilter/Kconfig" source "net/ipv6/netfilter/Kconfig" source "net/decnet/netfilter/Kconfig" @@ -206,8 +207,6 @@ config NET_PKTGEN To compile this code as a module, choose M here: the module will be called pktgen. -source "net/netfilter/Kconfig" - endmenu endmenu -- cgit v0.10.2 From eb8edb085716621605cc2e7131a6369d2223d992 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 17 Sep 2005 00:42:26 -0700 Subject: [DCCP]: Add MAINTAINERS and CREDITS entries Also remove the SPX entry in MAINTAINERS, forgot to do that when I removed it. Signed-off-by: Ian McDonald Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller diff --git a/CREDITS b/CREDITS index f553f8c..a347520 100644 --- a/CREDITS +++ b/CREDITS @@ -2211,6 +2211,15 @@ D: OV511 driver S: (address available on request) S: USA +N: Ian McDonald +E: iam4@cs.waikato.ac.nz +E: imcdnzl@gmail.com +W: http://wand.net.nz/~iam4 +W: http://imcdnzl.blogspot.com +D: DCCP, CCID3 +S: Hamilton +S: New Zealand + N: Patrick McHardy E: kaber@trash.net P: 1024D/12155E80 B128 7DE6 FF0A C2B2 48BE AB4C C9D4 964E 1215 5E80 @@ -2246,19 +2255,12 @@ S: D-90453 Nuernberg S: Germany N: Arnaldo Carvalho de Melo -E: acme@conectiva.com.br -E: acme@kernel.org -E: acme@gnu.org -W: http://bazar2.conectiva.com.br/~acme -W: http://advogato.org/person/acme +E: acme@mandriva.com +E: acme@ghostprotocols.net +W: http://oops.ghostprotocols.net:81/blog/ P: 1024D/9224DF01 D5DF E3BB E3C8 BCBB F8AD 841A B6AB 4681 9224 DF01 -D: wanrouter hacking -D: misc Makefile, Config.in, drivers and network stacks fixes -D: IPX & LLC network stacks maintainer -D: Cyclom 2X synchronous card driver -D: wl3501 PCMCIA wireless card driver -D: i18n for minicom, net-tools, util-linux, fetchmail, etc -S: Conectiva S.A. +D: IPX, LLC, DCCP, cyc2x, wl3501_cs, net/ hacks +S: Mandriva S: R. Tocantins, 89 - Cristo Rei S: 80050-430 - Curitiba - Paraná S: Brazil diff --git a/MAINTAINERS b/MAINTAINERS index d1e0eb4..dc8f3ba 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -686,6 +686,13 @@ P: Guennadi Liakhovetski M: g.liakhovetski@gmx.de S: Maintained +DCCP PROTOCOL +P: Arnaldo Carvalho de Melo +M: acme@mandriva.com +L: dccp@vger.kernel.org +W: http://www.wlug.org.nz/DCCP +S: Maintained + DECnet NETWORK LAYER P: Patrick Caulfield M: patrick@tykepenguin.com @@ -2271,12 +2278,6 @@ M: R.E.Wolff@BitWizard.nl L: linux-kernel@vger.kernel.org ? S: Supported -SPX NETWORK LAYER -P: Jay Schulist -M: jschlst@samba.org -L: netdev@vger.kernel.org -S: Supported - SRM (Alpha) environment access P: Jan-Benedict Glaw M: jbglaw@lug-owl.de -- cgit v0.10.2 From c58ec93245a1fb7354f9e331960380827b9f41db Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sat, 17 Sep 2005 00:46:27 -0700 Subject: [TG3]: Fix 4GB boundary tx handling Fix and simplify the workaround code for the 4GB boundary tx buffer hardware bug. 1. Need to unmap the original SKB's dma addresses if a new SKB cannot be allocated. 2. Need to pass the base flag to tigon3_4gb_hwbug_workaround() or TSO won't work properly. 3. The guilty entry and length parameters for tigon3_4gb_hwbug_workaround() are removed as they are not necessary. 4. Remove assumption that only one fragment can hit the 4GB boundary. Another fragment can hit 8GB for example. Signed-off-by: Michael Chan Signed-off-by: David S. Miller diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 06b02c5..81f4aed 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -3442,31 +3442,47 @@ static void tg3_tx_timeout(struct net_device *dev) schedule_work(&tp->reset_task); } +/* Test for DMA buffers crossing any 4GB boundaries: 4G, 8G, etc */ +static inline int tg3_4g_overflow_test(dma_addr_t mapping, int len) +{ + u32 base = (u32) mapping & 0xffffffff; + + return ((base > 0xffffdcc0) && + (base + len + 8 < base)); +} + static void tg3_set_txd(struct tg3 *, int, dma_addr_t, int, u32, u32); static int tigon3_4gb_hwbug_workaround(struct tg3 *tp, struct sk_buff *skb, - u32 guilty_entry, int guilty_len, - u32 last_plus_one, u32 *start, u32 mss) + u32 last_plus_one, u32 *start, + u32 base_flags, u32 mss) { struct sk_buff *new_skb = skb_copy(skb, GFP_ATOMIC); - dma_addr_t new_addr; + dma_addr_t new_addr = 0; u32 entry = *start; - int i; + int i, ret = 0; if (!new_skb) { - dev_kfree_skb(skb); - return -1; + ret = -1; + } else { + /* New SKB is guaranteed to be linear. */ + entry = *start; + new_addr = pci_map_single(tp->pdev, new_skb->data, new_skb->len, + PCI_DMA_TODEVICE); + /* Make sure new skb does not cross any 4G boundaries. + * Drop the packet if it does. + */ + if (tg3_4g_overflow_test(new_addr, new_skb->len)) { + ret = -1; + dev_kfree_skb(new_skb); + new_skb = NULL; + } else { + tg3_set_txd(tp, entry, new_addr, new_skb->len, + base_flags, 1 | (mss << 1)); + *start = NEXT_TX(entry); + } } - /* New SKB is guaranteed to be linear. */ - entry = *start; - new_addr = pci_map_single(tp->pdev, new_skb->data, new_skb->len, - PCI_DMA_TODEVICE); - tg3_set_txd(tp, entry, new_addr, new_skb->len, - (skb->ip_summed == CHECKSUM_HW) ? - TXD_FLAG_TCPUDP_CSUM : 0, 1 | (mss << 1)); - *start = NEXT_TX(entry); - /* Now clean up the sw ring entries. */ i = 0; while (entry != last_plus_one) { @@ -3491,7 +3507,7 @@ static int tigon3_4gb_hwbug_workaround(struct tg3 *tp, struct sk_buff *skb, dev_kfree_skb(skb); - return 0; + return ret; } static void tg3_set_txd(struct tg3 *tp, int entry, @@ -3517,19 +3533,10 @@ static void tg3_set_txd(struct tg3 *tp, int entry, txd->vlan_tag = vlan_tag << TXD_VLAN_TAG_SHIFT; } -static inline int tg3_4g_overflow_test(dma_addr_t mapping, int len) -{ - u32 base = (u32) mapping & 0xffffffff; - - return ((base > 0xffffdcc0) && - (base + len + 8 < base)); -} - static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct tg3 *tp = netdev_priv(dev); dma_addr_t mapping; - unsigned int i; u32 len, entry, base_flags, mss; int would_hit_hwbug; @@ -3624,7 +3631,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) would_hit_hwbug = 0; if (tg3_4g_overflow_test(mapping, len)) - would_hit_hwbug = entry + 1; + would_hit_hwbug = 1; tg3_set_txd(tp, entry, mapping, len, base_flags, (skb_shinfo(skb)->nr_frags == 0) | (mss << 1)); @@ -3648,12 +3655,8 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) tp->tx_buffers[entry].skb = NULL; pci_unmap_addr_set(&tp->tx_buffers[entry], mapping, mapping); - if (tg3_4g_overflow_test(mapping, len)) { - /* Only one should match. */ - if (would_hit_hwbug) - BUG(); - would_hit_hwbug = entry + 1; - } + if (tg3_4g_overflow_test(mapping, len)) + would_hit_hwbug = 1; if (tp->tg3_flags2 & TG3_FLG2_HW_TSO) tg3_set_txd(tp, entry, mapping, len, @@ -3669,34 +3672,15 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) if (would_hit_hwbug) { u32 last_plus_one = entry; u32 start; - unsigned int len = 0; - - would_hit_hwbug -= 1; - entry = entry - 1 - skb_shinfo(skb)->nr_frags; - entry &= (TG3_TX_RING_SIZE - 1); - start = entry; - i = 0; - while (entry != last_plus_one) { - if (i == 0) - len = skb_headlen(skb); - else - len = skb_shinfo(skb)->frags[i-1].size; - - if (entry == would_hit_hwbug) - break; - i++; - entry = NEXT_TX(entry); - - } + start = entry - 1 - skb_shinfo(skb)->nr_frags; + start &= (TG3_TX_RING_SIZE - 1); /* If the workaround fails due to memory/mapping * failure, silently drop this packet. */ - if (tigon3_4gb_hwbug_workaround(tp, skb, - entry, len, - last_plus_one, - &start, mss)) + if (tigon3_4gb_hwbug_workaround(tp, skb, last_plus_one, + &start, base_flags, mss)) goto out_unlock; entry = start; -- cgit v0.10.2