From 2e290f43ddb2331db2e308da206fe154bec91a7d Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 18 May 2007 15:11:01 +1000 Subject: [CRYPTO] Kconfig: Use menuconfig objects Use menuconfigs instead of menus, so the whole menu can be disabled at once instead of going through all options. Signed-off-by: Jan Engelhardt Signed-off-by: Andrew Morton Signed-off-by: Herbert Xu diff --git a/crypto/Kconfig b/crypto/Kconfig index 4ca0ab3..935301e 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -2,9 +2,7 @@ # Cryptographic API Configuration # -menu "Cryptographic options" - -config CRYPTO +menuconfig CRYPTO bool "Cryptographic API" help This option provides the core Cryptographic API. @@ -463,5 +461,3 @@ config CRYPTO_TEST source "drivers/crypto/Kconfig" endif # if CRYPTO - -endmenu -- cgit v0.10.2 From fe3c5206adc5d7395828185ab73e9a522655b984 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 19 May 2007 17:51:40 +1000 Subject: [CRYPTO] api: Wake up all waiters when larval completes Right now when a larval matures or when it dies of an error we only wake up one waiter. This would cause other waiters to timeout unnecessarily. This patch changes it to use complete_all to wake up all waiters. 
Signed-off-by: Herbert Xu diff --git a/crypto/algapi.c b/crypto/algapi.c index f137a432..38aa9e99 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -34,7 +34,7 @@ void crypto_larval_error(const char *name, u32 type, u32 mask) if (alg) { if (crypto_is_larval(alg)) { struct crypto_larval *larval = (void *)alg; - complete(&larval->completion); + complete_all(&larval->completion); } crypto_mod_put(alg); } @@ -164,7 +164,7 @@ static int __crypto_register_alg(struct crypto_alg *alg, continue; larval->adult = alg; - complete(&larval->completion); + complete_all(&larval->completion); continue; } diff --git a/crypto/api.c b/crypto/api.c index 33734fd..4ccc5af 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -144,7 +144,7 @@ static void crypto_larval_kill(struct crypto_alg *alg) down_write(&crypto_alg_sem); list_del(&alg->cra_list); up_write(&crypto_alg_sem); - complete(&larval->completion); + complete_all(&larval->completion); crypto_alg_put(alg); } -- cgit v0.10.2 From ca7c39385ce1a7b44894a4b225a4608624e90730 Mon Sep 17 00:00:00 2001 From: Sebastian Siewior Date: Sat, 19 May 2007 19:51:21 +1000 Subject: [CRYPTO] api: Handle unaligned keys in setkey setkey() in {cipher,blkcipher,ablkcipher,hash}.c does not respect the requested alignment by the algorithm. This patch fixes it. The extra memory is allocated by kmalloc() with GFP_ATOMIC flag. 
Signed-off-by: Sebastian Siewior Signed-off-by: Herbert Xu diff --git a/crypto/ablkcipher.c b/crypto/ablkcipher.c index 9348ddd..d45fa16 100644 --- a/crypto/ablkcipher.c +++ b/crypto/ablkcipher.c @@ -19,16 +19,41 @@ #include #include +static int setkey_unaligned(struct crypto_ablkcipher *tfm, const u8 *key, unsigned int keylen) +{ + struct ablkcipher_alg *cipher = crypto_ablkcipher_alg(tfm); + unsigned long alignmask = crypto_ablkcipher_alignmask(tfm); + int ret; + u8 *buffer, *alignbuffer; + unsigned long absize; + + absize = keylen + alignmask; + buffer = kmalloc(absize, GFP_ATOMIC); + if (!buffer) + return -ENOMEM; + + alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); + memcpy(alignbuffer, key, keylen); + ret = cipher->setkey(tfm, alignbuffer, keylen); + memset(alignbuffer, 0, absize); + kfree(buffer); + return ret; +} + static int setkey(struct crypto_ablkcipher *tfm, const u8 *key, unsigned int keylen) { struct ablkcipher_alg *cipher = crypto_ablkcipher_alg(tfm); + unsigned long alignmask = crypto_ablkcipher_alignmask(tfm); if (keylen < cipher->min_keysize || keylen > cipher->max_keysize) { crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } + if ((unsigned long)key & alignmask) + return setkey_unaligned(tfm, key, keylen); + return cipher->setkey(tfm, key, keylen); } diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c index 8edf40c..40a3dcf 100644 --- a/crypto/blkcipher.c +++ b/crypto/blkcipher.c @@ -336,16 +336,41 @@ static int blkcipher_walk_first(struct blkcipher_desc *desc, return blkcipher_walk_next(desc, walk); } +static int setkey_unaligned(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) +{ + struct blkcipher_alg *cipher = &tfm->__crt_alg->cra_blkcipher; + unsigned long alignmask = crypto_tfm_alg_alignmask(tfm); + int ret; + u8 *buffer, *alignbuffer; + unsigned long absize; + + absize = keylen + alignmask; + buffer = kmalloc(absize, GFP_ATOMIC); + if (!buffer) + return -ENOMEM; + + alignbuffer = 
(u8 *)ALIGN((unsigned long)buffer, alignmask + 1); + memcpy(alignbuffer, key, keylen); + ret = cipher->setkey(tfm, alignbuffer, keylen); + memset(alignbuffer, 0, absize); + kfree(buffer); + return ret; +} + static int setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { struct blkcipher_alg *cipher = &tfm->__crt_alg->cra_blkcipher; + unsigned long alignmask = crypto_tfm_alg_alignmask(tfm); if (keylen < cipher->min_keysize || keylen > cipher->max_keysize) { tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; return -EINVAL; } + if ((unsigned long)key & alignmask) + return setkey_unaligned(tfm, key, keylen); + return cipher->setkey(tfm, key, keylen); } diff --git a/crypto/cipher.c b/crypto/cipher.c index 333aab2..0b2650c 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -20,16 +20,43 @@ #include #include "internal.h" +static int setkey_unaligned(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) +{ + struct cipher_alg *cia = &tfm->__crt_alg->cra_cipher; + unsigned long alignmask = crypto_tfm_alg_alignmask(tfm); + int ret; + u8 *buffer, *alignbuffer; + unsigned long absize; + + absize = keylen + alignmask; + buffer = kmalloc(absize, GFP_ATOMIC); + if (!buffer) + return -ENOMEM; + + alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); + memcpy(alignbuffer, key, keylen); + ret = cia->cia_setkey(tfm, alignbuffer, keylen); + memset(alignbuffer, 0, absize); + kfree(buffer); + return ret; + +} + static int setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { struct cipher_alg *cia = &tfm->__crt_alg->cra_cipher; - + unsigned long alignmask = crypto_tfm_alg_alignmask(tfm); + tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; if (keylen < cia->cia_min_keysize || keylen > cia->cia_max_keysize) { tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; return -EINVAL; - } else - return cia->cia_setkey(tfm, key, keylen); + } + + if ((unsigned long)key & alignmask) + return setkey_unaligned(tfm, key, keylen); + + return cia->cia_setkey(tfm, key, 
keylen); } static void cipher_crypt_unaligned(void (*fn)(struct crypto_tfm *, u8 *, diff --git a/crypto/hash.c b/crypto/hash.c index 4ccd22d..4d75ca7 100644 --- a/crypto/hash.c +++ b/crypto/hash.c @@ -22,6 +22,42 @@ static unsigned int crypto_hash_ctxsize(struct crypto_alg *alg, u32 type, return alg->cra_ctxsize; } +static int hash_setkey_unaligned(struct crypto_hash *crt, const u8 *key, + unsigned int keylen) +{ + struct crypto_tfm *tfm = crypto_hash_tfm(crt); + struct hash_alg *alg = &tfm->__crt_alg->cra_hash; + unsigned long alignmask = crypto_hash_alignmask(crt); + int ret; + u8 *buffer, *alignbuffer; + unsigned long absize; + + absize = keylen + alignmask; + buffer = kmalloc(absize, GFP_ATOMIC); + if (!buffer) + return -ENOMEM; + + alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); + memcpy(alignbuffer, key, keylen); + ret = alg->setkey(crt, alignbuffer, keylen); + memset(alignbuffer, 0, absize); + kfree(buffer); + return ret; +} + +static int hash_setkey(struct crypto_hash *crt, const u8 *key, + unsigned int keylen) +{ + struct crypto_tfm *tfm = crypto_hash_tfm(crt); + struct hash_alg *alg = &tfm->__crt_alg->cra_hash; + unsigned long alignmask = crypto_hash_alignmask(crt); + + if ((unsigned long)key & alignmask) + return hash_setkey_unaligned(crt, key, keylen); + + return alg->setkey(crt, key, keylen); +} + static int crypto_init_hash_ops(struct crypto_tfm *tfm, u32 type, u32 mask) { struct hash_tfm *crt = &tfm->crt_hash; @@ -34,7 +70,7 @@ static int crypto_init_hash_ops(struct crypto_tfm *tfm, u32 type, u32 mask) crt->update = alg->update; crt->final = alg->final; crt->digest = alg->digest; - crt->setkey = alg->setkey; + crt->setkey = hash_setkey; crt->digestsize = alg->digestsize; return 0; -- cgit v0.10.2 From e69ff734e15eb7f61621f8764ce0a2181823a737 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 8 Jun 2007 16:26:08 +1000 Subject: [CRYPTO] cipher: Remove obsolete fields from cipher_tfm This removes all the unused block cipher fields 
from cipher_tfm. Signed-off-by: Herbert Xu diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 0de7e2a..357e8cf 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -295,28 +295,8 @@ struct blkcipher_tfm { }; struct cipher_tfm { - void *cit_iv; - unsigned int cit_ivsize; - u32 cit_mode; int (*cit_setkey)(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen); - int (*cit_encrypt)(struct crypto_tfm *tfm, - struct scatterlist *dst, - struct scatterlist *src, - unsigned int nbytes); - int (*cit_encrypt_iv)(struct crypto_tfm *tfm, - struct scatterlist *dst, - struct scatterlist *src, - unsigned int nbytes, u8 *iv); - int (*cit_decrypt)(struct crypto_tfm *tfm, - struct scatterlist *dst, - struct scatterlist *src, - unsigned int nbytes); - int (*cit_decrypt_iv)(struct crypto_tfm *tfm, - struct scatterlist *dst, - struct scatterlist *src, - unsigned int nbytes, u8 *iv); - void (*cit_xor_block)(u8 *dst, const u8 *src); void (*cit_encrypt_one)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); void (*cit_decrypt_one)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); }; -- cgit v0.10.2 From e559e91cce3af215d78b7262360f19b95978aab3 Mon Sep 17 00:00:00 2001 From: Sebastian Siewior Date: Fri, 22 Jun 2007 19:47:35 +0800 Subject: [CRYPTO] api: Allow ablkcipher with no queues Evgeniy's hifn driver and probably mine don't use ablkcipher->queue at all. The show method of ablkcipher will access this field without checking if it is valid. 
Signed-off-by: Sebastian Siewior Signed-off-by: Herbert Xu diff --git a/crypto/ablkcipher.c b/crypto/ablkcipher.c index d45fa16..1c166b4 100644 --- a/crypto/ablkcipher.c +++ b/crypto/ablkcipher.c @@ -91,8 +91,10 @@ static void crypto_ablkcipher_show(struct seq_file *m, struct crypto_alg *alg) seq_printf(m, "min keysize : %u\n", ablkcipher->min_keysize); seq_printf(m, "max keysize : %u\n", ablkcipher->max_keysize); seq_printf(m, "ivsize : %u\n", ablkcipher->ivsize); - seq_printf(m, "qlen : %u\n", ablkcipher->queue->qlen); - seq_printf(m, "max qlen : %u\n", ablkcipher->queue->max_qlen); + if (ablkcipher->queue) { + seq_printf(m, "qlen : %u\n", ablkcipher->queue->qlen); + seq_printf(m, "max qlen : %u\n", ablkcipher->queue->max_qlen); + } } const struct crypto_type crypto_ablkcipher_type = { -- cgit v0.10.2 From 08d1f2155cd5b21bb3848f46d9747afb1ccd249d Mon Sep 17 00:00:00 2001 From: Andy Green Date: Tue, 10 Jul 2007 19:29:37 +0200 Subject: [PATCH] mac80211: Monitor mode radiotap injection docs Add monitor mode radiotap injection docs. Signed-off-by: Andy Green Signed-off-by: Jiri Benc Signed-off-by: John W. Linville diff --git a/Documentation/networking/mac80211-injection.txt b/Documentation/networking/mac80211-injection.txt new file mode 100644 index 0000000..53ef7a0 --- /dev/null +++ b/Documentation/networking/mac80211-injection.txt @@ -0,0 +1,59 @@ +How to use packet injection with mac80211 +========================================= + +mac80211 now allows arbitrary packets to be injected down any Monitor Mode +interface from userland. The packet you inject needs to be composed in the +following format: + + [ radiotap header ] + [ ieee80211 header ] + [ payload ] + +The radiotap format is discussed in +./Documentation/networking/radiotap-headers.txt. + +Despite 13 radiotap argument types are currently defined, most only make sense +to appear on received packets. 
Currently three kinds of argument are used by +the injection code, although it knows to skip any other arguments that are +present (facilitating replay of captured radiotap headers directly): + + - IEEE80211_RADIOTAP_RATE - u8 arg in 500kbps units (0x02 --> 1Mbps) + + - IEEE80211_RADIOTAP_ANTENNA - u8 arg, 0x00 = ant1, 0x01 = ant2 + + - IEEE80211_RADIOTAP_DBM_TX_POWER - u8 arg, dBm + +Here is an example valid radiotap header defining these three parameters + + 0x00, 0x00, // <-- radiotap version + 0x0b, 0x00, // <- radiotap header length + 0x04, 0x0c, 0x00, 0x00, // <-- bitmap + 0x6c, // <-- rate + 0x0c, //<-- tx power + 0x01 //<-- antenna + +The ieee80211 header follows immediately afterwards, looking for example like +this: + + 0x08, 0x01, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x13, 0x22, 0x33, 0x44, 0x55, 0x66, + 0x13, 0x22, 0x33, 0x44, 0x55, 0x66, + 0x10, 0x86 + +Then lastly there is the payload. + +After composing the packet contents, it is sent by send()-ing it to a logical +mac80211 interface that is in Monitor mode. Libpcap can also be used, +(which is easier than doing the work to bind the socket to the right +interface), along the following lines: + + ppcap = pcap_open_live(szInterfaceName, 800, 1, 20, szErrbuf); +... 
+ r = pcap_inject(ppcap, u8aSendBuffer, nLength); + +You can also find sources for a complete inject test applet here: + +http://penumbra.warmcat.com/_twk/tiki-index.php?page=packetspammer + +Andy Green diff --git a/Documentation/networking/radiotap-headers.txt b/Documentation/networking/radiotap-headers.txt new file mode 100644 index 0000000..e29e027 --- /dev/null +++ b/Documentation/networking/radiotap-headers.txt @@ -0,0 +1,87 @@ +How to use radiotap headers +=========================== + +Pointer to the radiotap include file +------------------------------------ + +Radiotap headers are variable-length and extensible, you can get most of the +information you need to know on them from: + +./include/net/ieee80211_radiotap.h + +This document gives an overview and warns on some corner cases. + + +Structure of the header +----------------------- + +There is a fixed portion at the start which contains a u32 bitmap that defines +if the possible argument associated with that bit is present or not. So if b0 +of the it_present member of ieee80211_radiotap_header is set, it means that +the header for argument index 0 (IEEE80211_RADIOTAP_TSFT) is present in the +argument area. + + < 8-byte ieee80211_radiotap_header > + [ <possible argument bitmap extensions ... > ] + [ <argument> ... ] + +At the moment there are only 13 possible argument indexes defined, but in case +we run out of space in the u32 it_present member, it is defined that b31 set +indicates that there is another u32 bitmap following (shown as "possible +argument bitmap extensions..." above), and the start of the arguments is moved +forward 4 bytes each time. + +Note also that the it_len member __le16 is set to the total number of bytes +covered by the ieee80211_radiotap_header and any arguments following. + + +Requirements for arguments +-------------------------- + +After the fixed part of the header, the arguments follow for each argument +index whose matching bit is set in the it_present member of +ieee80211_radiotap_header. 
+ + - the arguments are all stored little-endian! + + - the argument payload for a given argument index has a fixed size. So + IEEE80211_RADIOTAP_TSFT being present always indicates an 8-byte argument is + present. See the comments in ./include/net/ieee80211_radiotap.h for a nice + breakdown of all the argument sizes + + - the arguments must be aligned to a boundary of the argument size using + padding. So a u16 argument must start on the next u16 boundary if it isn't + already on one, a u32 must start on the next u32 boundary and so on. + + - "alignment" is relative to the start of the ieee80211_radiotap_header, ie, + the first byte of the radiotap header. The absolute alignment of that first + byte isn't defined. So even if the whole radiotap header is starting at, eg, + address 0x00000003, still the first byte of the radiotap header is treated as + 0 for alignment purposes. + + - the above point that there may be no absolute alignment for multibyte + entities in the fixed radiotap header or the argument region means that you + have to take special evasive action when trying to access these multibyte + entities. Some arches like Blackfin cannot deal with an attempt to + dereference, eg, a u16 pointer that is pointing to an odd address. Instead + you have to use a kernel API get_unaligned() to dereference the pointer, + which will do it bytewise on the arches that require that. + + - The arguments for a given argument index can be a compound of multiple types + together. For example IEEE80211_RADIOTAP_CHANNEL has an argument payload + consisting of two u16s of total length 4. When this happens, the padding + rule is applied dealing with a u16, NOT dealing with a 4-byte single entity. 
+ + +Example valid radiotap header +----------------------------- + + 0x00, 0x00, // <-- radiotap version + pad byte + 0x0b, 0x00, // <- radiotap header length + 0x04, 0x0c, 0x00, 0x00, // <-- bitmap + 0x6c, // <-- rate (in 500kbps units) + 0x0c, //<-- tx power + 0x01 //<-- antenna + + +Andy Green -- cgit v0.10.2 From 179f831bc33104d14deb54a52b7a8b43433f8ccc Mon Sep 17 00:00:00 2001 From: Andy Green Date: Tue, 10 Jul 2007 19:29:38 +0200 Subject: [PATCH] cfg80211: Radiotap parser Generic code to walk through the fields in a radiotap header, accounting for nasties like extended "field present" bitfields and alignment rules Signed-off-by: Andy Green Signed-off-by: Jiri Benc Signed-off-by: John W. Linville diff --git a/Documentation/networking/radiotap-headers.txt b/Documentation/networking/radiotap-headers.txt index e29e027..953331c 100644 --- a/Documentation/networking/radiotap-headers.txt +++ b/Documentation/networking/radiotap-headers.txt @@ -84,4 +84,69 @@ Example valid radiotap header 0x01 //<-- antenna +Using the Radiotap Parser +------------------------- + +If you are having to parse a radiotap struct, you can radically simplify the +job by using the radiotap parser that lives in net/wireless/radiotap.c and has +its prototypes available in include/net/cfg80211.h. You use it like this: + +#include <net/cfg80211.h> + +/* buf points to the start of the radiotap header part */ + +int MyFunction(u8 * buf, int buflen) +{ + int pkt_rate_100kHz = 0, antenna = 0, pwr = 0; + struct ieee80211_radiotap_iterator iterator; + int ret = ieee80211_radiotap_iterator_init(&iterator, buf, buflen); + + while (!ret) { + + ret = ieee80211_radiotap_iterator_next(&iterator); + + if (ret) + continue; + + /* see if this argument is something we can use */ + + switch (iterator.this_arg_index) { + /* + * You must take care when dereferencing iterator.this_arg + * for multibyte types... the pointer is not aligned. 
Use + * get_unaligned((type *)iterator.this_arg) to dereference + * iterator.this_arg for type "type" safely on all arches. + */ + case IEEE80211_RADIOTAP_RATE: + /* radiotap "rate" u8 is in + * 500kbps units, eg, 0x02=1Mbps + */ + pkt_rate_100kHz = (*iterator.this_arg) * 5; + break; + + case IEEE80211_RADIOTAP_ANTENNA: + /* radiotap uses 0 for 1st ant */ + antenna = (*iterator.this_arg); + break; + + case IEEE80211_RADIOTAP_DBM_TX_POWER: + pwr = *iterator.this_arg; + break; + + default: + break; + } + } /* while more rt headers */ + + if (ret != -ENOENT) + return TXRX_DROP; + + /* discard the radiotap header part */ + buf += iterator.max_length; + buflen -= iterator.max_length; + + ... + +} + Andy Green diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 88171f8..7edaef6 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -11,6 +11,44 @@ * Copyright 2006 Johannes Berg */ + +/* Radiotap header iteration + * implemented in net/wireless/radiotap.c + * docs in Documentation/networking/radiotap-headers.txt + */ +/** + * struct ieee80211_radiotap_iterator - tracks walk thru present radiotap args + * @rtheader: pointer to the radiotap header we are walking through + * @max_length: length of radiotap header in cpu byte ordering + * @this_arg_index: IEEE80211_RADIOTAP_... 
index of current arg + * @this_arg: pointer to current radiotap arg + * @arg_index: internal next argument index + * @arg: internal next argument pointer + * @next_bitmap: internal pointer to next present u32 + * @bitmap_shifter: internal shifter for curr u32 bitmap, b0 set == arg present + */ + +struct ieee80211_radiotap_iterator { + struct ieee80211_radiotap_header *rtheader; + int max_length; + int this_arg_index; + u8 *this_arg; + + int arg_index; + u8 *arg; + __le32 *next_bitmap; + u32 bitmap_shifter; +}; + +extern int ieee80211_radiotap_iterator_init( + struct ieee80211_radiotap_iterator *iterator, + struct ieee80211_radiotap_header *radiotap_header, + int max_length); + +extern int ieee80211_radiotap_iterator_next( + struct ieee80211_radiotap_iterator *iterator); + + /* from net/wireless.h */ struct wiphy; diff --git a/net/wireless/Makefile b/net/wireless/Makefile index 3a96ae6..092116e 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_WIRELESS_EXT) += wext.o obj-$(CONFIG_CFG80211) += cfg80211.o -cfg80211-y += core.o sysfs.o +cfg80211-y += core.o sysfs.o radiotap.o diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c new file mode 100644 index 0000000..68c11d0 --- /dev/null +++ b/net/wireless/radiotap.c @@ -0,0 +1,257 @@ +/* + * Radiotap parser + * + * Copyright 2007 Andy Green + */ + +#include +#include +#include + +/* function prototypes and related defs are in include/net/cfg80211.h */ + +/** + * ieee80211_radiotap_iterator_init - radiotap parser iterator initialization + * @iterator: radiotap_iterator to initialize + * @radiotap_header: radiotap header to parse + * @max_length: total length we can parse into (eg, whole packet length) + * + * Returns: 0 or a negative error code if there is a problem. + * + * This function initializes an opaque iterator struct which can then + * be passed to ieee80211_radiotap_iterator_next() to visit every radiotap + * argument which is present in the header. 
It knows about extended + * present headers and handles them. + * + * How to use: + * call ieee80211_radiotap_iterator_init() to init a semi-opaque iterator + * struct ieee80211_radiotap_iterator (no need to init the struct beforehand) + * checking for a good 0 return code. Then loop calling + * ieee80211_radiotap_iterator_next()... it returns either 0, + * -ENOENT if there are no more args to parse, or -EINVAL if there is a problem. + * The iterator's @this_arg member points to the start of the argument + * associated with the current argument index that is present, which can be + * found in the iterator's @this_arg_index member. This arg index corresponds + * to the IEEE80211_RADIOTAP_... defines. + * + * Radiotap header length: + * You can find the CPU-endian total radiotap header length in + * iterator->max_length after executing ieee80211_radiotap_iterator_init() + * successfully. + * + * Alignment Gotcha: + * You must take care when dereferencing iterator.this_arg + * for multibyte types... the pointer is not aligned. Use + * get_unaligned((type *)iterator.this_arg) to dereference + * iterator.this_arg for type "type" safely on all arches. 
+ * + * Example code: + * See Documentation/networking/radiotap-headers.txt + */ + +int ieee80211_radiotap_iterator_init( + struct ieee80211_radiotap_iterator *iterator, + struct ieee80211_radiotap_header *radiotap_header, + int max_length) +{ + /* Linux only supports version 0 radiotap format */ + if (radiotap_header->it_version) + return -EINVAL; + + /* sanity check for allowed length and radiotap length field */ + if (max_length < le16_to_cpu(get_unaligned(&radiotap_header->it_len))) + return -EINVAL; + + iterator->rtheader = radiotap_header; + iterator->max_length = le16_to_cpu(get_unaligned( + &radiotap_header->it_len)); + iterator->arg_index = 0; + iterator->bitmap_shifter = le32_to_cpu(get_unaligned( + &radiotap_header->it_present)); + iterator->arg = (u8 *)radiotap_header + sizeof(*radiotap_header); + iterator->this_arg = NULL; + + /* find payload start allowing for extended bitmap(s) */ + + if (unlikely(iterator->bitmap_shifter & (1<<IEEE80211_RADIOTAP_EXT))) { + while (le32_to_cpu(get_unaligned((__le32 *)iterator->arg)) & + (1<<IEEE80211_RADIOTAP_EXT)) { + iterator->arg += sizeof(u32); + + /* + * check for insanity where the present bitmaps + * keep claiming to extend up to or even beyond the + * stated radiotap header length + */ + + if (((ulong)iterator->arg - + (ulong)iterator->rtheader) > iterator->max_length) + return -EINVAL; + } + + iterator->arg += sizeof(u32); + + /* + * no need to check again for blowing past stated radiotap + * header length, because ieee80211_radiotap_iterator_next + * checks it before it is dereferenced + */ + } + + /* we are all initialized happily */ + + return 0; +} +EXPORT_SYMBOL(ieee80211_radiotap_iterator_init); + + +/** + * ieee80211_radiotap_iterator_next - return next radiotap parser iterator arg + * @iterator: radiotap_iterator to move to next arg (if any) + * + * Returns: 0 if there is an argument to handle, + * -ENOENT if there are no more args or -EINVAL + * if there is something else wrong. 
+ * + * This function provides the next radiotap arg index (IEEE80211_RADIOTAP_*) + * in @this_arg_index and sets @this_arg to point to the + * payload for the field. It takes care of alignment handling and extended + * present fields. @this_arg can be changed by the caller (eg, + * incremented to move inside a compound argument like + * IEEE80211_RADIOTAP_CHANNEL). The args pointed to are in + * little-endian format whatever the endianess of your CPU. + * + * Alignment Gotcha: + * You must take care when dereferencing iterator.this_arg + * for multibyte types... the pointer is not aligned. Use + * get_unaligned((type *)iterator.this_arg) to dereference + * iterator.this_arg for type "type" safely on all arches. + */ + +int ieee80211_radiotap_iterator_next( + struct ieee80211_radiotap_iterator *iterator) +{ + + /* + * small length lookup table for all radiotap types we heard of + * starting from b0 in the bitmap, so we can walk the payload + * area of the radiotap header + * + * There is a requirement to pad args, so that args + * of a given length must begin at a boundary of that length + * -- but note that compound args are allowed (eg, 2 x u16 + * for IEEE80211_RADIOTAP_CHANNEL) so total arg length is not + * a reliable indicator of alignment requirement. 
+ * + * upper nybble: content alignment for arg + * lower nybble: content length for arg + */ + + static const u8 rt_sizes[] = { + [IEEE80211_RADIOTAP_TSFT] = 0x88, + [IEEE80211_RADIOTAP_FLAGS] = 0x11, + [IEEE80211_RADIOTAP_RATE] = 0x11, + [IEEE80211_RADIOTAP_CHANNEL] = 0x24, + [IEEE80211_RADIOTAP_FHSS] = 0x22, + [IEEE80211_RADIOTAP_DBM_ANTSIGNAL] = 0x11, + [IEEE80211_RADIOTAP_DBM_ANTNOISE] = 0x11, + [IEEE80211_RADIOTAP_LOCK_QUALITY] = 0x22, + [IEEE80211_RADIOTAP_TX_ATTENUATION] = 0x22, + [IEEE80211_RADIOTAP_DB_TX_ATTENUATION] = 0x22, + [IEEE80211_RADIOTAP_DBM_TX_POWER] = 0x11, + [IEEE80211_RADIOTAP_ANTENNA] = 0x11, + [IEEE80211_RADIOTAP_DB_ANTSIGNAL] = 0x11, + [IEEE80211_RADIOTAP_DB_ANTNOISE] = 0x11 + /* + * add more here as they are defined in + * include/net/ieee80211_radiotap.h + */ + }; + + /* + * for every radiotap entry we can at + * least skip (by knowing the length)... + */ + + while (iterator->arg_index < sizeof(rt_sizes)) { + int hit = 0; + int pad; + + if (!(iterator->bitmap_shifter & 1)) + goto next_entry; /* arg not present */ + + /* + * arg is present, account for alignment padding + * 8-bit args can be at any alignment + * 16-bit args must start on 16-bit boundary + * 32-bit args must start on 32-bit boundary + * 64-bit args must start on 64-bit boundary + * + * note that total arg size can differ from alignment of + * elements inside arg, so we use upper nybble of length + * table to base alignment on + * + * also note: these alignments are ** relative to the + * start of the radiotap header **. There is no guarantee + * that the radiotap header itself is aligned on any + * kind of boundary. 
+ * + * the above is why get_unaligned() is used to dereference + * multibyte elements from the radiotap area + */ + + pad = (((ulong)iterator->arg) - + ((ulong)iterator->rtheader)) & + ((rt_sizes[iterator->arg_index] >> 4) - 1); + + if (pad) + iterator->arg += + (rt_sizes[iterator->arg_index] >> 4) - pad; + + /* + * this is what we will return to user, but we need to + * move on first so next call has something fresh to test + */ + iterator->this_arg_index = iterator->arg_index; + iterator->this_arg = iterator->arg; + hit = 1; + + /* internally move on the size of this arg */ + iterator->arg += rt_sizes[iterator->arg_index] & 0x0f; + + /* + * check for insanity where we are given a bitmap that + * claims to have more arg content than the length of the + * radiotap section. We will normally end up equalling this + * max_length on the last arg, never exceeding it. + */ + + if (((ulong)iterator->arg - (ulong)iterator->rtheader) > + iterator->max_length) + return -EINVAL; + + next_entry: + iterator->arg_index++; + if (unlikely((iterator->arg_index & 31) == 0)) { + /* completed current u32 bitmap */ + if (iterator->bitmap_shifter & 1) { + /* b31 was set, there is more */ + /* move to next u32 bitmap */ + iterator->bitmap_shifter = le32_to_cpu( + get_unaligned(iterator->next_bitmap)); + iterator->next_bitmap++; + } else + /* no more bitmaps: end */ + iterator->arg_index = sizeof(rt_sizes); + } else /* just try the next bit */ + iterator->bitmap_shifter >>= 1; + + /* if we found a valid arg earlier, return it now */ + if (hit) + return 0; + } + + /* we don't know how to handle any more args, we're done */ + return -ENOENT; +} +EXPORT_SYMBOL(ieee80211_radiotap_iterator_next); -- cgit v0.10.2 From e4c967c6d88ca94365dd8e2a7bbd22eedb8d7ae7 Mon Sep 17 00:00:00 2001 From: Andy Green Date: Tue, 10 Jul 2007 19:32:07 +0200 Subject: [PATCH] mac80211: Monitor mode radiotap-based packet injection Signed-off-by: Andy Green Signed-off-by: Jiri Benc Signed-off-by: John W. 
Linville diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c index 4e84f24..8b57eaa 100644 --- a/net/mac80211/ieee80211.c +++ b/net/mac80211/ieee80211.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "ieee80211_common.h" #include "ieee80211_i.h" @@ -1118,7 +1119,138 @@ ieee80211_tx_h_ps_buf(struct ieee80211_txrx_data *tx) } -static void inline +/* + * deal with packet injection down monitor interface + * with Radiotap Header -- only called for monitor mode interface + */ + +static ieee80211_txrx_result +__ieee80211_parse_tx_radiotap( + struct ieee80211_txrx_data *tx, + struct sk_buff *skb, struct ieee80211_tx_control *control) +{ + /* + * this is the moment to interpret and discard the radiotap header that + * must be at the start of the packet injected in Monitor mode + * + * Need to take some care with endian-ness since radiotap + * args are little-endian + */ + + struct ieee80211_radiotap_iterator iterator; + struct ieee80211_radiotap_header *rthdr = + (struct ieee80211_radiotap_header *) skb->data; + struct ieee80211_hw_mode *mode = tx->local->hw.conf.mode; + int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len); + + /* + * default control situation for all injected packets + * FIXME: this does not suit all usage cases, expand to allow control + */ + + control->retry_limit = 1; /* no retry */ + control->key_idx = -1; /* no encryption key */ + control->flags &= ~(IEEE80211_TXCTL_USE_RTS_CTS | + IEEE80211_TXCTL_USE_CTS_PROTECT); + control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT | + IEEE80211_TXCTL_NO_ACK; + control->antenna_sel_tx = 0; /* default to default antenna */ + + /* + * for every radiotap entry that is present + * (ieee80211_radiotap_iterator_next returns -ENOENT when no more + * entries present, or -EINVAL on error) + */ + + while (!ret) { + int i, target_rate; + + ret = ieee80211_radiotap_iterator_next(&iterator); + + if (ret) + continue; + + /* see if this argument is something we can use */ + switch 
(iterator.this_arg_index) { + /* + * You must take care when dereferencing iterator.this_arg + * for multibyte types... the pointer is not aligned. Use + * get_unaligned((type *)iterator.this_arg) to dereference + * iterator.this_arg for type "type" safely on all arches. + */ + case IEEE80211_RADIOTAP_RATE: + /* + * radiotap rate u8 is in 500kbps units eg, 0x02=1Mbps + * ieee80211 rate int is in 100kbps units eg, 0x0a=1Mbps + */ + target_rate = (*iterator.this_arg) * 5; + for (i = 0; i < mode->num_rates; i++) { + struct ieee80211_rate *r = &mode->rates[i]; + + if (r->rate > target_rate) + continue; + + control->rate = r; + + if (r->flags & IEEE80211_RATE_PREAMBLE2) + control->tx_rate = r->val2; + else + control->tx_rate = r->val; + + /* end on exact match */ + if (r->rate == target_rate) + i = mode->num_rates; + } + break; + + case IEEE80211_RADIOTAP_ANTENNA: + /* + * radiotap uses 0 for 1st ant, mac80211 is 1 for + * 1st ant + */ + control->antenna_sel_tx = (*iterator.this_arg) + 1; + break; + + case IEEE80211_RADIOTAP_DBM_TX_POWER: + control->power_level = *iterator.this_arg; + break; + + case IEEE80211_RADIOTAP_FLAGS: + if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FCS) { + /* + * this indicates that the skb we have been + * handed has the 32-bit FCS CRC at the end... 
+ * we should react to that by snipping it off + * because it will be recomputed and added + * on transmission + */ + if (skb->len < (iterator.max_length + FCS_LEN)) + return TXRX_DROP; + + skb_trim(skb, skb->len - FCS_LEN); + } + break; + + default: + break; + } + } + + if (ret != -ENOENT) /* ie, if we didn't simply run out of fields */ + return TXRX_DROP; + + /* + * remove the radiotap header + * iterator->max_length was sanity-checked against + * skb->len by iterator init + */ + skb_pull(skb, iterator.max_length); + + return TXRX_CONTINUE; +} + + +static ieee80211_txrx_result inline __ieee80211_tx_prepare(struct ieee80211_txrx_data *tx, struct sk_buff *skb, struct net_device *dev, @@ -1126,6 +1258,9 @@ __ieee80211_tx_prepare(struct ieee80211_txrx_data *tx, { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_sub_if_data *sdata; + ieee80211_txrx_result res = TXRX_CONTINUE; + int hdrlen; memset(tx, 0, sizeof(*tx)); @@ -1135,7 +1270,32 @@ __ieee80211_tx_prepare(struct ieee80211_txrx_data *tx, tx->sdata = IEEE80211_DEV_TO_SUB_IF(dev); tx->sta = sta_info_get(local, hdr->addr1); tx->fc = le16_to_cpu(hdr->frame_control); + + /* + * set defaults for things that can be set by + * injected radiotap headers + */ control->power_level = local->hw.conf.power_level; + control->antenna_sel_tx = local->hw.conf.antenna_sel_tx; + if (local->sta_antenna_sel != STA_ANTENNA_SEL_AUTO && tx->sta) + control->antenna_sel_tx = tx->sta->antenna_sel_tx; + + /* process and remove the injection radiotap header */ + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (unlikely(sdata->type == IEEE80211_IF_TYPE_MNTR)) { + if (__ieee80211_parse_tx_radiotap(tx, skb, control) == + TXRX_DROP) { + return TXRX_DROP; + } + /* + * we removed the radiotap header after this point, + * we filled control with what we could use + * set to the actual ieee header now + */ + hdr = (struct ieee80211_hdr *) skb->data; + res = 
TXRX_QUEUED; /* indication it was monitor packet */ + } + tx->u.tx.control = control; tx->u.tx.unicast = !is_multicast_ether_addr(hdr->addr1); if (is_multicast_ether_addr(hdr->addr1)) @@ -1152,9 +1312,6 @@ __ieee80211_tx_prepare(struct ieee80211_txrx_data *tx, control->flags |= IEEE80211_TXCTL_CLEAR_DST_MASK; tx->sta->clear_dst_mask = 0; } - control->antenna_sel_tx = local->hw.conf.antenna_sel_tx; - if (local->sta_antenna_sel != STA_ANTENNA_SEL_AUTO && tx->sta) - control->antenna_sel_tx = tx->sta->antenna_sel_tx; hdrlen = ieee80211_get_hdrlen(tx->fc); if (skb->len > hdrlen + sizeof(rfc1042_header) + 2) { u8 *pos = &skb->data[hdrlen + sizeof(rfc1042_header)]; @@ -1162,6 +1319,7 @@ __ieee80211_tx_prepare(struct ieee80211_txrx_data *tx, } control->flags |= IEEE80211_TXCTL_FIRST_FRAGMENT; + return res; } static int inline is_ieee80211_device(struct net_device *dev, @@ -1274,7 +1432,7 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb, struct sta_info *sta; ieee80211_tx_handler *handler; struct ieee80211_txrx_data tx; - ieee80211_txrx_result res = TXRX_DROP; + ieee80211_txrx_result res = TXRX_DROP, res_prepare; int ret, i; WARN_ON(__ieee80211_queue_pending(local, control->queue)); @@ -1284,15 +1442,26 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb, return 0; } - __ieee80211_tx_prepare(&tx, skb, dev, control); + res_prepare = __ieee80211_tx_prepare(&tx, skb, dev, control); + + if (res_prepare == TXRX_DROP) { + dev_kfree_skb(skb); + return 0; + } + sta = tx.sta; tx.u.tx.mgmt_interface = mgmt; tx.u.tx.mode = local->hw.conf.mode; - for (handler = local->tx_handlers; *handler != NULL; handler++) { - res = (*handler)(&tx); - if (res != TXRX_CONTINUE) - break; + if (res_prepare == TXRX_QUEUED) { /* if it was an injected packet */ + res = TXRX_CONTINUE; + } else { + for (handler = local->tx_handlers; *handler != NULL; + handler++) { + res = (*handler)(&tx); + if (res != TXRX_CONTINUE) + break; + } } skb = tx.skb; /* handlers are 
allowed to change skb */ @@ -1531,6 +1700,51 @@ static int ieee80211_subif_start_xmit(struct sk_buff *skb, goto fail; } + if (unlikely(sdata->type == IEEE80211_IF_TYPE_MNTR)) { + struct ieee80211_radiotap_header *prthdr = + (struct ieee80211_radiotap_header *)skb->data; + u16 len; + + /* + * there must be a radiotap header at the + * start in this case + */ + if (unlikely(prthdr->it_version)) { + /* only version 0 is supported */ + ret = 0; + goto fail; + } + + skb->dev = local->mdev; + + pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; + memset(pkt_data, 0, sizeof(*pkt_data)); + pkt_data->ifindex = sdata->dev->ifindex; + pkt_data->mgmt_iface = 0; + pkt_data->do_not_encrypt = 1; + + /* above needed because we set skb device to master */ + + /* + * fix up the pointers accounting for the radiotap + * header still being in there. We are being given + * a precooked IEEE80211 header so no need for + * normal processing + */ + len = le16_to_cpu(get_unaligned(&prthdr->it_len)); + skb_set_mac_header(skb, len); + skb_set_network_header(skb, len + sizeof(hdr)); + skb_set_transport_header(skb, len + sizeof(hdr)); + + /* + * pass the radiotap header up to + * the next stage intact + */ + dev_queue_xmit(skb); + + return 0; + } + nh_pos = skb_network_header(skb) - skb->data; h_pos = skb_transport_header(skb) - skb->data; -- cgit v0.10.2 From b306f45300866adc01b84f7aa083bfcd9cbb89c4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Jul 2007 19:32:08 +0200 Subject: [PATCH] mac80211: show transmitted frames on monitor interfaces This patch makes mac80211 show transmitted frames on monitor interfaces, including radiotap headers that indicate some transmission parameters. The shown parameters will need to be expanded, but this should work as a basis to work from. Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. 
Linville diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c index 8b57eaa..85f23fd 100644 --- a/net/mac80211/ieee80211.c +++ b/net/mac80211/ieee80211.c @@ -57,6 +57,17 @@ static const unsigned char eapol_header[] = { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00, 0x88, 0x8e }; +/* + * For seeing transmitted packets on monitor interfaces + * we have a radiotap header too. + */ +struct ieee80211_tx_status_rtap_hdr { + struct ieee80211_radiotap_header hdr; + __le16 tx_flags; + u8 data_retries; +} __attribute__ ((packed)); + + static inline void ieee80211_include_sequence(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr *hdr) { @@ -529,7 +540,7 @@ ieee80211_tx_h_fragment(struct ieee80211_txrx_data *tx) /* reserve enough extra head and tail room for possible * encryption */ frag = frags[i] = - dev_alloc_skb(tx->local->hw.extra_tx_headroom + + dev_alloc_skb(tx->local->tx_headroom + frag_threshold + IEEE80211_ENCRYPT_HEADROOM + IEEE80211_ENCRYPT_TAILROOM); @@ -538,8 +549,8 @@ ieee80211_tx_h_fragment(struct ieee80211_txrx_data *tx) /* Make sure that all fragments use the same priority so * that they end up using the same TX queue */ frag->priority = first->priority; - skb_reserve(frag, tx->local->hw.extra_tx_headroom + - IEEE80211_ENCRYPT_HEADROOM); + skb_reserve(frag, tx->local->tx_headroom + + IEEE80211_ENCRYPT_HEADROOM); fhdr = (struct ieee80211_hdr *) skb_put(frag, hdrlen); memcpy(fhdr, first->data, hdrlen); if (i == num_fragm - 2) @@ -1636,8 +1647,7 @@ static int ieee80211_master_start_xmit(struct sk_buff *skb, } osdata = IEEE80211_DEV_TO_SUB_IF(odev); - headroom = osdata->local->hw.extra_tx_headroom + - IEEE80211_ENCRYPT_HEADROOM; + headroom = osdata->local->tx_headroom + IEEE80211_ENCRYPT_HEADROOM; if (skb_headroom(skb) < headroom) { if (pskb_expand_head(skb, headroom, 0, GFP_ATOMIC)) { dev_kfree_skb(skb); @@ -1833,7 +1843,7 @@ static int ieee80211_subif_start_xmit(struct sk_buff *skb, * build in headroom in __dev_alloc_skb() (linux/skbuff.h) and * 
alloc_skb() (net/core/skbuff.c) */ - head_need = hdrlen + encaps_len + local->hw.extra_tx_headroom; + head_need = hdrlen + encaps_len + local->tx_headroom; head_need -= skb_headroom(skb); /* We are going to modify skb data, so make a copy of it if happens to @@ -1920,9 +1930,9 @@ ieee80211_mgmt_start_xmit(struct sk_buff *skb, struct net_device *dev) return 0; } - if (skb_headroom(skb) < sdata->local->hw.extra_tx_headroom) { - if (pskb_expand_head(skb, - sdata->local->hw.extra_tx_headroom, 0, GFP_ATOMIC)) { + if (skb_headroom(skb) < sdata->local->tx_headroom) { + if (pskb_expand_head(skb, sdata->local->tx_headroom, + 0, GFP_ATOMIC)) { dev_kfree_skb(skb); return 0; } @@ -2061,12 +2071,12 @@ struct sk_buff * ieee80211_beacon_get(struct ieee80211_hw *hw, int if_id, bh_len = ap->beacon_head_len; bt_len = ap->beacon_tail_len; - skb = dev_alloc_skb(local->hw.extra_tx_headroom + + skb = dev_alloc_skb(local->tx_headroom + bh_len + bt_len + 256 /* maximum TIM len */); if (!skb) return NULL; - skb_reserve(skb, local->hw.extra_tx_headroom); + skb_reserve(skb, local->tx_headroom); memcpy(skb_put(skb, bh_len), b_head, bh_len); ieee80211_include_sequence(sdata, (struct ieee80211_hdr *)skb->data); @@ -4498,6 +4508,9 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, struct ieee80211_local *local = hw_to_local(hw); u16 frag, type; u32 msg_type; + struct ieee80211_tx_status_rtap_hdr *rthdr; + struct ieee80211_sub_if_data *sdata; + int monitors; if (!status) { printk(KERN_ERR @@ -4609,27 +4622,100 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, local->dot11FailedCount++; } - if (!(status->control.flags & IEEE80211_TXCTL_REQ_TX_STATUS) - || unlikely(!local->apdev)) { + msg_type = (status->flags & IEEE80211_TX_STATUS_ACK) ? 
+ ieee80211_msg_tx_callback_ack : ieee80211_msg_tx_callback_fail; + + /* this was a transmitted frame, but now we want to reuse it */ + skb_orphan(skb); + + if ((status->control.flags & IEEE80211_TXCTL_REQ_TX_STATUS) && + local->apdev) { + if (local->monitors) { + skb2 = skb_clone(skb, GFP_ATOMIC); + } else { + skb2 = skb; + skb = NULL; + } + + if (skb2) + /* Send frame to hostapd */ + ieee80211_rx_mgmt(local, skb2, NULL, msg_type); + + if (!skb) + return; + } + + if (!local->monitors) { dev_kfree_skb(skb); return; } - msg_type = (status->flags & IEEE80211_TX_STATUS_ACK) ? - ieee80211_msg_tx_callback_ack : ieee80211_msg_tx_callback_fail; + /* send frame to monitor interfaces now */ - /* skb was the original skb used for TX. Clone it and give the clone - * to netif_rx(). Free original skb. */ - skb2 = skb_copy(skb, GFP_ATOMIC); - if (!skb2) { + if (skb_headroom(skb) < sizeof(*rthdr)) { + printk(KERN_ERR "ieee80211_tx_status: headroom too small\n"); dev_kfree_skb(skb); return; } - dev_kfree_skb(skb); - skb = skb2; - /* Send frame to hostapd */ - ieee80211_rx_mgmt(local, skb, NULL, msg_type); + rthdr = (struct ieee80211_tx_status_rtap_hdr*) + skb_push(skb, sizeof(*rthdr)); + + memset(rthdr, 0, sizeof(*rthdr)); + rthdr->hdr.it_len = cpu_to_le16(sizeof(*rthdr)); + rthdr->hdr.it_present = + cpu_to_le32((1 << IEEE80211_RADIOTAP_TX_FLAGS) | + (1 << IEEE80211_RADIOTAP_DATA_RETRIES)); + + if (!(status->flags & IEEE80211_TX_STATUS_ACK) && + !is_multicast_ether_addr(hdr->addr1)) + rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_FAIL); + + if ((status->control.flags & IEEE80211_TXCTL_USE_RTS_CTS) && + (status->control.flags & IEEE80211_TXCTL_USE_CTS_PROTECT)) + rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_CTS); + else if (status->control.flags & IEEE80211_TXCTL_USE_RTS_CTS) + rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_RTS); + + rthdr->data_retries = status->retry_count; + + read_lock(&local->sub_if_lock); + monitors = local->monitors; + 
list_for_each_entry(sdata, &local->sub_if_list, list) { + /* + * Using the monitors counter is possibly racy, but + * if the value is wrong we simply either clone the skb + * once too much or forget sending it to one monitor iface + * The latter case isn't nice but fixing the race is much + * more complicated. + */ + if (!monitors || !skb) + goto out; + + if (sdata->type == IEEE80211_IF_TYPE_MNTR) { + if (!netif_running(sdata->dev)) + continue; + monitors--; + if (monitors) + skb2 = skb_clone(skb, GFP_KERNEL); + else + skb2 = NULL; + skb->dev = sdata->dev; + /* XXX: is this sufficient for BPF? */ + skb_set_mac_header(skb, 0); + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->pkt_type = PACKET_OTHERHOST; + skb->protocol = htons(ETH_P_802_2); + memset(skb->cb, 0, sizeof(skb->cb)); + netif_rx(skb); + skb = skb2; + break; + } + } + out: + read_unlock(&local->sub_if_lock); + if (skb) + dev_kfree_skb(skb); } EXPORT_SYMBOL(ieee80211_tx_status); @@ -4926,6 +5012,14 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) goto fail_workqueue; } + /* + * The hardware needs headroom for sending the frame, + * and we need some headroom for passing the frame to monitor + * interfaces, but never both at the same time. + */ + local->tx_headroom = max(local->hw.extra_tx_headroom, + sizeof(struct ieee80211_tx_status_rtap_hdr)); + debugfs_hw_add(local); local->hw.conf.beacon_int = 1000; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index af4d14d..5a91e17 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -392,6 +392,7 @@ struct ieee80211_local { int monitors; struct iw_statistics wstats; u8 wstats_flags; + int tx_headroom; /* required headroom for hardware/radiotap */ enum { IEEE80211_DEV_UNINITIALIZED = 0, -- cgit v0.10.2 From 7f8c05982865a32ee001b79ee0bd469f55ac8aba Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Jul 2007 19:32:08 +0200 Subject: [PATCH] mac80211: remove ieee80211_msg_passive_scan This constant is unused. 
Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. Linville diff --git a/net/mac80211/ieee80211_common.h b/net/mac80211/ieee80211_common.h index b9a73e7..7af6710 100644 --- a/net/mac80211/ieee80211_common.h +++ b/net/mac80211/ieee80211_common.h @@ -47,7 +47,7 @@ enum ieee80211_msg_type { ieee80211_msg_normal = 0, ieee80211_msg_tx_callback_ack = 1, ieee80211_msg_tx_callback_fail = 2, - ieee80211_msg_passive_scan = 3, + /* hole at 3, was ieee80211_msg_passive_scan but unused */ ieee80211_msg_wep_frame_unknown_key = 4, ieee80211_msg_michael_mic_failure = 5, /* hole at 6, was monitor but never sent to userspace */ -- cgit v0.10.2 From c59304b5e07128816347fe3996d7952561f60529 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Jul 2007 19:32:08 +0200 Subject: [PATCH] mac80211: remove ieee80211_set_aid_for_sta Remove ieee80211_set_aid_for_sta and associated code. Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. Linville diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a7f122b..6278857 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -921,12 +921,6 @@ struct sk_buff * ieee80211_get_buffered_bc(struct ieee80211_hw *hw, int if_id, struct ieee80211_tx_control *control); -/* Low level drivers that have their own MLME and MAC indicate - * the aid for an associating station with this call */ -int ieee80211_set_aid_for_sta(struct ieee80211_hw *hw, - u8 *peer_address, u16 aid); - - /* Given an sk_buff with a raw 802.11 header at the data pointer this function * returns the 802.11 header length in bytes (not including encryption * headers). 
If the data in the sk_buff is too short to contain a valid 802.11 diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c index 85f23fd..4bcf180 100644 --- a/net/mac80211/ieee80211.c +++ b/net/mac80211/ieee80211.c @@ -3165,34 +3165,6 @@ int ieee80211_radar_status(struct ieee80211_hw *hw, int channel, } EXPORT_SYMBOL(ieee80211_radar_status); -int ieee80211_set_aid_for_sta(struct ieee80211_hw *hw, u8 *peer_address, - u16 aid) -{ - struct sk_buff *skb; - struct ieee80211_msg_set_aid_for_sta *msg; - struct ieee80211_local *local = hw_to_local(hw); - - /* unlikely because if this event only happens for APs, - * which require an open ap device. */ - if (unlikely(!local->apdev)) - return 0; - - skb = dev_alloc_skb(sizeof(struct ieee80211_frame_info) + - sizeof(struct ieee80211_msg_set_aid_for_sta)); - - if (!skb) - return -ENOMEM; - skb_reserve(skb, sizeof(struct ieee80211_frame_info)); - - msg = (struct ieee80211_msg_set_aid_for_sta *) - skb_put(skb, sizeof(struct ieee80211_msg_set_aid_for_sta)); - memcpy(msg->sta_address, peer_address, ETH_ALEN); - msg->aid = aid; - - ieee80211_rx_mgmt(local, skb, NULL, ieee80211_msg_set_aid_for_sta); - return 0; -} -EXPORT_SYMBOL(ieee80211_set_aid_for_sta); static void ap_sta_ps_start(struct net_device *dev, struct sta_info *sta) { diff --git a/net/mac80211/ieee80211_common.h b/net/mac80211/ieee80211_common.h index 7af6710..77c6afb 100644 --- a/net/mac80211/ieee80211_common.h +++ b/net/mac80211/ieee80211_common.h @@ -52,16 +52,11 @@ enum ieee80211_msg_type { ieee80211_msg_michael_mic_failure = 5, /* hole at 6, was monitor but never sent to userspace */ ieee80211_msg_sta_not_assoc = 7, - ieee80211_msg_set_aid_for_sta = 8 /* used by Intersil MVC driver */, + /* 8 was ieee80211_msg_set_aid_for_sta */ ieee80211_msg_key_threshold_notification = 9, ieee80211_msg_radar = 11, }; -struct ieee80211_msg_set_aid_for_sta { - char sta_address[ETH_ALEN]; - u16 aid; -}; - struct ieee80211_msg_key_notification { int tx_rx_count; char 
ifname[IFNAMSIZ]; -- cgit v0.10.2 From 333af2f0715c8d4d38cb657d8f4fb7c4e3ceba9f Mon Sep 17 00:00:00 2001 From: Hong Liu Date: Tue, 10 Jul 2007 19:32:08 +0200 Subject: [PATCH] mac80211: add support for iwlist channel Add supported channels info in SIOCGIWRANGE implementation. Signed-off-by: Hong Liu Signed-off-by: Jiri Benc Signed-off-by: John W. Linville diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c index 66e8a97..ef74a91 100644 --- a/net/mac80211/ieee80211_ioctl.c +++ b/net/mac80211/ieee80211_ioctl.c @@ -345,6 +345,8 @@ static int ieee80211_ioctl_giwrange(struct net_device *dev, { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct iw_range *range = (struct iw_range *) extra; + struct ieee80211_hw_mode *mode = NULL; + int c = 0; data->length = sizeof(struct iw_range); memset(range, 0, sizeof(struct iw_range)); @@ -378,6 +380,29 @@ static int ieee80211_ioctl_giwrange(struct net_device *dev, range->enc_capa = IW_ENC_CAPA_WPA | IW_ENC_CAPA_WPA2 | IW_ENC_CAPA_CIPHER_TKIP | IW_ENC_CAPA_CIPHER_CCMP; + list_for_each_entry(mode, &local->modes_list, list) { + int i = 0; + + if (!(local->enabled_modes & (1 << mode->mode)) || + (local->hw_modes & local->enabled_modes & + (1 << MODE_IEEE80211G) && mode->mode == MODE_IEEE80211B)) + continue; + + while (i < mode->num_channels && c < IW_MAX_FREQUENCIES) { + struct ieee80211_channel *chan = &mode->channels[i]; + + if (chan->flag & IEEE80211_CHAN_W_SCAN) { + range->freq[c].i = chan->chan; + range->freq[c].m = chan->freq * 100000; + range->freq[c].e = 1; + c++; + } + i++; + } + } + range->num_channels = c; + range->num_frequency = c; + IW_EVENT_CAPA_SET_KERNEL(range->event_capa); IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWTHRSPY); IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWAP); -- cgit v0.10.2 From 40f7cac9f8dd662c1dd02334afdceef0be03e34f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Jul 2007 19:32:08 +0200 Subject: [PATCH] mac80211: separate monitor/subif_start_xmit 
This patch separates the monitor interface start_xmit from the subif start xmit (those other devices have 802.3 framing, monitor interfaces have radiotap framing) Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. Linville diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c index 4bcf180..e916983 100644 --- a/net/mac80211/ieee80211.c +++ b/net/mac80211/ieee80211.c @@ -1673,6 +1673,56 @@ static int ieee80211_master_start_xmit(struct sk_buff *skb, } +int ieee80211_monitor_start_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_tx_packet_data *pkt_data; + struct ieee80211_radiotap_header *prthdr = + (struct ieee80211_radiotap_header *)skb->data; + u16 len; + + /* + * there must be a radiotap header at the + * start in this case + */ + if (unlikely(prthdr->it_version)) { + /* only version 0 is supported */ + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + + skb->dev = local->mdev; + + pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; + memset(pkt_data, 0, sizeof(*pkt_data)); + pkt_data->ifindex = dev->ifindex; + pkt_data->mgmt_iface = 0; + pkt_data->do_not_encrypt = 1; + + /* above needed because we set skb device to master */ + + /* + * fix up the pointers accounting for the radiotap + * header still being in there. 
We are being given + * a precooked IEEE80211 header so no need for + * normal processing + */ + len = le16_to_cpu(get_unaligned(&prthdr->it_len)); + skb_set_mac_header(skb, len); + skb_set_network_header(skb, len + sizeof(struct ieee80211_hdr)); + skb_set_transport_header(skb, len + sizeof(struct ieee80211_hdr)); + + /* + * pass the radiotap header up to + * the next stage intact + */ + dev_queue_xmit(skb); + + return NETDEV_TX_OK; +} + + /** * ieee80211_subif_start_xmit - netif start_xmit function for Ethernet-type * subinterfaces (wlan#, WDS, and VLAN interfaces) @@ -1688,8 +1738,8 @@ static int ieee80211_master_start_xmit(struct sk_buff *skb, * encapsulated packet will then be passed to master interface, wlan#.11, for * transmission (through low-level driver). */ -static int ieee80211_subif_start_xmit(struct sk_buff *skb, - struct net_device *dev) +int ieee80211_subif_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_tx_packet_data *pkt_data; @@ -1710,51 +1760,6 @@ static int ieee80211_subif_start_xmit(struct sk_buff *skb, goto fail; } - if (unlikely(sdata->type == IEEE80211_IF_TYPE_MNTR)) { - struct ieee80211_radiotap_header *prthdr = - (struct ieee80211_radiotap_header *)skb->data; - u16 len; - - /* - * there must be a radiotap header at the - * start in this case - */ - if (unlikely(prthdr->it_version)) { - /* only version 0 is supported */ - ret = 0; - goto fail; - } - - skb->dev = local->mdev; - - pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; - memset(pkt_data, 0, sizeof(*pkt_data)); - pkt_data->ifindex = sdata->dev->ifindex; - pkt_data->mgmt_iface = 0; - pkt_data->do_not_encrypt = 1; - - /* above needed because we set skb device to master */ - - /* - * fix up the pointers accounting for the radiotap - * header still being in there. 
We are being given - * a precooked IEEE80211 header so no need for - * normal processing - */ - len = le16_to_cpu(get_unaligned(&prthdr->it_len)); - skb_set_mac_header(skb, len); - skb_set_network_header(skb, len + sizeof(hdr)); - skb_set_transport_header(skb, len + sizeof(hdr)); - - /* - * pass the radiotap header up to - * the next stage intact - */ - dev_queue_xmit(skb); - - return 0; - } - nh_pos = skb_network_header(skb) - skb->data; h_pos = skb_transport_header(skb) - skb->data; @@ -1882,7 +1887,7 @@ static int ieee80211_subif_start_xmit(struct sk_buff *skb, pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data)); - pkt_data->ifindex = sdata->dev->ifindex; + pkt_data->ifindex = dev->ifindex; pkt_data->mgmt_iface = (sdata->type == IEEE80211_IF_TYPE_MGMT); pkt_data->do_not_encrypt = no_encrypt; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 5a91e17..fadcbcc 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -720,6 +720,8 @@ void ieee80211_prepare_rates(struct ieee80211_local *local, struct ieee80211_hw_mode *mode); void ieee80211_tx_set_iswep(struct ieee80211_txrx_data *tx); int ieee80211_if_update_wds(struct net_device *dev, u8 *remote_addr); +int ieee80211_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev); +int ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev); void ieee80211_if_setup(struct net_device *dev); void ieee80211_if_mgmt_setup(struct net_device *dev); int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, diff --git a/net/mac80211/ieee80211_iface.c b/net/mac80211/ieee80211_iface.c index cf0f32e..8532a5c 100644 --- a/net/mac80211/ieee80211_iface.c +++ b/net/mac80211/ieee80211_iface.c @@ -157,6 +157,8 @@ void ieee80211_if_set_type(struct net_device *dev, int type) struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); int oldtype = sdata->type; + dev->hard_start_xmit = 
ieee80211_subif_start_xmit; + sdata->type = type; switch (type) { case IEEE80211_IF_TYPE_WDS: @@ -196,6 +198,7 @@ void ieee80211_if_set_type(struct net_device *dev, int type) } case IEEE80211_IF_TYPE_MNTR: dev->type = ARPHRD_IEEE80211_RADIOTAP; + dev->hard_start_xmit = ieee80211_monitor_start_xmit; break; default: printk(KERN_WARNING "%s: %s: Unknown interface type 0x%x", -- cgit v0.10.2 From 3ef8bed4692a0de6a47d162196c850c5dea85b70 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Jul 2007 19:32:09 +0200 Subject: [PATCH] mac80211: kill rate control ioctls These aren't used anywhere (hostapd, wpa_supplicant) and until we have a proper interface to the rate control algorithms they don't make much sense either since e.g. rc80211_lowest won't honour them. Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. Linville diff --git a/net/mac80211/hostapd_ioctl.h b/net/mac80211/hostapd_ioctl.h index 34fa128..252204f 100644 --- a/net/mac80211/hostapd_ioctl.h +++ b/net/mac80211/hostapd_ioctl.h @@ -40,10 +40,6 @@ enum { PRISM2_PARAM_ANTENNA_MODE = 1013, PRISM2_PARAM_STAT_TIME = 1016, PRISM2_PARAM_STA_ANTENNA_SEL = 1017, - PRISM2_PARAM_FORCE_UNICAST_RATE = 1018, - PRISM2_PARAM_RATE_CTRL_NUM_UP = 1019, - PRISM2_PARAM_RATE_CTRL_NUM_DOWN = 1020, - PRISM2_PARAM_MAX_RATECTRL_RATE = 1021, PRISM2_PARAM_TX_POWER_REDUCTION = 1022, PRISM2_PARAM_KEY_TX_RX_THRESHOLD = 1024, PRISM2_PARAM_DEFAULT_WEP_ONLY = 1026, diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c index e916983..773a103 100644 --- a/net/mac80211/ieee80211.c +++ b/net/mac80211/ieee80211.c @@ -4924,8 +4924,6 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, local->short_retry_limit = 7; local->long_retry_limit = 4; local->hw.conf.radio_enabled = 1; - local->rate_ctrl_num_up = RATE_CONTROL_NUM_UP; - local->rate_ctrl_num_down = RATE_CONTROL_NUM_DOWN; local->enabled_modes = (unsigned int) -1; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 
fadcbcc..b222a9a 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -514,8 +514,6 @@ struct ieee80211_local { STA_ANTENNA_SEL_SW_CTRL_DEBUG = 2 } sta_antenna_sel; - int rate_ctrl_num_up, rate_ctrl_num_down; - #ifdef CONFIG_MAC80211_DEBUG_COUNTERS /* TX/RX handler statistics */ unsigned int tx_handlers_drop; diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c index ef74a91..f404f1f 100644 --- a/net/mac80211/ieee80211_ioctl.c +++ b/net/mac80211/ieee80211_ioctl.c @@ -1074,62 +1074,6 @@ static int ieee80211_ioctl_clear_keys(struct net_device *dev) } -static int -ieee80211_ioctl_force_unicast_rate(struct net_device *dev, - struct ieee80211_sub_if_data *sdata, - int rate) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_hw_mode *mode; - int i; - - if (sdata->type != IEEE80211_IF_TYPE_AP) - return -ENOENT; - - if (rate == 0) { - sdata->u.ap.force_unicast_rateidx = -1; - return 0; - } - - mode = local->oper_hw_mode; - for (i = 0; i < mode->num_rates; i++) { - if (mode->rates[i].rate == rate) { - sdata->u.ap.force_unicast_rateidx = i; - return 0; - } - } - return -EINVAL; -} - - -static int -ieee80211_ioctl_max_ratectrl_rate(struct net_device *dev, - struct ieee80211_sub_if_data *sdata, - int rate) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_hw_mode *mode; - int i; - - if (sdata->type != IEEE80211_IF_TYPE_AP) - return -ENOENT; - - if (rate == 0) { - sdata->u.ap.max_ratectrl_rateidx = -1; - return 0; - } - - mode = local->oper_hw_mode; - for (i = 0; i < mode->num_rates; i++) { - if (mode->rates[i].rate == rate) { - sdata->u.ap.max_ratectrl_rateidx = i; - return 0; - } - } - return -EINVAL; -} - - static void ieee80211_key_enable_hwaccel(struct ieee80211_local *local, struct ieee80211_key *key) { @@ -1317,22 +1261,6 @@ static int ieee80211_ioctl_prism2_param(struct net_device *dev, local->sta_antenna_sel = value; break; - case 
PRISM2_PARAM_FORCE_UNICAST_RATE: - ret = ieee80211_ioctl_force_unicast_rate(dev, sdata, value); - break; - - case PRISM2_PARAM_MAX_RATECTRL_RATE: - ret = ieee80211_ioctl_max_ratectrl_rate(dev, sdata, value); - break; - - case PRISM2_PARAM_RATE_CTRL_NUM_UP: - local->rate_ctrl_num_up = value; - break; - - case PRISM2_PARAM_RATE_CTRL_NUM_DOWN: - local->rate_ctrl_num_down = value; - break; - case PRISM2_PARAM_TX_POWER_REDUCTION: if (value < 0) ret = -EINVAL; @@ -1451,14 +1379,6 @@ static int ieee80211_ioctl_get_prism2_param(struct net_device *dev, *param = local->sta_antenna_sel; break; - case PRISM2_PARAM_RATE_CTRL_NUM_UP: - *param = local->rate_ctrl_num_up; - break; - - case PRISM2_PARAM_RATE_CTRL_NUM_DOWN: - *param = local->rate_ctrl_num_down; - break; - case PRISM2_PARAM_TX_POWER_REDUCTION: *param = local->hw.conf.tx_power_reduction; break; diff --git a/net/mac80211/rc80211_simple.c b/net/mac80211/rc80211_simple.c index 5ae7fc4..f6780d6 100644 --- a/net/mac80211/rc80211_simple.c +++ b/net/mac80211/rc80211_simple.c @@ -187,9 +187,13 @@ static void rate_control_simple_tx_status(void *priv, struct net_device *dev, } #endif - if (per_failed > local->rate_ctrl_num_down) { + /* + * XXX: Make these configurable once we have an + * interface to the rate control algorithms + */ + if (per_failed > RATE_CONTROL_NUM_DOWN) { rate_control_rate_dec(local, sta); - } else if (per_failed < local->rate_ctrl_num_up) { + } else if (per_failed < RATE_CONTROL_NUM_UP) { rate_control_rate_inc(local, sta); } srctrl->tx_avg_rate_sum += status->control.rate->rate; -- cgit v0.10.2 From 9771f740c6319e67bab44d18b9717c894a6f266d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Jul 2007 19:32:09 +0200 Subject: [PATCH] mac80211: kill antenna select ioctls Not used anywhere. Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. 
Linville diff --git a/net/mac80211/hostapd_ioctl.h b/net/mac80211/hostapd_ioctl.h index 252204f..230eb44 100644 --- a/net/mac80211/hostapd_ioctl.h +++ b/net/mac80211/hostapd_ioctl.h @@ -26,8 +26,6 @@ * mess shall be deleted completely. */ enum { PRISM2_PARAM_IEEE_802_1X = 23, - PRISM2_PARAM_ANTSEL_TX = 24, - PRISM2_PARAM_ANTSEL_RX = 25, /* Instant802 additions */ PRISM2_PARAM_CTS_PROTECT_ERP_FRAMES = 1001, diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c index f404f1f..ab09e9a 100644 --- a/net/mac80211/ieee80211_ioctl.c +++ b/net/mac80211/ieee80211_ioctl.c @@ -1197,18 +1197,6 @@ static int ieee80211_ioctl_prism2_param(struct net_device *dev, sdata->ieee802_1x = value; break; - case PRISM2_PARAM_ANTSEL_TX: - local->hw.conf.antenna_sel_tx = value; - if (ieee80211_hw_config(local)) - ret = -EINVAL; - break; - - case PRISM2_PARAM_ANTSEL_RX: - local->hw.conf.antenna_sel_rx = value; - if (ieee80211_hw_config(local)) - ret = -EINVAL; - break; - case PRISM2_PARAM_CTS_PROTECT_ERP_FRAMES: local->cts_protect_erp_frames = value; break; @@ -1340,14 +1328,6 @@ static int ieee80211_ioctl_get_prism2_param(struct net_device *dev, *param = sdata->ieee802_1x; break; - case PRISM2_PARAM_ANTSEL_TX: - *param = local->hw.conf.antenna_sel_tx; - break; - - case PRISM2_PARAM_ANTSEL_RX: - *param = local->hw.conf.antenna_sel_rx; - break; - case PRISM2_PARAM_CTS_PROTECT_ERP_FRAMES: *param = local->cts_protect_erp_frames; break; -- cgit v0.10.2 From fda6cc7ac45f97d4d40cc42781041dec488fa78c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Jul 2007 19:32:09 +0200 Subject: [PATCH] mac80211: remove PRISM2_PARAM_DROP_UNENCRYPTED ioctl Interestingly, wpa_supplicant doesn't use it, but uses the currently unsupported IW_AUTH_DROP_UNENCRYPTED. So I guess it doesn't matter anyway. Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. 
Linville diff --git a/net/mac80211/hostapd_ioctl.h b/net/mac80211/hostapd_ioctl.h index 230eb44..95ca1f9 100644 --- a/net/mac80211/hostapd_ioctl.h +++ b/net/mac80211/hostapd_ioctl.h @@ -29,7 +29,6 @@ enum { /* Instant802 additions */ PRISM2_PARAM_CTS_PROTECT_ERP_FRAMES = 1001, - PRISM2_PARAM_DROP_UNENCRYPTED = 1002, PRISM2_PARAM_PREAMBLE = 1003, PRISM2_PARAM_SHORT_SLOT_TIME = 1006, PRISM2_PARAM_NEXT_MODE = 1008, diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c index ab09e9a..f465d0a 100644 --- a/net/mac80211/ieee80211_ioctl.c +++ b/net/mac80211/ieee80211_ioctl.c @@ -1201,10 +1201,6 @@ static int ieee80211_ioctl_prism2_param(struct net_device *dev, local->cts_protect_erp_frames = value; break; - case PRISM2_PARAM_DROP_UNENCRYPTED: - sdata->drop_unencrypted = value; - break; - case PRISM2_PARAM_PREAMBLE: local->short_preamble = value; break; @@ -1332,10 +1328,6 @@ static int ieee80211_ioctl_get_prism2_param(struct net_device *dev, *param = local->cts_protect_erp_frames; break; - case PRISM2_PARAM_DROP_UNENCRYPTED: - *param = sdata->drop_unencrypted; - break; - case PRISM2_PARAM_PREAMBLE: *param = local->short_preamble; break; -- cgit v0.10.2 From 191b92666e3a8aa52af84e2d03350c25145be695 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Jul 2007 19:32:09 +0200 Subject: [PATCH] mac80211: kill PRISM2_PARAM_CLEAR_KEYS Not used anywhere, hence dead code. wpa_supplicant has its own clear_keys routine. Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. 
Linville diff --git a/net/mac80211/hostapd_ioctl.h b/net/mac80211/hostapd_ioctl.h index 95ca1f9..52da513 100644 --- a/net/mac80211/hostapd_ioctl.h +++ b/net/mac80211/hostapd_ioctl.h @@ -32,7 +32,6 @@ enum { PRISM2_PARAM_PREAMBLE = 1003, PRISM2_PARAM_SHORT_SLOT_TIME = 1006, PRISM2_PARAM_NEXT_MODE = 1008, - PRISM2_PARAM_CLEAR_KEYS = 1009, PRISM2_PARAM_RADIO_ENABLED = 1010, PRISM2_PARAM_ANTENNA_MODE = 1013, PRISM2_PARAM_STAT_TIME = 1016, diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c index f465d0a..9c1d076 100644 --- a/net/mac80211/ieee80211_ioctl.c +++ b/net/mac80211/ieee80211_ioctl.c @@ -1018,62 +1018,6 @@ static int ieee80211_ioctl_giwretry(struct net_device *dev, return 0; } -static int ieee80211_ioctl_clear_keys(struct net_device *dev) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_key_conf key; - int i; - u8 addr[ETH_ALEN]; - struct ieee80211_key_conf *keyconf; - struct ieee80211_sub_if_data *sdata; - struct sta_info *sta; - - memset(addr, 0xff, ETH_ALEN); - read_lock(&local->sub_if_lock); - list_for_each_entry(sdata, &local->sub_if_list, list) { - for (i = 0; i < NUM_DEFAULT_KEYS; i++) { - keyconf = NULL; - if (sdata->keys[i] && - !sdata->keys[i]->force_sw_encrypt && - local->ops->set_key && - (keyconf = ieee80211_key_data2conf(local, - sdata->keys[i]))) - local->ops->set_key(local_to_hw(local), - DISABLE_KEY, addr, - keyconf, 0); - kfree(keyconf); - ieee80211_key_free(sdata->keys[i]); - sdata->keys[i] = NULL; - } - sdata->default_key = NULL; - } - read_unlock(&local->sub_if_lock); - - spin_lock_bh(&local->sta_lock); - list_for_each_entry(sta, &local->sta_list, list) { - keyconf = NULL; - if (sta->key && !sta->key->force_sw_encrypt && - local->ops->set_key && - (keyconf = ieee80211_key_data2conf(local, sta->key))) - local->ops->set_key(local_to_hw(local), DISABLE_KEY, - sta->addr, keyconf, sta->aid); - kfree(keyconf); - ieee80211_key_free(sta->key); - sta->key = NULL; - } - 
spin_unlock_bh(&local->sta_lock); - - memset(&key, 0, sizeof(key)); - if (local->ops->set_key && - local->ops->set_key(local_to_hw(local), REMOVE_ALL_KEYS, - NULL, &key, 0)) - printk(KERN_DEBUG "%s: failed to remove hwaccel keys\n", - dev->name); - - return 0; -} - - static void ieee80211_key_enable_hwaccel(struct ieee80211_local *local, struct ieee80211_key *key) { @@ -1227,10 +1171,6 @@ static int ieee80211_ioctl_prism2_param(struct net_device *dev, local->next_mode = value; break; - case PRISM2_PARAM_CLEAR_KEYS: - ret = ieee80211_ioctl_clear_keys(dev); - break; - case PRISM2_PARAM_RADIO_ENABLED: ret = ieee80211_ioctl_set_radio_enabled(dev, value); break; -- cgit v0.10.2 From 5558235c6bade6662e6f257a35f2dfdc8a742147 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Jul 2007 19:32:09 +0200 Subject: [PATCH] mac80211: conserve stack space due to padding This patch reorders some fields in struct ieee802_11_elems to save 17*7 or 17*3 bytes (on 64/32-bit machines respectively) stack space in a few functions. Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. 
Linville diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c index 91b545c..f3ca837 100644 --- a/net/mac80211/ieee80211_sta.c +++ b/net/mac80211/ieee80211_sta.c @@ -76,33 +76,36 @@ static int ieee80211_sta_config_auth(struct net_device *dev, /* Parsed Information Elements */ struct ieee802_11_elems { + /* pointers to IEs */ u8 *ssid; - u8 ssid_len; u8 *supp_rates; - u8 supp_rates_len; u8 *fh_params; - u8 fh_params_len; u8 *ds_params; - u8 ds_params_len; u8 *cf_params; - u8 cf_params_len; u8 *tim; - u8 tim_len; u8 *ibss_params; - u8 ibss_params_len; u8 *challenge; - u8 challenge_len; u8 *wpa; - u8 wpa_len; u8 *rsn; - u8 rsn_len; u8 *erp_info; - u8 erp_info_len; u8 *ext_supp_rates; - u8 ext_supp_rates_len; u8 *wmm_info; - u8 wmm_info_len; u8 *wmm_param; + + /* length of them, respectively */ + u8 ssid_len; + u8 supp_rates_len; + u8 fh_params_len; + u8 ds_params_len; + u8 cf_params_len; + u8 tim_len; + u8 ibss_params_len; + u8 challenge_len; + u8 wpa_len; + u8 rsn_len; + u8 erp_info_len; + u8 ext_supp_rates_len; + u8 wmm_info_len; u8 wmm_param_len; }; -- cgit v0.10.2 From 4480f15ca62a595248d6d8e2b3e75052113cde59 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Jul 2007 19:32:10 +0200 Subject: [PATCH] mac80211: clarify some mac80211 things The semantics of not having an add_interface callback are not well defined, this callback is required because otherwise you cannot obtain the requested MAC address of the device. Change the documentation to reflect this, add a note about having no MAC address at all, add a warning that mac_addr in struct ieee80211_if_init_conf can be NULL and finally verify that a few callbacks are assigned by way of BUG_ON() Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. 
Linville diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 6278857..c34fd9a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -347,9 +347,16 @@ enum ieee80211_if_types { * @mac_addr: pointer to MAC address of the interface. This pointer is valid * until the interface is removed (i.e. it cannot be used after * remove_interface() callback was called for this interface). + * This pointer will be %NULL for monitor interfaces, be careful. * * This structure is used in add_interface() and remove_interface() * callbacks of &struct ieee80211_hw. + * + * When you allow multiple interfaces to be added to your PHY, take care + * that the hardware can actually handle multiple MAC addresses. However, + * also take care that when there's no interface left with mac_addr != %NULL + * you remove the MAC address from the device to avoid acknowledging packets + * in pure monitor mode. */ struct ieee80211_if_init_conf { int if_id; @@ -574,10 +581,11 @@ struct ieee80211_ops { * to returning zero. By returning non-zero addition of the interface * is inhibited. Unless monitor_during_oper is set, it is guaranteed * that monitor interfaces and normal interfaces are mutually - * exclusive. The open() handler is called after add_interface() - * if this is the first device added. At least one of the open() - * open() and add_interface() callbacks has to be assigned. If - * add_interface() is NULL, one STA interface is permitted only. */ + * exclusive. If assigned, the open() handler is called after + * add_interface() if this is the first device added. The + * add_interface() callback has to be assigned because it is the only + * way to obtain the requested MAC address for any interface. 
+ */ int (*add_interface)(struct ieee80211_hw *hw, struct ieee80211_if_init_conf *conf); diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c index 773a103..fe32a2d 100644 --- a/net/mac80211/ieee80211.c +++ b/net/mac80211/ieee80211.c @@ -2605,8 +2605,7 @@ static void ieee80211_start_hard_monitor(struct ieee80211_local *local) struct ieee80211_if_init_conf conf; if (local->open_count && local->open_count == local->monitors && - !(local->hw.flags & IEEE80211_HW_MONITOR_DURING_OPER) && - local->ops->add_interface) { + !(local->hw.flags & IEEE80211_HW_MONITOR_DURING_OPER)) { conf.if_id = -1; conf.type = IEEE80211_IF_TYPE_MNTR; conf.mac_addr = NULL; @@ -2649,21 +2648,14 @@ static int ieee80211_open(struct net_device *dev) } ieee80211_start_soft_monitor(local); - if (local->ops->add_interface) { - conf.if_id = dev->ifindex; - conf.type = sdata->type; - conf.mac_addr = dev->dev_addr; - res = local->ops->add_interface(local_to_hw(local), &conf); - if (res) { - if (sdata->type == IEEE80211_IF_TYPE_MNTR) - ieee80211_start_hard_monitor(local); - return res; - } - } else { - if (sdata->type != IEEE80211_IF_TYPE_STA) - return -EOPNOTSUPP; - if (local->open_count > 0) - return -ENOBUFS; + conf.if_id = dev->ifindex; + conf.type = sdata->type; + conf.mac_addr = dev->dev_addr; + res = local->ops->add_interface(local_to_hw(local), &conf); + if (res) { + if (sdata->type == IEEE80211_IF_TYPE_MNTR) + ieee80211_start_hard_monitor(local); + return res; } if (local->open_count == 0) { @@ -4896,6 +4888,9 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, ((sizeof(struct ieee80211_local) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); + BUG_ON(!ops->tx); + BUG_ON(!ops->config); + BUG_ON(!ops->add_interface); local->ops = ops; /* for now, mdev needs sub_if_data :/ */ -- cgit v0.10.2 From 1fd5e589d8c7d3cd42ddd39358338766cfcedec8 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Tue, 10 Jul 2007 19:32:10 +0200 Subject: [PATCH] mac80211: Implementation of SIOCSIWRATE The 
WEXT ioctl SIOCSIWRATE is not implemented in mac80211. This patch adds the missing routine. It supports the 'auto' keyword, fixed rates, and the combination of 'auto' and a fixed rate to select an upper bound. Based on the patch from Mohamed Abbas . Signed-off-by: Larry Finger Signed-off-by: Jiri Benc Signed-off-by: John W. Linville diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c index 9c1d076..9bc209b 100644 --- a/net/mac80211/ieee80211_ioctl.c +++ b/net/mac80211/ieee80211_ioctl.c @@ -863,6 +863,44 @@ static int ieee80211_ioctl_giwscan(struct net_device *dev, } +static int ieee80211_ioctl_siwrate(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *rate, char *extra) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_hw_mode *mode; + int i; + u32 target_rate = rate->value / 100000; + struct ieee80211_sub_if_data *sdata; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (!sdata->bss) + return -ENODEV; + mode = local->oper_hw_mode; + /* target_rate = -1, rate->fixed = 0 means auto only, so use all rates + * target_rate = X, rate->fixed = 1 means only rate X + * target_rate = X, rate->fixed = 0 means all rates <= X */ + sdata->bss->max_ratectrl_rateidx = -1; + sdata->bss->force_unicast_rateidx = -1; + if (rate->value < 0) + return 0; + for (i=0; i< mode->num_rates; i++) { + struct ieee80211_rate *rates = &mode->rates[i]; + int this_rate = rates->rate; + + if (mode->mode == MODE_ATHEROS_TURBO || + mode->mode == MODE_ATHEROS_TURBOG) + this_rate *= 2; + if (target_rate == this_rate) { + sdata->bss->max_ratectrl_rateidx = i; + if (rate->fixed) + sdata->bss->force_unicast_rateidx = i; + break; + } + } + return 0; +} + static int ieee80211_ioctl_giwrate(struct net_device *dev, struct iw_request_info *info, struct iw_param *rate, char *extra) @@ -1658,7 +1696,7 @@ static const iw_handler ieee80211_handler[] = (iw_handler) NULL, /* SIOCGIWNICKN */ (iw_handler) NULL, /* -- hole -- */ 
(iw_handler) NULL, /* -- hole -- */ - (iw_handler) NULL, /* SIOCSIWRATE */ + (iw_handler) ieee80211_ioctl_siwrate, /* SIOCSIWRATE */ (iw_handler) ieee80211_ioctl_giwrate, /* SIOCGIWRATE */ (iw_handler) ieee80211_ioctl_siwrts, /* SIOCSIWRTS */ (iw_handler) ieee80211_ioctl_giwrts, /* SIOCGIWRTS */ -- cgit v0.10.2 From 5628221caf88e2a052782b042e12da7cd34111b0 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Tue, 10 Jul 2007 19:32:10 +0200 Subject: [PATCH] mac80211: ERP IE handling improvements The "protection needed" flag is currently parsed out of the ERP IE in beacons. This patch allows the ERP IE to be available at association time and causes the appropriate actions to be performed earlier. It is slightly complicated by the fact that most APs don't include the ERP IE in association responses. To work around this, we store ERP values in the ieee80211_sta_bss structure. Also added some WLAN_ERP defines for use by upcoming patches. Signed-off-by: Jiri Benc Signed-off-by: John W. Linville diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index ecd61e8..272f8c8 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -227,6 +227,17 @@ struct ieee80211_cts { #define WLAN_CAPABILITY_SHORT_SLOT_TIME (1<<10) #define WLAN_CAPABILITY_DSSS_OFDM (1<<13) +/* 802.11g ERP information element */ +#define WLAN_ERP_NON_ERP_PRESENT (1<<0) +#define WLAN_ERP_USE_PROTECTION (1<<1) +#define WLAN_ERP_BARKER_PREAMBLE (1<<2) + +/* WLAN_ERP_BARKER_PREAMBLE values */ +enum { + WLAN_ERP_PREAMBLE_SHORT = 0, + WLAN_ERP_PREAMBLE_LONG = 1, +}; + /* Status codes */ enum ieee80211_statuscode { WLAN_STATUS_SUCCESS = 0, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index b222a9a..99ff7c5 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -99,6 +99,12 @@ struct ieee80211_sta_bss { int probe_resp; unsigned long last_update; + /* during assocation, we save an ERP value from a probe response so + * that we can feed ERP info
to the driver when handling the + * association completes. these fields probably won't be up-to-date + * otherwise, you probably don't want to use them. */ + int has_erp_value; + u8 erp_value; }; diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c index f3ca837..df6c410 100644 --- a/net/mac80211/ieee80211_sta.c +++ b/net/mac80211/ieee80211_sta.c @@ -314,6 +314,27 @@ static void ieee80211_sta_wmm_params(struct net_device *dev, } +static void ieee80211_handle_erp_ie(struct net_device *dev, u8 erp_value) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_if_sta *ifsta = &sdata->u.sta; + int use_protection = (erp_value & WLAN_ERP_USE_PROTECTION) != 0; + + if (use_protection != !!ifsta->use_protection) { + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: CTS protection %s (BSSID=" + MAC_FMT ")\n", + dev->name, + use_protection ? "enabled" : "disabled", + MAC_ARG(ifsta->bssid)); + } + ifsta->use_protection = use_protection ? 
1 : 0; + local->cts_protect_erp_frames = use_protection; + } +} + + static void ieee80211_sta_send_associnfo(struct net_device *dev, struct ieee80211_if_sta *ifsta) { @@ -377,9 +398,18 @@ static void ieee80211_set_associated(struct net_device *dev, if (assoc) { struct ieee80211_sub_if_data *sdata; + struct ieee80211_sta_bss *bss; sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->type != IEEE80211_IF_TYPE_STA) return; + + bss = ieee80211_rx_bss_get(dev, ifsta->bssid); + if (bss) { + if (bss->has_erp_value) + ieee80211_handle_erp_ie(dev, bss->erp_value); + ieee80211_rx_bss_put(dev, bss); + } + netif_carrier_on(dev); ifsta->prev_bssid_set = 1; memcpy(ifsta->prev_bssid, sdata->u.sta.bssid, ETH_ALEN); @@ -1177,6 +1207,18 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev, return; } + /* it probably doesn't, but if the frame includes an ERP value then + * update our stored copy */ + if (elems.erp_info && elems.erp_info_len >= 1) { + struct ieee80211_sta_bss *bss + = ieee80211_rx_bss_get(dev, ifsta->bssid); + if (bss) { + bss->erp_value = elems.erp_info[0]; + bss->has_erp_value = 1; + ieee80211_rx_bss_put(dev, bss); + } + } + printk(KERN_DEBUG "%s: associated\n", dev->name); ifsta->aid = aid; ifsta->ap_capab = capab_info; @@ -1499,6 +1541,12 @@ static void ieee80211_rx_bss_info(struct net_device *dev, return; } + /* save the ERP value so that it is available at association time */ + if (elems.erp_info && elems.erp_info_len >= 1) { + bss->erp_value = elems.erp_info[0]; + bss->has_erp_value = 1; + } + bss->beacon_int = le16_to_cpu(mgmt->u.beacon.beacon_int); bss->capability = le16_to_cpu(mgmt->u.beacon.capab_info); if (elems.ssid && elems.ssid_len <= IEEE80211_MAX_SSID_LEN) { @@ -1614,10 +1662,8 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev, size_t len, struct ieee80211_rx_status *rx_status) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sub_if_data *sdata; struct ieee80211_if_sta *ifsta; - int 
use_protection; size_t baselen; struct ieee802_11_elems elems; @@ -1641,23 +1687,8 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev, &elems) == ParseFailed) return; - use_protection = 0; - if (elems.erp_info && elems.erp_info_len >= 1) { - use_protection = - (elems.erp_info[0] & ERP_INFO_USE_PROTECTION) != 0; - } - - if (use_protection != !!ifsta->use_protection) { - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: CTS protection %s (BSSID=" - MAC_FMT ")\n", - dev->name, - use_protection ? "enabled" : "disabled", - MAC_ARG(ifsta->bssid)); - } - ifsta->use_protection = use_protection ? 1 : 0; - local->cts_protect_erp_frames = use_protection; - } + if (elems.erp_info && elems.erp_info_len >= 1) + ieee80211_handle_erp_ie(dev, elems.erp_info[0]); if (elems.wmm_param && ifsta->wmm_enabled) { ieee80211_sta_wmm_params(dev, ifsta, elems.wmm_param, -- cgit v0.10.2 From 63fc33ceb0ccc08b3f62d7bfe56a33eb33ca9427 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Tue, 10 Jul 2007 19:32:11 +0200 Subject: [PATCH] mac80211: improved 802.11g CTS protection Currently, CTS protection is partially implemented twice: 1. via prism2 ioctls, only used by hostapd 2. via STA beacon parsing, recorded in sta.use_protection but never used (other than printed in debugfs) Protection control should be implemented on a per-subif basis. For example, a single physical device may be running a soft AP on one channel, and a STA on another. The AP interface should use protection based on what hostapd told it, and the STA interface should use protection based on beacon parsing. These should operate independently: one subif using protection should not influence the other. To implement this, I moved the use_protection flag into ieee80211_sub_if_data and removed the device-global cts_protect_erp_frames flag. I also made the PRISM2_PARAM_CTS_PROTECT_ERP_FRAMES write operation only available for AP interfaces, to avoid any possibility of the user messing with the behaviour of a STA.
Signed-off-by: Daniel Drake Signed-off-by: Jiri Benc Signed-off-by: John W. Linville diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 9e39646..a3e01d7 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -118,7 +118,7 @@ static ssize_t ieee80211_if_fmt_flags( sdata->u.sta.authenticated ? "AUTH\n" : "", sdata->u.sta.associated ? "ASSOC\n" : "", sdata->u.sta.probereq_poll ? "PROBEREQ POLL\n" : "", - sdata->u.sta.use_protection ? "CTS prot\n" : ""); + sdata->use_protection ? "CTS prot\n" : ""); } __IEEE80211_IF_FILE(flags); diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c index fe32a2d..2ddf4ef 100644 --- a/net/mac80211/ieee80211.c +++ b/net/mac80211/ieee80211.c @@ -442,7 +442,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_txrx_data *tx) if (!tx->u.tx.rate) return TXRX_DROP; if (tx->u.tx.mode->mode == MODE_IEEE80211G && - tx->local->cts_protect_erp_frames && tx->fragmented && + tx->sdata->use_protection && tx->fragmented && extra.nonerp) { tx->u.tx.last_frag_rate = tx->u.tx.rate; tx->u.tx.probe_last_frag = extra.probe ? 1 : 0; @@ -868,8 +868,7 @@ ieee80211_tx_h_misc(struct ieee80211_txrx_data *tx) * for the frame. 
*/ if (mode->mode == MODE_IEEE80211G && (tx->u.tx.rate->flags & IEEE80211_RATE_ERP) && - tx->u.tx.unicast && - tx->local->cts_protect_erp_frames && + tx->u.tx.unicast && tx->sdata->use_protection && !(control->flags & IEEE80211_TXCTL_USE_RTS_CTS)) control->flags |= IEEE80211_TXCTL_USE_CTS_PROTECT; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 99ff7c5..055a2a9 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -241,7 +241,6 @@ struct ieee80211_if_sta { unsigned int authenticated:1; unsigned int associated:1; unsigned int probereq_poll:1; - unsigned int use_protection:1; unsigned int create_ibss:1; unsigned int mixed_cell:1; unsigned int wmm_enabled:1; @@ -284,6 +283,7 @@ struct ieee80211_sub_if_data { int mc_count; unsigned int allmulti:1; unsigned int promisc:1; + unsigned int use_protection:1; /* CTS protect ERP frames */ struct net_device_stats stats; int drop_unencrypted; @@ -444,7 +444,6 @@ struct ieee80211_local { int *basic_rates[NUM_IEEE80211_MODES]; int rts_threshold; - int cts_protect_erp_frames; int fragmentation_threshold; int short_retry_limit; /* dot11ShortRetryLimit */ int long_retry_limit; /* dot11LongRetryLimit */ diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c index 9bc209b..5918dd0 100644 --- a/net/mac80211/ieee80211_ioctl.c +++ b/net/mac80211/ieee80211_ioctl.c @@ -1180,7 +1180,10 @@ static int ieee80211_ioctl_prism2_param(struct net_device *dev, break; case PRISM2_PARAM_CTS_PROTECT_ERP_FRAMES: - local->cts_protect_erp_frames = value; + if (sdata->type != IEEE80211_IF_TYPE_AP) + ret = -ENOENT; + else + sdata->use_protection = value; break; case PRISM2_PARAM_PREAMBLE: @@ -1303,7 +1306,7 @@ static int ieee80211_ioctl_get_prism2_param(struct net_device *dev, break; case PRISM2_PARAM_CTS_PROTECT_ERP_FRAMES: - *param = local->cts_protect_erp_frames; + *param = sdata->use_protection; break; case PRISM2_PARAM_PREAMBLE: diff --git a/net/mac80211/ieee80211_sta.c 
b/net/mac80211/ieee80211_sta.c index df6c410..ba2bf8f 100644 --- a/net/mac80211/ieee80211_sta.c +++ b/net/mac80211/ieee80211_sta.c @@ -316,12 +316,11 @@ static void ieee80211_sta_wmm_params(struct net_device *dev, static void ieee80211_handle_erp_ie(struct net_device *dev, u8 erp_value) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_if_sta *ifsta = &sdata->u.sta; int use_protection = (erp_value & WLAN_ERP_USE_PROTECTION) != 0; - if (use_protection != !!ifsta->use_protection) { + if (use_protection != sdata->use_protection) { if (net_ratelimit()) { printk(KERN_DEBUG "%s: CTS protection %s (BSSID=" MAC_FMT ")\n", @@ -329,8 +328,7 @@ static void ieee80211_handle_erp_ie(struct net_device *dev, u8 erp_value) use_protection ? "enabled" : "disabled", MAC_ARG(ifsta->bssid)); } - ifsta->use_protection = use_protection ? 1 : 0; - local->cts_protect_erp_frames = use_protection; + sdata->use_protection = use_protection; } } @@ -390,6 +388,7 @@ static void ieee80211_set_associated(struct net_device *dev, struct ieee80211_if_sta *ifsta, int assoc) { union iwreq_data wrqu; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (ifsta->associated == assoc) return; @@ -417,6 +416,7 @@ static void ieee80211_set_associated(struct net_device *dev, ieee80211_sta_send_associnfo(dev, ifsta); } else { netif_carrier_off(dev); + sdata->use_protection = 0; memset(wrqu.ap_addr.sa_data, 0, ETH_ALEN); } wrqu.ap_addr.sa_family = ARPHRD_ETHER; -- cgit v0.10.2 From e6c9116d1dc984cb7ecf1b0fe26ca4a8ab36bb57 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 14 Jul 2007 18:50:15 -0700 Subject: [RFKILL]: fix net/rfkill/rfkill-input.c bug on 64-bit systems Subject: [patch] net/input: fix net/rfkill/rfkill-input.c bug on 64-bit systems this recent commit: commit cf4328cd949c2086091c62c5685f1580fe9b55e4 Author: Ivo van Doorn Date: Mon May 7 00:34:20 2007 -0700 [NET]: rfkill: add 
support for input key to control wireless radio added this 64-bit bug: .... unsigned int flags; spin_lock_irqsave(&task->lock, flags); .... irq 'flags' must be unsigned long, not unsigned int. The -rt tree has strict checks about this on 64-bit so this triggered a build failure. Signed-off-by: Ingo Molnar Signed-off-by: David S. Miller diff --git a/net/rfkill/rfkill-input.c b/net/rfkill/rfkill-input.c index e5c840c..230e35c 100644 --- a/net/rfkill/rfkill-input.c +++ b/net/rfkill/rfkill-input.c @@ -55,7 +55,7 @@ static void rfkill_task_handler(struct work_struct *work) static void rfkill_schedule_toggle(struct rfkill_task *task) { - unsigned int flags; + unsigned long flags; spin_lock_irqsave(&task->lock, flags); -- cgit v0.10.2 From 24023451c8df726692e2f52288a20870d13b501f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 14 Jul 2007 18:51:31 -0700 Subject: [NET]: Add net_device change_rx_mode callback Currently the set_multicast_list (and set_rx_mode) callbacks are responsible for configuring the device according to the IFF_PROMISC, IFF_MULTICAST and IFF_ALLMULTI flags and the mc_list (and uc_list in case of set_rx_mode). These callbacks can be invoked from BH context without the rtnl_mutex by dev_mc_add/dev_mc_delete, which makes reading the device flags and promiscuous/allmulti count racy. For real hardware drivers that just commit all changes to the hardware this is not a real problem since the stack guarantees to call them for every change, so at least the final call will not race and commit the correct configuration to the hardware. For software devices that want to synchronize promiscuous and multicast state to an underlying device however this can cause corruption of the underlying device's flags or promisc/allmulti counts.
When the software device is concurrently put in promiscuous or allmulti mode while set_multicast_list is invoked from bottom half context, the device might synchronize the change to the underlying device without holding the rtnl_mutex, which races with concurrent changes to the underlying device. Add a dev->change_rx_flags hook that is invoked when any of the flags that affect rx filtering change (under the rtnl_mutex), which allows drivers to perform synchronization immediately and only synchronize the address lists in set_multicast_list/set_rx_mode. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 79cc3da..f193aba 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -516,6 +516,9 @@ struct net_device void *saddr, unsigned len); int (*rebuild_header)(struct sk_buff *skb); +#define HAVE_CHANGE_RX_FLAGS + void (*change_rx_flags)(struct net_device *dev, + int flags); #define HAVE_SET_RX_MODE void (*set_rx_mode)(struct net_device *dev); #define HAVE_MULTICAST diff --git a/net/core/dev.c b/net/core/dev.c index 9644305..59ec811 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2521,6 +2521,8 @@ static void __dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; + ASSERT_RTNL(); + if ((dev->promiscuity += inc) == 0) dev->flags &= ~IFF_PROMISC; else @@ -2535,6 +2537,9 @@ static void __dev_set_promiscuity(struct net_device *dev, int inc) dev->name, (dev->flags & IFF_PROMISC), (old_flags & IFF_PROMISC), audit_get_loginuid(current->audit_context)); + + if (dev->change_rx_flags) + dev->change_rx_flags(dev, IFF_PROMISC); } } @@ -2573,11 +2578,16 @@ void dev_set_allmulti(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; + ASSERT_RTNL(); + dev->flags |= IFF_ALLMULTI; if ((dev->allmulti += inc) == 0) dev->flags &= ~IFF_ALLMULTI; - if (dev->flags ^ old_flags) + if (dev->flags ^ old_flags) { + if
(dev->change_rx_flags) + dev->change_rx_flags(dev, IFF_ALLMULTI); dev_set_rx_mode(dev); + } } /* @@ -2778,6 +2788,8 @@ int dev_change_flags(struct net_device *dev, unsigned flags) int ret, changes; int old_flags = dev->flags; + ASSERT_RTNL(); + /* * Set the flags on our device. */ @@ -2792,6 +2804,9 @@ int dev_change_flags(struct net_device *dev, unsigned flags) * Load in the correct multicast list now the flags have changed. */ + if (dev->change_rx_flags && (dev->flags ^ flags) & IFF_MULTICAST) + dev->change_rx_flags(dev, IFF_MULTICAST); + dev_set_rx_mode(dev); /* -- cgit v0.10.2 From a0a400d79e3dd7843e7e81baa3ef2957bdc292d0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 14 Jul 2007 18:52:02 -0700 Subject: [NET]: dev_mcast: add multicast list synchronization helpers The method drivers currently use to synchronize multicast lists is not very pretty: - walk the multicast list - search each entry on a copy of the previous list - if new add to lower device - walk the copy of the previous list - search each entry on the current list - if removed delete from lower device - copy entire list This patch adds a new field to struct dev_addr_list to store the synchronization state and adds two helper functions for synchronization and cleanup. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f193aba..e5af458 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -190,6 +190,7 @@ struct dev_addr_list struct dev_addr_list *next; u8 da_addr[MAX_ADDR_LEN]; u8 da_addrlen; + u8 da_synced; int da_users; int da_gusers; }; @@ -1103,6 +1104,8 @@ extern int dev_unicast_delete(struct net_device *dev, void *addr, int alen); extern int dev_unicast_add(struct net_device *dev, void *addr, int alen); extern int dev_mc_delete(struct net_device *dev, void *addr, int alen, int all); extern int dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly); +extern int dev_mc_sync(struct net_device *to, struct net_device *from); +extern void dev_mc_unsync(struct net_device *to, struct net_device *from); extern void dev_mc_discard(struct net_device *dev); extern int __dev_addr_delete(struct dev_addr_list **list, int *count, void *addr, int alen, int all); extern int __dev_addr_add(struct dev_addr_list **list, int *count, void *addr, int alen, int newonly); diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index aa38100..235a2a8 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -102,6 +102,81 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) return err; } +/** + * dev_mc_sync - Synchronize device's multicast list to another device + * @to: destination device + * @from: source device + * + * Add newly added addresses to the destination device and release + * addresses that have no users left. The source device must be + * locked by netif_tx_lock_bh. + * + * This function is intended to be called from the dev->set_multicast_list + * function of layered software devices. 
+ */ +int dev_mc_sync(struct net_device *to, struct net_device *from) +{ + struct dev_addr_list *da; + int err = 0; + + netif_tx_lock_bh(to); + for (da = from->mc_list; da != NULL; da = da->next) { + if (!da->da_synced) { + err = __dev_addr_add(&to->mc_list, &to->mc_count, + da->da_addr, da->da_addrlen, 0); + if (err < 0) + break; + da->da_synced = 1; + da->da_users++; + } else if (da->da_users == 1) { + __dev_addr_delete(&to->mc_list, &to->mc_count, + da->da_addr, da->da_addrlen, 0); + __dev_addr_delete(&from->mc_list, &from->mc_count, + da->da_addr, da->da_addrlen, 0); + } + } + if (!err) + __dev_set_rx_mode(to); + netif_tx_unlock_bh(to); + + return err; +} +EXPORT_SYMBOL(dev_mc_sync); + + +/** + * dev_mc_unsync - Remove synchronized addresses from the destination + * device + * @to: destination device + * @from: source device + * + * Remove all addresses that were added to the destination device by + * dev_mc_sync(). This function is intended to be called from the + * dev->stop function of layered software devices. 
+ */ +void dev_mc_unsync(struct net_device *to, struct net_device *from) +{ + struct dev_addr_list *da; + + netif_tx_lock_bh(from); + netif_tx_lock_bh(to); + + for (da = from->mc_list; da != NULL; da = da->next) { + if (!da->da_synced) + continue; + __dev_addr_delete(&to->mc_list, &to->mc_count, + da->da_addr, da->da_addrlen, 0); + da->da_synced = 0; + __dev_addr_delete(&from->mc_list, &from->mc_count, + da->da_addr, da->da_addrlen, 0); + } + __dev_set_rx_mode(to); + + netif_tx_unlock_bh(to); + netif_tx_unlock_bh(from); +} +EXPORT_SYMBOL(dev_mc_unsync); + /* * Discard multicast list when a device is downed */ -- cgit v0.10.2 From 6c78dcbd47a68a7d25d2bee7a6c74b9136cb5fde Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 14 Jul 2007 18:52:56 -0700 Subject: [VLAN]: Fix promiscuous/allmulti synchronization races The set_multicast_list function may be called without holding the rtnl mutex, resulting in races when changing the underlying device's promiscuous and allmulti state. Use the change_rx_flags hook, which is always invoked under the rtnl. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 61a57dc..7f71df4 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -132,8 +132,6 @@ struct vlan_dev_info { * made, in order to feed the right changes down * to the real hardware... */ - int old_allmulti; /* similar to above. */ - int old_promiscuity; /* similar to above. 
*/ struct net_device *real_dev; /* the underlying device/interface */ unsigned char real_dev_addr[ETH_ALEN]; struct proc_dir_entry *dent; /* Holds the proc data */ diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index abb9900..39bdcc2 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -373,6 +373,7 @@ void vlan_setup(struct net_device *new_dev) new_dev->open = vlan_dev_open; new_dev->stop = vlan_dev_stop; new_dev->set_multicast_list = vlan_dev_set_multicast_list; + new_dev->change_rx_flags = vlan_change_rx_flags; new_dev->destructor = free_netdev; new_dev->do_ioctl = vlan_dev_ioctl; diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 62ce1c5..7df5b29 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -69,6 +69,7 @@ int vlan_dev_set_vlan_flag(const struct net_device *dev, u32 flag, short flag_val); void vlan_dev_get_realdev_name(const struct net_device *dev, char *result); void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result); +void vlan_change_rx_flags(struct net_device *dev, int change); void vlan_dev_set_multicast_list(struct net_device *vlan_dev); int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index d4a62d1..dec7e62 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -712,6 +712,11 @@ int vlan_dev_open(struct net_device *dev) } memcpy(vlan->real_dev_addr, real_dev->dev_addr, ETH_ALEN); + if (dev->flags & IFF_ALLMULTI) + dev_set_allmulti(real_dev, 1); + if (dev->flags & IFF_PROMISC) + dev_set_promiscuity(real_dev, 1); + return 0; } @@ -721,6 +726,11 @@ int vlan_dev_stop(struct net_device *dev) vlan_flush_mc_list(dev); + if (dev->flags & IFF_ALLMULTI) + dev_set_allmulti(real_dev, -1); + if (dev->flags & IFF_PROMISC) + dev_set_promiscuity(real_dev, -1); + if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) dev_unicast_delete(real_dev, dev->dev_addr, dev->addr_len); @@ -754,34 +764,26 @@ int vlan_dev_ioctl(struct net_device 
*dev, struct ifreq *ifr, int cmd) return err; } +void vlan_change_rx_flags(struct net_device *dev, int change) +{ + struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev; + + if (change & IFF_ALLMULTI) + dev_set_allmulti(real_dev, dev->flags & IFF_ALLMULTI ? 1 : -1); + if (change & IFF_PROMISC) + dev_set_promiscuity(real_dev, dev->flags & IFF_PROMISC ? 1 : -1); +} + /** Taken from Gleb + Lennert's VLAN code, and modified... */ void vlan_dev_set_multicast_list(struct net_device *vlan_dev) { struct dev_mc_list *dmi; struct net_device *real_dev; - int inc; if (vlan_dev && (vlan_dev->priv_flags & IFF_802_1Q_VLAN)) { /* Then it's a real vlan device, as far as we can tell.. */ real_dev = VLAN_DEV_INFO(vlan_dev)->real_dev; - /* compare the current promiscuity to the last promisc we had.. */ - inc = vlan_dev->promiscuity - VLAN_DEV_INFO(vlan_dev)->old_promiscuity; - if (inc) { - printk(KERN_INFO "%s: dev_set_promiscuity(master, %d)\n", - vlan_dev->name, inc); - dev_set_promiscuity(real_dev, inc); /* found in dev.c */ - VLAN_DEV_INFO(vlan_dev)->old_promiscuity = vlan_dev->promiscuity; - } - - inc = vlan_dev->allmulti - VLAN_DEV_INFO(vlan_dev)->old_allmulti; - if (inc) { - printk(KERN_INFO "%s: dev_set_allmulti(master, %d)\n", - vlan_dev->name, inc); - dev_set_allmulti(real_dev, inc); /* dev.c */ - VLAN_DEV_INFO(vlan_dev)->old_allmulti = vlan_dev->allmulti; - } - /* looking for addresses to add to master's list */ for (dmi = vlan_dev->mc_list; dmi != NULL; dmi = dmi->next) { if (vlan_should_add_mc(dmi, VLAN_DEV_INFO(vlan_dev)->old_mc_list)) { -- cgit v0.10.2 From 56addd6eeeb4e11f5a0af7093ca078e0f29140e0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 14 Jul 2007 18:53:28 -0700 Subject: [VLAN]: Use multicast list synchronization helpers Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 7f71df4..f8443fd 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -127,11 +127,6 @@ struct vlan_dev_info { * like DHCP that use packet-filtering and don't understand * 802.1Q */ - struct dev_mc_list *old_mc_list; /* old multi-cast list for the VLAN interface.. - * we save this so we can tell what changes were - * made, in order to feed the right changes down - * to the real hardware... - */ struct net_device *real_dev; /* the underlying device/interface */ unsigned char real_dev_addr[ETH_ALEN]; struct proc_dir_entry *dent; /* Holds the proc data */ diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index dec7e62..4d2aa4d 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -612,90 +612,6 @@ void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result) *result = VLAN_DEV_INFO(dev)->vlan_id; } -static inline int vlan_dmi_equals(struct dev_mc_list *dmi1, - struct dev_mc_list *dmi2) -{ - return ((dmi1->dmi_addrlen == dmi2->dmi_addrlen) && - (memcmp(dmi1->dmi_addr, dmi2->dmi_addr, dmi1->dmi_addrlen) == 0)); -} - -/** dmi is a single entry into a dev_mc_list, a single node. mc_list is - * an entire list, and we'll iterate through it. 
- */ -static int vlan_should_add_mc(struct dev_mc_list *dmi, struct dev_mc_list *mc_list) -{ - struct dev_mc_list *idmi; - - for (idmi = mc_list; idmi != NULL; ) { - if (vlan_dmi_equals(dmi, idmi)) { - if (dmi->dmi_users > idmi->dmi_users) - return 1; - else - return 0; - } else { - idmi = idmi->next; - } - } - - return 1; -} - -static inline void vlan_destroy_mc_list(struct dev_mc_list *mc_list) -{ - struct dev_mc_list *dmi = mc_list; - struct dev_mc_list *next; - - while(dmi) { - next = dmi->next; - kfree(dmi); - dmi = next; - } -} - -static void vlan_copy_mc_list(struct dev_mc_list *mc_list, struct vlan_dev_info *vlan_info) -{ - struct dev_mc_list *dmi, *new_dmi; - - vlan_destroy_mc_list(vlan_info->old_mc_list); - vlan_info->old_mc_list = NULL; - - for (dmi = mc_list; dmi != NULL; dmi = dmi->next) { - new_dmi = kmalloc(sizeof(*new_dmi), GFP_ATOMIC); - if (new_dmi == NULL) { - printk(KERN_ERR "vlan: cannot allocate memory. " - "Multicast may not work properly from now.\n"); - return; - } - - /* Copy whole structure, then make new 'next' pointer */ - *new_dmi = *dmi; - new_dmi->next = vlan_info->old_mc_list; - vlan_info->old_mc_list = new_dmi; - } -} - -static void vlan_flush_mc_list(struct net_device *dev) -{ - struct dev_mc_list *dmi = dev->mc_list; - - while (dmi) { - printk(KERN_DEBUG "%s: del %.2x:%.2x:%.2x:%.2x:%.2x:%.2x mcast address from vlan interface\n", - dev->name, - dmi->dmi_addr[0], - dmi->dmi_addr[1], - dmi->dmi_addr[2], - dmi->dmi_addr[3], - dmi->dmi_addr[4], - dmi->dmi_addr[5]); - dev_mc_delete(dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); - dmi = dev->mc_list; - } - - /* dev->mc_list is NULL by the time we get here. 
*/ - vlan_destroy_mc_list(VLAN_DEV_INFO(dev)->old_mc_list); - VLAN_DEV_INFO(dev)->old_mc_list = NULL; -} - int vlan_dev_open(struct net_device *dev) { struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); @@ -724,8 +640,7 @@ int vlan_dev_stop(struct net_device *dev) { struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev; - vlan_flush_mc_list(dev); - + dev_mc_unsync(real_dev, dev); if (dev->flags & IFF_ALLMULTI) dev_set_allmulti(real_dev, -1); if (dev->flags & IFF_PROMISC) @@ -777,47 +692,5 @@ void vlan_change_rx_flags(struct net_device *dev, int change) /** Taken from Gleb + Lennert's VLAN code, and modified... */ void vlan_dev_set_multicast_list(struct net_device *vlan_dev) { - struct dev_mc_list *dmi; - struct net_device *real_dev; - - if (vlan_dev && (vlan_dev->priv_flags & IFF_802_1Q_VLAN)) { - /* Then it's a real vlan device, as far as we can tell.. */ - real_dev = VLAN_DEV_INFO(vlan_dev)->real_dev; - - /* looking for addresses to add to master's list */ - for (dmi = vlan_dev->mc_list; dmi != NULL; dmi = dmi->next) { - if (vlan_should_add_mc(dmi, VLAN_DEV_INFO(vlan_dev)->old_mc_list)) { - dev_mc_add(real_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); - printk(KERN_DEBUG "%s: add %.2x:%.2x:%.2x:%.2x:%.2x:%.2x mcast address to master interface\n", - vlan_dev->name, - dmi->dmi_addr[0], - dmi->dmi_addr[1], - dmi->dmi_addr[2], - dmi->dmi_addr[3], - dmi->dmi_addr[4], - dmi->dmi_addr[5]); - } - } - - /* looking for addresses to delete from master's list */ - for (dmi = VLAN_DEV_INFO(vlan_dev)->old_mc_list; dmi != NULL; dmi = dmi->next) { - if (vlan_should_add_mc(dmi, vlan_dev->mc_list)) { - /* if we think we should add it to the new list, then we should really - * delete it from the real list on the underlying device. 
- */ - dev_mc_delete(real_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0); - printk(KERN_DEBUG "%s: del %.2x:%.2x:%.2x:%.2x:%.2x:%.2x mcast address from master interface\n", - vlan_dev->name, - dmi->dmi_addr[0], - dmi->dmi_addr[1], - dmi->dmi_addr[2], - dmi->dmi_addr[3], - dmi->dmi_addr[4], - dmi->dmi_addr[5]); - } - } - - /* save multicast list */ - vlan_copy_mc_list(vlan_dev->mc_list, VLAN_DEV_INFO(vlan_dev)); - } + dev_mc_sync(VLAN_DEV_INFO(vlan_dev)->real_dev, vlan_dev); } -- cgit v0.10.2 From b863ceb7ddcea8c55fcf1d7b2ac591d50aa7ed53 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 14 Jul 2007 18:55:06 -0700 Subject: [NET]: Add macvlan driver Add macvlan driver, which allows to create virtual ethernet devices based on MAC address. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller diff --git a/MAINTAINERS b/MAINTAINERS index 845fbf4..360eb58 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2330,6 +2330,12 @@ W: http://linuxwireless.org/ T: git kernel.org:/pub/scm/linux/kernel/git/jbenc/mac80211.git S: Maintained +MACVLAN DRIVER +P: Patrick McHardy +M: kaber@trash.net +L: netdev@vger.kernel.org +S: Maintained + MARVELL YUKON / SYSKONNECT DRIVER P: Mirko Lindner M: mlindner@syskonnect.de diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index ba314ad..d17d64e 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -82,6 +82,16 @@ config BONDING To compile this driver as a module, choose M here: the module will be called bonding. +config MACVLAN + tristate "MAC-VLAN support (EXPERIMENTAL)" + depends on EXPERIMENTAL + ---help--- + This allows one to create virtual interfaces that map packets to + or from specific MAC addresses to a particular interface. + + To compile this driver as a module, choose M here: the module + will be called macvlan. 
+ config EQUALIZER tristate "EQL (serial line load balancing) support" ---help--- diff --git a/drivers/net/Makefile b/drivers/net/Makefile index a2241e6..c26b867 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -128,6 +128,7 @@ obj-$(CONFIG_SLHC) += slhc.o obj-$(CONFIG_DUMMY) += dummy.o obj-$(CONFIG_IFB) += ifb.o +obj-$(CONFIG_MACVLAN) += macvlan.o obj-$(CONFIG_DE600) += de600.o obj-$(CONFIG_DE620) += de620.o obj-$(CONFIG_LANCE) += lance.o diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c new file mode 100644 index 0000000..dc74d00 --- /dev/null +++ b/drivers/net/macvlan.c @@ -0,0 +1,496 @@ +/* + * Copyright (c) 2007 Patrick McHardy + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * The code this is based on carried the following copyright notice: + * --- + * (C) Copyright 2001-2006 + * Alex Zeffertt, Cambridge Broadband Ltd, ajz@cambridgebroadband.com + * Re-worked by Ben Greear + * --- + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MACVLAN_HASH_SIZE (1 << BITS_PER_BYTE) + +struct macvlan_port { + struct net_device *dev; + struct hlist_head vlan_hash[MACVLAN_HASH_SIZE]; + struct list_head vlans; +}; + +struct macvlan_dev { + struct net_device *dev; + struct list_head list; + struct hlist_node hlist; + struct macvlan_port *port; + struct net_device *lowerdev; +}; + + +static struct macvlan_dev *macvlan_hash_lookup(const struct macvlan_port *port, + const unsigned char *addr) +{ + struct macvlan_dev *vlan; + struct hlist_node *n; + + hlist_for_each_entry_rcu(vlan, n, &port->vlan_hash[addr[5]], hlist) { + if (!compare_ether_addr(vlan->dev->dev_addr, addr)) + return vlan; + } + return NULL; +} + +static 
void macvlan_broadcast(struct sk_buff *skb, + const struct macvlan_port *port) +{ + const struct ethhdr *eth = eth_hdr(skb); + const struct macvlan_dev *vlan; + struct hlist_node *n; + struct net_device *dev; + struct sk_buff *nskb; + unsigned int i; + + for (i = 0; i < MACVLAN_HASH_SIZE; i++) { + hlist_for_each_entry_rcu(vlan, n, &port->vlan_hash[i], hlist) { + dev = vlan->dev; + if (unlikely(!(dev->flags & IFF_UP))) + continue; + + nskb = skb_clone(skb, GFP_ATOMIC); + if (nskb == NULL) { + dev->stats.rx_errors++; + dev->stats.rx_dropped++; + continue; + } + + dev->stats.rx_bytes += skb->len + ETH_HLEN; + dev->stats.rx_packets++; + dev->stats.multicast++; + dev->last_rx = jiffies; + + nskb->dev = dev; + if (!compare_ether_addr(eth->h_dest, dev->broadcast)) + nskb->pkt_type = PACKET_BROADCAST; + else + nskb->pkt_type = PACKET_MULTICAST; + + netif_rx(nskb); + } + } +} + +/* called under rcu_read_lock() from netif_receive_skb */ +static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb) +{ + const struct ethhdr *eth = eth_hdr(skb); + const struct macvlan_port *port; + const struct macvlan_dev *vlan; + struct net_device *dev; + + port = rcu_dereference(skb->dev->macvlan_port); + if (port == NULL) + return skb; + + if (is_multicast_ether_addr(eth->h_dest)) { + macvlan_broadcast(skb, port); + return skb; + } + + vlan = macvlan_hash_lookup(port, eth->h_dest); + if (vlan == NULL) + return skb; + + dev = vlan->dev; + if (unlikely(!(dev->flags & IFF_UP))) { + kfree_skb(skb); + return NULL; + } + + skb = skb_share_check(skb, GFP_ATOMIC); + if (skb == NULL) { + dev->stats.rx_errors++; + dev->stats.rx_dropped++; + return NULL; + } + + dev->stats.rx_bytes += skb->len + ETH_HLEN; + dev->stats.rx_packets++; + dev->last_rx = jiffies; + + skb->dev = dev; + skb->pkt_type = PACKET_HOST; + + netif_rx(skb); + return NULL; +} + +static int macvlan_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + const struct macvlan_dev *vlan = netdev_priv(dev); + unsigned int 
len = skb->len; + int ret; + + skb->dev = vlan->lowerdev; + ret = dev_queue_xmit(skb); + + if (likely(ret == NET_XMIT_SUCCESS)) { + dev->stats.tx_packets++; + dev->stats.tx_bytes += len; + } else { + dev->stats.tx_errors++; + dev->stats.tx_aborted_errors++; + } + return NETDEV_TX_OK; +} + +static int macvlan_hard_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, void *daddr, void *saddr, + unsigned len) +{ + const struct macvlan_dev *vlan = netdev_priv(dev); + struct net_device *lowerdev = vlan->lowerdev; + + return lowerdev->hard_header(skb, lowerdev, type, daddr, + saddr ? : dev->dev_addr, len); +} + +static int macvlan_open(struct net_device *dev) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + struct macvlan_port *port = vlan->port; + struct net_device *lowerdev = vlan->lowerdev; + int err; + + err = dev_unicast_add(lowerdev, dev->dev_addr, ETH_ALEN); + if (err < 0) + return err; + if (dev->flags & IFF_ALLMULTI) + dev_set_allmulti(lowerdev, 1); + + hlist_add_head_rcu(&vlan->hlist, &port->vlan_hash[dev->dev_addr[5]]); + return 0; +} + +static int macvlan_stop(struct net_device *dev) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + struct net_device *lowerdev = vlan->lowerdev; + + dev_mc_unsync(lowerdev, dev); + if (dev->flags & IFF_ALLMULTI) + dev_set_allmulti(lowerdev, -1); + + dev_unicast_delete(lowerdev, dev->dev_addr, ETH_ALEN); + + hlist_del_rcu(&vlan->hlist); + synchronize_rcu(); + return 0; +} + +static void macvlan_change_rx_flags(struct net_device *dev, int change) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + struct net_device *lowerdev = vlan->lowerdev; + + if (change & IFF_ALLMULTI) + dev_set_allmulti(lowerdev, dev->flags & IFF_ALLMULTI ? 
1 : -1); +} + +static void macvlan_set_multicast_list(struct net_device *dev) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + + dev_mc_sync(vlan->lowerdev, dev); +} + +static int macvlan_change_mtu(struct net_device *dev, int new_mtu) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + + if (new_mtu < 68 || vlan->lowerdev->mtu < new_mtu) + return -EINVAL; + dev->mtu = new_mtu; + return 0; +} + +/* + * macvlan network devices have devices nesting below it and are a special + * "super class" of normal network devices; split their locks off into a + * separate class since they always nest. + */ +static struct lock_class_key macvlan_netdev_xmit_lock_key; + +#define MACVLAN_FEATURES \ + (NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ + NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_GSO_ROBUST | \ + NETIF_F_TSO_ECN | NETIF_F_TSO6) + +#define MACVLAN_STATE_MASK \ + ((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT)) + +static int macvlan_init(struct net_device *dev) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + const struct net_device *lowerdev = vlan->lowerdev; + + dev->state = (dev->state & ~MACVLAN_STATE_MASK) | + (lowerdev->state & MACVLAN_STATE_MASK); + dev->features = lowerdev->features & MACVLAN_FEATURES; + dev->iflink = lowerdev->ifindex; + + lockdep_set_class(&dev->_xmit_lock, &macvlan_netdev_xmit_lock_key); + return 0; +} + +static void macvlan_ethtool_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + snprintf(drvinfo->driver, 32, "macvlan"); + snprintf(drvinfo->version, 32, "0.1"); +} + +static u32 macvlan_ethtool_get_rx_csum(struct net_device *dev) +{ + const struct macvlan_dev *vlan = netdev_priv(dev); + struct net_device *lowerdev = vlan->lowerdev; + + if (lowerdev->ethtool_ops->get_rx_csum == NULL) + return 0; + return lowerdev->ethtool_ops->get_rx_csum(lowerdev); +} + +static const struct ethtool_ops macvlan_ethtool_ops = { + .get_link = ethtool_op_get_link, + .get_rx_csum = 
macvlan_ethtool_get_rx_csum, + .get_tx_csum = ethtool_op_get_tx_csum, + .get_tso = ethtool_op_get_tso, + .get_ufo = ethtool_op_get_ufo, + .get_sg = ethtool_op_get_sg, + .get_drvinfo = macvlan_ethtool_get_drvinfo, +}; + +static void macvlan_setup(struct net_device *dev) +{ + ether_setup(dev); + + dev->init = macvlan_init; + dev->open = macvlan_open; + dev->stop = macvlan_stop; + dev->change_mtu = macvlan_change_mtu; + dev->change_rx_flags = macvlan_change_rx_flags; + dev->set_multicast_list = macvlan_set_multicast_list; + dev->hard_header = macvlan_hard_header; + dev->hard_start_xmit = macvlan_hard_start_xmit; + dev->destructor = free_netdev; + dev->ethtool_ops = &macvlan_ethtool_ops; + dev->tx_queue_len = 0; +} + +static int macvlan_port_create(struct net_device *dev) +{ + struct macvlan_port *port; + unsigned int i; + + if (dev->type != ARPHRD_ETHER || dev->flags & IFF_LOOPBACK) + return -EINVAL; + + port = kzalloc(sizeof(*port), GFP_KERNEL); + if (port == NULL) + return -ENOMEM; + + port->dev = dev; + INIT_LIST_HEAD(&port->vlans); + for (i = 0; i < MACVLAN_HASH_SIZE; i++) + INIT_HLIST_HEAD(&port->vlan_hash[i]); + rcu_assign_pointer(dev->macvlan_port, port); + return 0; +} + +static void macvlan_port_destroy(struct net_device *dev) +{ + struct macvlan_port *port = dev->macvlan_port; + + rcu_assign_pointer(dev->macvlan_port, NULL); + synchronize_rcu(); + kfree(port); +} + +static void macvlan_transfer_operstate(struct net_device *dev) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + const struct net_device *lowerdev = vlan->lowerdev; + + if (lowerdev->operstate == IF_OPER_DORMANT) + netif_dormant_on(dev); + else + netif_dormant_off(dev); + + if (netif_carrier_ok(lowerdev)) { + if (!netif_carrier_ok(dev)) + netif_carrier_on(dev); + } else { + if (netif_carrier_ok(dev)) /* lower device lost carrier: drop ours too */ + netif_carrier_off(dev); + } +} + +static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) + return 
-EINVAL; + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) + return -EADDRNOTAVAIL; + } + return 0; +} + +static int macvlan_newlink(struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + struct macvlan_port *port; + struct net_device *lowerdev; + int err; + + if (!tb[IFLA_LINK]) + return -EINVAL; + + lowerdev = __dev_get_by_index(nla_get_u32(tb[IFLA_LINK])); + if (lowerdev == NULL) + return -ENODEV; + + if (!tb[IFLA_MTU]) + dev->mtu = lowerdev->mtu; + else if (dev->mtu > lowerdev->mtu) + return -EINVAL; + + if (!tb[IFLA_ADDRESS]) + random_ether_addr(dev->dev_addr); + + if (lowerdev->macvlan_port == NULL) { + err = macvlan_port_create(lowerdev); + if (err < 0) + return err; + } + port = lowerdev->macvlan_port; + + vlan->lowerdev = lowerdev; + vlan->dev = dev; + vlan->port = port; + + err = register_netdevice(dev); + if (err < 0) + return err; + + list_add_tail(&vlan->list, &port->vlans); + macvlan_transfer_operstate(dev); + return 0; +} + +static void macvlan_dellink(struct net_device *dev) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + struct macvlan_port *port = vlan->port; + + list_del(&vlan->list); + unregister_netdevice(dev); + + if (list_empty(&port->vlans)) + macvlan_port_destroy(port->dev); /* the port hangs off the lower device */ +} + +static struct rtnl_link_ops macvlan_link_ops __read_mostly = { + .kind = "macvlan", + .priv_size = sizeof(struct macvlan_dev), + .setup = macvlan_setup, + .validate = macvlan_validate, + .newlink = macvlan_newlink, + .dellink = macvlan_dellink, +}; + +static int macvlan_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = ptr; + struct macvlan_dev *vlan, *next; + struct macvlan_port *port; + + port = dev->macvlan_port; + if (port == NULL) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_CHANGE: + list_for_each_entry(vlan, &port->vlans, list) + macvlan_transfer_operstate(vlan->dev); + break; + case NETDEV_FEAT_CHANGE: + 
list_for_each_entry(vlan, &port->vlans, list) { + vlan->dev->features = dev->features & MACVLAN_FEATURES; + netdev_features_change(vlan->dev); + } + break; + case NETDEV_UNREGISTER: + list_for_each_entry_safe(vlan, next, &port->vlans, list) + macvlan_dellink(vlan->dev); + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block macvlan_notifier_block __read_mostly = { + .notifier_call = macvlan_device_event, +}; + +static int __init macvlan_init_module(void) +{ + int err; + + register_netdevice_notifier(&macvlan_notifier_block); + macvlan_handle_frame_hook = macvlan_handle_frame; + + err = rtnl_link_register(&macvlan_link_ops); + if (err < 0) + goto err1; + return 0; +err1: + macvlan_handle_frame_hook = NULL; /* undo the hook installed above */ + unregister_netdevice_notifier(&macvlan_notifier_block); + return err; +} + +static void __exit macvlan_cleanup_module(void) +{ + rtnl_link_unregister(&macvlan_link_ops); + macvlan_handle_frame_hook = NULL; + unregister_netdevice_notifier(&macvlan_notifier_block); +} + +module_init(macvlan_init_module); +module_exit(macvlan_cleanup_module); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_DESCRIPTION("Driver for MAC address based VLANs"); +MODULE_ALIAS_RTNL_LINK("macvlan"); diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h new file mode 100644 index 0000000..0d9d7ea --- /dev/null +++ b/include/linux/if_macvlan.h @@ -0,0 +1,9 @@ +#ifndef _LINUX_IF_MACVLAN_H +#define _LINUX_IF_MACVLAN_H + +#ifdef __KERNEL__ + +extern struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *); + +#endif /* __KERNEL__ */ +#endif /* _LINUX_IF_MACVLAN_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e5af458..322b5ea 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -564,6 +564,8 @@ struct net_device /* bridge stuff */ struct net_bridge_port *br_port; + /* macvlan */ + struct macvlan_port *macvlan_port; /* class/net/name entry */ struct device dev; diff --git 
a/net/core/dev.c b/net/core/dev.c index 59ec811..13a0d9f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -98,6 +98,7 @@ #include #include #include +#include #include #include #include @@ -1813,6 +1814,28 @@ static inline struct sk_buff *handle_bridge(struct sk_buff *skb, #define handle_bridge(skb, pt_prev, ret, orig_dev) (skb) #endif +#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) +struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly; +EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook); + +static inline struct sk_buff *handle_macvlan(struct sk_buff *skb, + struct packet_type **pt_prev, + int *ret, + struct net_device *orig_dev) +{ + if (skb->dev->macvlan_port == NULL) + return skb; + + if (*pt_prev) { + *ret = deliver_skb(skb, *pt_prev, orig_dev); + *pt_prev = NULL; + } + return macvlan_handle_frame_hook(skb); +} +#else +#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb) +#endif + #ifdef CONFIG_NET_CLS_ACT /* TODO: Maybe we should just force sch_ingress to be compiled in * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions @@ -1920,6 +1943,9 @@ ncls: skb = handle_bridge(skb, &pt_prev, &ret, orig_dev); if (!skb) goto out; + skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev); + if (!skb) + goto out; type = skb->protocol; list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) { -- cgit v0.10.2 From a7ecfc866578e665e20004a2f5fff5b73e8be3bc Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 14 Jul 2007 18:56:30 -0700 Subject: [VLAN]: Fix memset length Fix sizeof(ETH_ALEN) Introduced by my rtnl_link patches. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 39bdcc2..cda936b 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -377,7 +377,7 @@ void vlan_setup(struct net_device *new_dev) new_dev->destructor = free_netdev; new_dev->do_ioctl = vlan_dev_ioctl; - memset(new_dev->broadcast, 0, sizeof(ETH_ALEN)); + memset(new_dev->broadcast, 0, ETH_ALEN); } static void vlan_transfer_operstate(const struct net_device *dev, struct net_device *vlandev) -- cgit v0.10.2 From b3b0b681b12478a7afa7d1f3d58be96830e16c7d Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sat, 14 Jul 2007 18:57:19 -0700 Subject: [TCP]: tcp probe add back ssthresh field Sangtae noticed the ssthresh got missed. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 86624fa..f37d592 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -111,6 +111,7 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, p->snd_una = tp->snd_una; p->snd_cwnd = tp->snd_cwnd; p->snd_wnd = tp->snd_wnd; + p->ssthresh = tcp_current_ssthresh(sk); p->srtt = tp->srtt >> 3; tcp_probe.head = (tcp_probe.head + 1) % bufsize; -- cgit v0.10.2 From acd159b6b5828175be6b9ccccd9b054239ec63e9 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sat, 14 Jul 2007 19:00:59 -0700 Subject: [INET_SOCK]: make net/ipv4/inet_timewait_sock.c:__inet_twsk_kill() static This patch makes the needlessly global __inet_twsk_kill() static. Signed-off-by: Adrian Bunk Signed-off-by: David S. 
Miller diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 09a2532..47d52b2 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -209,9 +209,6 @@ static inline void inet_twsk_put(struct inet_timewait_sock *tw) extern struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state); -extern void __inet_twsk_kill(struct inet_timewait_sock *tw, - struct inet_hashinfo *hashinfo); - extern void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, struct inet_hashinfo *hashinfo); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index a73cf93..2586df0 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -14,7 +14,8 @@ #include /* Must be called with locally disabled BHs. */ -void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo) +static void __inet_twsk_kill(struct inet_timewait_sock *tw, + struct inet_hashinfo *hashinfo) { struct inet_bind_hashbucket *bhead; struct inet_bind_bucket *tb; @@ -47,8 +48,6 @@ void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashi inet_twsk_put(tw); } -EXPORT_SYMBOL_GPL(__inet_twsk_kill); - /* * Enter the time wait state. This is called with locally disabled BH. * Essentially we whip up a timewait bucket, copy the relevant info into it -- cgit v0.10.2 From da7de31cc50796a53593785d4508b7b7ffa9a9b2 Mon Sep 17 00:00:00 2001 From: Jennifer Hunt Date: Sat, 14 Jul 2007 19:03:00 -0700 Subject: [AF_IUCV]: Improve description of IUCV and AFIUCV configuration options. Signed-off-by: Jennifer Hunt Signed-off-by: Ursula Braun >braunu@de.ibm.com> Acked-by: Frank Pavlic Signed-off-by: David S. 
Miller diff --git a/net/iucv/Kconfig b/net/iucv/Kconfig index f8fcc3d..16ce9cd 100644 --- a/net/iucv/Kconfig +++ b/net/iucv/Kconfig @@ -1,13 +1,13 @@ config IUCV - tristate "IUCV support (VM only)" + tristate "IUCV support (S390 - z/VM only)" depends on S390 help - Select this option if you want to use inter-user communication under - VM or VIF sockets. If you run on z/VM, say "Y" to enable a fast + Select this option if you want to use inter-user communication + under VM or VIF. If you run on z/VM, say "Y" to enable a fast communication link between VM guests. config AFIUCV - tristate "AF_IUCV support (VM only)" + tristate "AF_IUCV support (S390 - z/VM only)" depends on IUCV help Select this option if you want to use inter-user communication under -- cgit v0.10.2 From 13fdc9a74df0fec70f421c6891e184ed8c3b9088 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Sat, 14 Jul 2007 19:03:41 -0700 Subject: [AF_IUCV]: Avoid deadlock between iucv_path_connect and tasklet. An iucv deadlock may occur, where one CPU is spinning on the iucv_table_lock for iucv_tasklet_fn(), while another CPU is holding the iucv_table_lock for an iucv_path_connect() and is waiting for the first CPU in an smp_call_function. Solution: replace spin_lock in iucv_tasklet_fn by spin_trylock and reschedule tasklet in case of non-granted lock. Signed-off-by: Ursula Braun Acked-by: Frank Pavlic Signed-off-by: David S. Miller diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index b733306..ad5150b 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -1494,7 +1494,10 @@ static void iucv_tasklet_fn(unsigned long ignored) struct iucv_irq_list *p, *n; /* Serialize tasklet, iucv_path_sever and iucv_path_connect. 
*/ - spin_lock(&iucv_table_lock); + if (!spin_trylock(&iucv_table_lock)) { + tasklet_schedule(&iucv_tasklet); + return; + } iucv_active_cpu = smp_processor_id(); spin_lock_irq(&iucv_queue_lock); -- cgit v0.10.2 From febca281f677a775c61cd0572c2f35e4ead9e7d5 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Sat, 14 Jul 2007 19:04:25 -0700 Subject: [AF_IUCV]: Add lock when updating accept_q The accept_queue of an af_iucv socket will be corrupted, if adding and deleting of entries in this queue occurs at the same time (connect request from one client, while accept call is processed for another client). Solution: add locking when updating accept_q Signed-off-by: Ursula Braun Acked-by: Frank Pavlic Signed-off-by: David S. Miller diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h index f9bd11b..b6c468c 100644 --- a/include/net/iucv/af_iucv.h +++ b/include/net/iucv/af_iucv.h @@ -60,6 +60,7 @@ struct iucv_sock { char dst_user_id[8]; char dst_name[8]; struct list_head accept_q; + spinlock_t accept_q_lock; struct sock *parent; struct iucv_path *path; struct sk_buff_head send_skb_q; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index d9e9ddb..53ae14c 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -219,6 +219,7 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) sock_init_data(sock, sk); INIT_LIST_HEAD(&iucv_sk(sk)->accept_q); + spin_lock_init(&iucv_sk(sk)->accept_q_lock); skb_queue_head_init(&iucv_sk(sk)->send_skb_q); skb_queue_head_init(&iucv_sk(sk)->backlog_skb_q); iucv_sk(sk)->send_tag = 0; @@ -274,15 +275,25 @@ void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *sk) void iucv_accept_enqueue(struct sock *parent, struct sock *sk) { + unsigned long flags; + struct iucv_sock *par = iucv_sk(parent); + sock_hold(sk); - list_add_tail(&iucv_sk(sk)->accept_q, &iucv_sk(parent)->accept_q); + spin_lock_irqsave(&par->accept_q_lock, flags); + list_add_tail(&iucv_sk(sk)->accept_q, &par->accept_q); + 
spin_unlock_irqrestore(&par->accept_q_lock, flags); iucv_sk(sk)->parent = parent; parent->sk_ack_backlog++; } void iucv_accept_unlink(struct sock *sk) { + unsigned long flags; + struct iucv_sock *par = iucv_sk(iucv_sk(sk)->parent); + + spin_lock_irqsave(&par->accept_q_lock, flags); list_del_init(&iucv_sk(sk)->accept_q); + spin_unlock_irqrestore(&par->accept_q_lock, flags); iucv_sk(sk)->parent->sk_ack_backlog--; iucv_sk(sk)->parent = NULL; sock_put(sk); @@ -298,8 +309,8 @@ struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock) lock_sock(sk); if (sk->sk_state == IUCV_CLOSED) { - release_sock(sk); iucv_accept_unlink(sk); + release_sock(sk); continue; } @@ -879,6 +890,7 @@ static int iucv_callback_connreq(struct iucv_path *path, /* Find out if this path belongs to af_iucv. */ read_lock(&iucv_sk_list.lock); iucv = NULL; + sk = NULL; sk_for_each(sk, node, &iucv_sk_list.head) if (sk->sk_state == IUCV_LISTEN && !memcmp(&iucv_sk(sk)->src_name, src_name, 8)) { -- cgit v0.10.2 From 6460d948f3ebf7d5040328a60a0ab7221f69945b Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sat, 14 Jul 2007 19:07:52 -0700 Subject: [NET]: Add ethtool support for NETIF_F_IPV6_CSUM devices. Add ethtool utility function to set or clear IPV6_CSUM feature flag. Modify tg3.c and bnx2.c to use this function when doing ethtool -K to change tx checksum. Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 4e5e1cb..d23861c 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -6218,7 +6218,7 @@ bnx2_set_tx_csum(struct net_device *dev, u32 data) struct bnx2 *bp = netdev_priv(dev); if (CHIP_NUM(bp) == CHIP_NUM_5709) - return (ethtool_op_set_tx_hw_csum(dev, data)); + return (ethtool_op_set_tx_ipv6_csum(dev, data)); else return (ethtool_op_set_tx_csum(dev, data)); } diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 32e4037..5ee1476 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -8318,7 +8318,7 @@ static int tg3_set_tx_csum(struct net_device *dev, u32 data) if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787) - ethtool_op_set_tx_hw_csum(dev, data); + ethtool_op_set_tx_ipv6_csum(dev, data); else ethtool_op_set_tx_csum(dev, data); diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index f2d248f..3a63224 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -265,6 +265,7 @@ u32 ethtool_op_get_link(struct net_device *dev); u32 ethtool_op_get_tx_csum(struct net_device *dev); int ethtool_op_set_tx_csum(struct net_device *dev, u32 data); int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data); +int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data); u32 ethtool_op_get_sg(struct net_device *dev); int ethtool_op_set_sg(struct net_device *dev, u32 data); u32 ethtool_op_get_tso(struct net_device *dev); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 8d5e5a0..0b531e9 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -52,6 +52,17 @@ int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data) return 0; } + +int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data) +{ + if (data) + dev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + else + dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); + + return 0; +} + u32 ethtool_op_get_sg(struct net_device *dev) 
{ return (dev->features & NETIF_F_SG) != 0; @@ -980,5 +991,6 @@ EXPORT_SYMBOL(ethtool_op_set_sg); EXPORT_SYMBOL(ethtool_op_set_tso); EXPORT_SYMBOL(ethtool_op_set_tx_csum); EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum); +EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum); EXPORT_SYMBOL(ethtool_op_set_ufo); EXPORT_SYMBOL(ethtool_op_get_ufo); -- cgit v0.10.2 From d87d8469e2dd19a3a134b99f78288d41854c614b Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sat, 14 Jul 2007 20:44:23 -0700 Subject: [NETFILTER]: nf_conntrack: Increment error count on parsing IPv4 header Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 89e20ab..b5c4bb5 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -131,8 +131,6 @@ ipv6_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff, */ if ((protoff < 0) || (protoff > (*pskb)->len)) { pr_debug("ip6_conntrack_core: can't find proto in pkt\n"); - NF_CT_STAT_INC_ATOMIC(error); - NF_CT_STAT_INC_ATOMIC(invalid); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 3d14110..b730413 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -625,6 +625,8 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) { pr_debug("not prepared to track yet or error occured\n"); + NF_CT_STAT_INC_ATOMIC(error); + NF_CT_STAT_INC_ATOMIC(invalid); return -ret; } -- cgit v0.10.2 From ffc30690480bdd337e4914302b926d24870b56b2 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sat, 14 Jul 2007 20:44:50 -0700 Subject: [NETFILTER]: nf_conntrack: make l3proto->prepare() generic and renames it The icmp[v6] l4proto modules parse headers in ICMP[v6] error to get tuple. 
But they have to find the offset to transport protocol header before that. Their processings are almost same as prepare() of l3proto modules. This makes prepare() more generic to simplify icmp[v6] l4proto module later. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h index b4b6049..5a89659 100644 --- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h +++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h @@ -7,7 +7,7 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6; -extern int nf_ct_ipv6_skip_exthdr(struct sk_buff *skb, int start, +extern int nf_ct_ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, int len); extern int nf_ct_frag6_init(void); diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index 890752d..e3708a6 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -58,11 +58,11 @@ struct nf_conntrack_l3proto /* * Called before tracking. - * *dataoff: offset of protocol header (TCP, UDP,...) in *pskb + * *dataoff: offset of protocol header (TCP, UDP,...) 
in skb * *protonum: protocol number */ - int (*prepare)(struct sk_buff **pskb, unsigned int hooknum, - unsigned int *dataoff, u_int8_t *protonum); + int (*get_l4proto)(const struct sk_buff *skb, unsigned int nhoff, + unsigned int *dataoff, u_int8_t *protonum); int (*tuple_to_nfattr)(struct sk_buff *skb, const struct nf_conntrack_tuple *t); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 3c56299..ee29f4e 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -78,21 +78,26 @@ nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) return skb; } -static int -ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff, - u_int8_t *protonum) +static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, + unsigned int *dataoff, u_int8_t *protonum) { + struct iphdr _iph, *iph; + + iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); + if (iph == NULL) + return -NF_DROP; + /* Never happen */ - if (ip_hdr(*pskb)->frag_off & htons(IP_OFFSET)) { + if (iph->frag_off & htons(IP_OFFSET)) { if (net_ratelimit()) { - printk(KERN_ERR "ipv4_prepare: Frag of proto %u (hook=%u)\n", - ip_hdr(*pskb)->protocol, hooknum); + printk(KERN_ERR "ipv4_get_l4proto: Frag of proto %u\n", + iph->protocol); } return -NF_DROP; } - *dataoff = skb_network_offset(*pskb) + ip_hdrlen(*pskb); - *protonum = ip_hdr(*pskb)->protocol; + *dataoff = nhoff + (iph->ihl << 2); + *protonum = iph->protocol; return NF_ACCEPT; } @@ -407,7 +412,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = { .invert_tuple = ipv4_invert_tuple, .print_tuple = ipv4_print_tuple, .print_conntrack = ipv4_print_conntrack, - .prepare = ipv4_prepare, + .get_l4proto = ipv4_get_l4proto, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .tuple_to_nfattr = ipv4_tuple_to_nfattr, .nfattr_to_tuple = ipv4_nfattr_to_tuple, diff --git 
a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index b5c4bb5..9b7eaaa 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -86,7 +86,7 @@ static int ipv6_print_conntrack(struct seq_file *s, * - Note also special handling of AUTH header. Thanks to IPsec wizards. */ -int nf_ct_ipv6_skip_exthdr(struct sk_buff *skb, int start, u8 *nexthdrp, +int nf_ct_ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, int len) { u8 nexthdr = *nexthdrp; @@ -117,19 +117,24 @@ int nf_ct_ipv6_skip_exthdr(struct sk_buff *skb, int start, u8 *nexthdrp, return start; } -static int -ipv6_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff, - u_int8_t *protonum) +static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, + unsigned int *dataoff, u_int8_t *protonum) { - unsigned int extoff = (u8 *)(ipv6_hdr(*pskb) + 1) - (*pskb)->data; - unsigned char pnum = ipv6_hdr(*pskb)->nexthdr; - int protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum, - (*pskb)->len - extoff); + unsigned int extoff = nhoff + sizeof(struct ipv6hdr); + unsigned char pnum; + int protoff; + + if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr), + &pnum, sizeof(pnum)) != 0) { + pr_debug("ip6_conntrack_core: can't get nexthdr\n"); + return -NF_ACCEPT; + } + protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum, skb->len - extoff); /* - * (protoff == (*pskb)->len) mean that the packet doesn't have no data + * (protoff == skb->len) mean that the packet doesn't have no data * except of IPv6 & ext headers. but it's tracked anyway. 
- YK */ - if ((protoff < 0) || (protoff > (*pskb)->len)) { + if ((protoff < 0) || (protoff > skb->len)) { pr_debug("ip6_conntrack_core: can't find proto in pkt\n"); return -NF_ACCEPT; } @@ -375,7 +380,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = { .invert_tuple = ipv6_invert_tuple, .print_tuple = ipv6_print_tuple, .print_conntrack = ipv6_print_conntrack, - .prepare = ipv6_prepare, + .get_l4proto = ipv6_get_l4proto, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .tuple_to_nfattr = ipv6_tuple_to_nfattr, .nfattr_to_tuple = ipv6_nfattr_to_tuple, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index b730413..5b194e3 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -622,8 +622,9 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) /* rcu_read_lock()ed by nf_hook_slow */ l3proto = __nf_ct_l3proto_find((u_int16_t)pf); - - if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) { + ret = l3proto->get_l4proto(*pskb, skb_network_offset(*pskb), + &dataoff, &protonum); + if (ret <= 0) { pr_debug("not prepared to track yet or error occured\n"); NF_CT_STAT_INC_ATOMIC(error); NF_CT_STAT_INC_ATOMIC(invalid); diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c index b1bfa20..0691642 100644 --- a/net/netfilter/nf_conntrack_l3proto_generic.c +++ b/net/netfilter/nf_conntrack_l3proto_generic.c @@ -61,9 +61,8 @@ static int generic_print_conntrack(struct seq_file *s, return 0; } -static int -generic_prepare(struct sk_buff **pskb, unsigned int hooknum, - unsigned int *dataoff, u_int8_t *protonum) +static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, + unsigned int *dataoff, u_int8_t *protonum) { /* Never track !!! 
*/ return -NF_ACCEPT; @@ -77,6 +76,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_generic = { .invert_tuple = generic_invert_tuple, .print_tuple = generic_print_tuple, .print_conntrack = generic_print_conntrack, - .prepare = generic_prepare, + .get_l4proto = generic_get_l4proto, }; EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic); -- cgit v0.10.2 From e2a3123fbe58da9fd3f35cd242087896ace6049f Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sat, 14 Jul 2007 20:45:14 -0700 Subject: [NETFILTER]: nf_conntrack: Introduces nf_ct_get_tuplepr and uses it nf_ct_get_tuple() requires the offset to transport header and that bothers callers such as icmp[v6] l4proto modules. This introduces new function to simplify them. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index d4f02eb..810020e 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -186,6 +186,10 @@ extern void nf_conntrack_hash_insert(struct nf_conn *ct); extern void nf_conntrack_flush(void); +extern int nf_ct_get_tuplepr(const struct sk_buff *skb, + unsigned int nhoff, + u_int16_t l3num, + struct nf_conntrack_tuple *tuple); extern int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse, const struct nf_conntrack_tuple *orig); diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 0fe8fb0..b8b7999 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -136,40 +136,22 @@ icmp_error_message(struct sk_buff *skb, unsigned int hooknum) { struct nf_conntrack_tuple innertuple, origtuple; - struct { - struct icmphdr icmp; - struct iphdr ip; - } _in, *inside; struct nf_conntrack_l4proto *innerproto; struct nf_conntrack_tuple_hash *h; - int dataoff; NF_CT_ASSERT(skb->nfct == NULL); - /* Not enough header? 
*/ - inside = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_in), &_in); - if (inside == NULL) - return -NF_ACCEPT; - - /* Ignore ICMP's containing fragments (shouldn't happen) */ - if (inside->ip.frag_off & htons(IP_OFFSET)) { - pr_debug("icmp_error_message: fragment of proto %u\n", - inside->ip.protocol); + /* Are they talking about one of our connections? */ + if (!nf_ct_get_tuplepr(skb, + skb_network_offset(skb) + ip_hdrlen(skb) + + sizeof(struct icmphdr), + PF_INET, &origtuple)) { + pr_debug("icmp_error_message: failed to get tuple\n"); return -NF_ACCEPT; } /* rcu_read_lock()ed by nf_hook_slow */ - innerproto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); - - dataoff = ip_hdrlen(skb) + sizeof(inside->icmp); - /* Are they talking about one of our connections? */ - if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET, - inside->ip.protocol, &origtuple, - &nf_conntrack_l3proto_ipv4, innerproto)) { - pr_debug("icmp_error_message: ! get_tuple p=%u", - inside->ip.protocol); - return -NF_ACCEPT; - } + innerproto = __nf_ct_l4proto_find(PF_INET, origtuple.dst.protonum); /* Ordinarily, we'd expect the inverted tupleproto, but it's been preserved inside the ICMP. 
*/ diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 9defc7e..0fca7e8 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -136,49 +136,23 @@ icmpv6_error_message(struct sk_buff *skb, { struct nf_conntrack_tuple intuple, origtuple; struct nf_conntrack_tuple_hash *h; - struct icmp6hdr _hdr, *hp; - unsigned int inip6off; struct nf_conntrack_l4proto *inproto; - u_int8_t inprotonum; - unsigned int inprotoff; NF_CT_ASSERT(skb->nfct == NULL); - hp = skb_header_pointer(skb, icmp6off, sizeof(_hdr), &_hdr); - if (hp == NULL) { - pr_debug("icmpv6_error: Can't get ICMPv6 hdr.\n"); - return -NF_ACCEPT; - } - - inip6off = icmp6off + sizeof(_hdr); - if (skb_copy_bits(skb, inip6off+offsetof(struct ipv6hdr, nexthdr), - &inprotonum, sizeof(inprotonum)) != 0) { - pr_debug("icmpv6_error: Can't get nexthdr in inner IPv6 " - "header.\n"); - return -NF_ACCEPT; - } - inprotoff = nf_ct_ipv6_skip_exthdr(skb, - inip6off + sizeof(struct ipv6hdr), - &inprotonum, - skb->len - inip6off - - sizeof(struct ipv6hdr)); - - if ((inprotoff > skb->len) || (inprotonum == NEXTHDR_FRAGMENT)) { - pr_debug("icmpv6_error: Can't get protocol header in ICMPv6 " - "payload.\n"); - return -NF_ACCEPT; - } - - /* rcu_read_lock()ed by nf_hook_slow */ - inproto = __nf_ct_l4proto_find(PF_INET6, inprotonum); - /* Are they talking about one of our connections? */ - if (!nf_ct_get_tuple(skb, inip6off, inprotoff, PF_INET6, inprotonum, - &origtuple, &nf_conntrack_l3proto_ipv6, inproto)) { + if (!nf_ct_get_tuplepr(skb, + skb_network_offset(skb) + + sizeof(struct ipv6hdr) + + sizeof(struct icmp6hdr), + PF_INET6, &origtuple)) { pr_debug("icmpv6_error: Can't get tuple\n"); return -NF_ACCEPT; } + /* rcu_read_lock()ed by nf_hook_slow */ + inproto = __nf_ct_l4proto_find(PF_INET6, origtuple.dst.protonum); + /* Ordinarily, we'd expect the inverted tupleproto, but it's been preserved inside the ICMP. 
*/ if (!nf_ct_invert_tuple(&intuple, &origtuple, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 5b194e3..8cce814 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -113,6 +113,36 @@ nf_ct_get_tuple(const struct sk_buff *skb, } EXPORT_SYMBOL_GPL(nf_ct_get_tuple); +int nf_ct_get_tuplepr(const struct sk_buff *skb, + unsigned int nhoff, + u_int16_t l3num, + struct nf_conntrack_tuple *tuple) +{ + struct nf_conntrack_l3proto *l3proto; + struct nf_conntrack_l4proto *l4proto; + unsigned int protoff; + u_int8_t protonum; + int ret; + + rcu_read_lock(); + + l3proto = __nf_ct_l3proto_find(l3num); + ret = l3proto->get_l4proto(skb, nhoff, &protoff, &protonum); + if (ret != NF_ACCEPT) { + rcu_read_unlock(); + return 0; + } + + l4proto = __nf_ct_l4proto_find(l3num, protonum); + + ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, tuple, + l3proto, l4proto); + + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr); + int nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, const struct nf_conntrack_tuple *orig, -- cgit v0.10.2 From 130e7a83d7ec8c5c673225e0fa8ea37b1ed507a5 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Sat, 14 Jul 2007 20:45:41 -0700 Subject: [NETFILTER]: nf_conntrack: Don't track locally generated special ICMP error The conntrack assigned to locally generated ICMP error is usually the one assigned to the original packet which has caused the error. But if the original packet is handled as invalid by nf_conntrack, no conntrack is assigned to the original packet. Then nf_ct_attach() cannot assign any conntrack to the ICMP error packet. In that case the current nf_conntrack_icmp assigns appropriate conntrack to it. But the current code mistakes the direction of the packet. As a result, NAT code mistakes the address to be mangled. To fix the bug, this changes nf_conntrack_icmp not to assign conntrack to such ICMP error. 
Actually no address is necessary to be mangled in this case. Spotted by Jordan Russell. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index b8b7999..f965733 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -165,25 +165,13 @@ icmp_error_message(struct sk_buff *skb, h = nf_conntrack_find_get(&innertuple); if (!h) { - /* Locally generated ICMPs will match inverted if they - haven't been SNAT'ed yet */ - /* FIXME: NAT code has to handle half-done double NAT --RR */ - if (hooknum == NF_IP_LOCAL_OUT) - h = nf_conntrack_find_get(&origtuple); - - if (!h) { - pr_debug("icmp_error_message: no match\n"); - return -NF_ACCEPT; - } - - /* Reverse direction from that found */ - if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) - *ctinfo += IP_CT_IS_REPLY; - } else { - if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) - *ctinfo += IP_CT_IS_REPLY; + pr_debug("icmp_error_message: no match\n"); + return -NF_ACCEPT; } + if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) + *ctinfo += IP_CT_IS_REPLY; + /* Update skb to refer to this connection */ skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general; skb->nfctinfo = *ctinfo; -- cgit v0.10.2 From a887c1c148ffb3eb1c193e9869ca5297c6e22078 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 14 Jul 2007 20:46:15 -0700 Subject: [NETFILTER]: Lower *tables printk severity Lower ip6tables, arptables and ebtables printk severity similar to Dan Aloni's patch for iptables. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index ac9984f..4169a2a 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1525,14 +1525,14 @@ static int __init ebtables_init(void) if ((ret = nf_register_sockopt(&ebt_sockopts)) < 0) return ret; - printk(KERN_NOTICE "Ebtables v2.0 registered\n"); + printk(KERN_INFO "Ebtables v2.0 registered\n"); return 0; } static void __exit ebtables_fini(void) { nf_unregister_sockopt(&ebt_sockopts); - printk(KERN_NOTICE "Ebtables v2.0 unregistered\n"); + printk(KERN_INFO "Ebtables v2.0 unregistered\n"); } EXPORT_SYMBOL(ebt_register_table); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index e981232..d1149ab 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1184,7 +1184,7 @@ static int __init arp_tables_init(void) if (ret < 0) goto err4; - printk("arp_tables: (C) 2002 David S. Miller\n"); + printk(KERN_INFO "arp_tables: (C) 2002 David S. Miller\n"); return 0; err4: diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 254c769..aeda617 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1497,7 +1497,7 @@ static int __init ip6_tables_init(void) if (ret < 0) goto err5; - printk("ip6_tables: (C) 2000-2006 Netfilter Core Team\n"); + printk(KERN_INFO "ip6_tables: (C) 2000-2006 Netfilter Core Team\n"); return 0; err5: -- cgit v0.10.2 From 370786f9cfd430cb424f00ce4110e75bb1b95a19 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 14 Jul 2007 20:47:26 -0700 Subject: [NETFILTER]: x_tables: add connlimit match ipt_connlimit has been sitting in POM-NG for a long time. Here is a new shiny xt_connlimit with: * xtables'ified * will request the layer3 module (previously it hotdropped every packet when it was not loaded) * fixed: there was a deadlock in case of an OOM condition * support for any layer4 protocol (e.g. 
UDP/SCTP) * using jhash, as suggested by Eric Dumazet * ipv6 support Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller diff --git a/include/linux/netfilter/xt_connlimit.h b/include/linux/netfilter/xt_connlimit.h new file mode 100644 index 0000000..90ae8b4 --- /dev/null +++ b/include/linux/netfilter/xt_connlimit.h @@ -0,0 +1,17 @@ +#ifndef _XT_CONNLIMIT_H +#define _XT_CONNLIMIT_H + +struct xt_connlimit_data; + +struct xt_connlimit_info { + union { + u_int32_t v4_mask; + u_int32_t v6_mask[4]; + }; + unsigned int limit, inverse; + + /* this needs to be at the end */ + struct xt_connlimit_data *data __attribute__((aligned(8))); +}; + +#endif /* _XT_CONNLIMIT_H */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index df5e8da..9415b9a 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -423,6 +423,13 @@ config NETFILTER_XT_MATCH_CONNBYTES If you want to compile it as a module, say M here and read . If unsure, say `N'. +config NETFILTER_XT_MATCH_CONNLIMIT + tristate '"connlimit" match support' + depends on NETFILTER_XTABLES + ---help--- + This match allows you to match against the number of parallel + connections to a server per client IP address (or address block).
+ config NETFILTER_XT_MATCH_CONNMARK tristate '"connmark" connection mark match support' depends on NETFILTER_XTABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 58b4245..3e4a16a 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -52,6 +52,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o # matches obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o +obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNMARK) += xt_connmark.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c new file mode 100644 index 0000000..3335dd5 --- /dev/null +++ b/net/netfilter/xt_connlimit.c @@ -0,0 +1,313 @@ +/* + * netfilter module to limit the number of parallel tcp + * connections per IP address. + * (c) 2000 Gerd Knorr + * Nov 2002: Martin Bene : + * only ignore TIME_WAIT or gone connections + * Copyright © Jan Engelhardt , 2007 + * + * based on ... + * + * Kernel module to match connection tracking information. + * GPL (C) 1999 Rusty Russell (rusty@rustcorp.com.au). 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* we will save the tuples of all connections we care about */ +struct xt_connlimit_conn { + struct list_head list; + struct nf_conntrack_tuple tuple; +}; + +struct xt_connlimit_data { + struct list_head iphash[256]; + spinlock_t lock; +}; + +static u_int32_t connlimit_rnd; +static bool connlimit_rnd_inited; + +static inline unsigned int connlimit_iphash(u_int32_t addr) +{ + if (unlikely(!connlimit_rnd_inited)) { + get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd)); + connlimit_rnd_inited = true; + } + return jhash_1word(addr, connlimit_rnd) & 0xFF; +} + +static inline unsigned int +connlimit_iphash6(const union nf_conntrack_address *addr, + const union nf_conntrack_address *mask) +{ + union nf_conntrack_address res; + unsigned int i; + + if (unlikely(!connlimit_rnd_inited)) { + get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd)); + connlimit_rnd_inited = true; + } + + for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i) + res.ip6[i] = addr->ip6[i] & mask->ip6[i]; + + return jhash2(res.ip6, ARRAY_SIZE(res.ip6), connlimit_rnd) & 0xFF; +} + +static inline bool already_closed(const struct nf_conn *conn) +{ + u_int16_t proto = conn->tuplehash[0].tuple.dst.protonum; + + if (proto == IPPROTO_TCP) + return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT; + else + return 0; +} + +static inline unsigned int +same_source_net(const union nf_conntrack_address *addr, + const union nf_conntrack_address *mask, + const union nf_conntrack_address *u3, unsigned int family) +{ + if (family == AF_INET) { + return (addr->ip & mask->ip) == (u3->ip & mask->ip); + } else { + union nf_conntrack_address lh, rh; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i) { + lh.ip6[i] = addr->ip6[i] & mask->ip6[i]; + rh.ip6[i] = u3->ip6[i] & mask->ip6[i]; + } + + return memcmp(&lh.ip6, &rh.ip6, sizeof(lh.ip6)) == 0; + } 
+} + +static int count_them(struct xt_connlimit_data *data, + const struct nf_conntrack_tuple *tuple, + const union nf_conntrack_address *addr, + const union nf_conntrack_address *mask, + const struct xt_match *match) +{ + struct nf_conntrack_tuple_hash *found; + struct xt_connlimit_conn *conn; + struct xt_connlimit_conn *tmp; + struct nf_conn *found_ct; + struct list_head *hash; + bool addit = true; + int matches = 0; + + + if (match->family == AF_INET6) + hash = &data->iphash[connlimit_iphash6(addr, mask)]; + else + hash = &data->iphash[connlimit_iphash(addr->ip & mask->ip)]; + + read_lock_bh(&nf_conntrack_lock); + + /* check the saved connections */ + list_for_each_entry_safe(conn, tmp, hash, list) { + found = __nf_conntrack_find(&conn->tuple, NULL); + found_ct = NULL; + + if (found != NULL) + found_ct = nf_ct_tuplehash_to_ctrack(found); + + if (found_ct != NULL && + nf_ct_tuple_equal(&conn->tuple, tuple) && + !already_closed(found_ct)) + /* + * Just to be sure we have it only once in the list. + * We should not see tuples twice unless someone hooks + * this into a table without "-p tcp --syn". + */ + addit = false; + + if (found == NULL) { + /* this one is gone */ + list_del(&conn->list); + kfree(conn); + continue; + } + + if (already_closed(found_ct)) { + /* + * we do not care about connections which are + * closed already -> ditch it + */ + list_del(&conn->list); + kfree(conn); + continue; + } + + if (same_source_net(addr, mask, &conn->tuple.src.u3, + match->family)) + /* same source network -> be counted! 
*/ + ++matches; + } + + read_unlock_bh(&nf_conntrack_lock); + + if (addit) { + /* save the new connection in our list */ + conn = kzalloc(sizeof(*conn), GFP_ATOMIC); + if (conn == NULL) + return -ENOMEM; + conn->tuple = *tuple; + list_add(&conn->list, hash); + ++matches; + } + + return matches; +} + +static bool connlimit_match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct xt_match *match, + const void *matchinfo, int offset, + unsigned int protoff, bool *hotdrop) +{ + const struct xt_connlimit_info *info = matchinfo; + union nf_conntrack_address addr, mask; + struct nf_conntrack_tuple tuple; + const struct nf_conntrack_tuple *tuple_ptr = &tuple; + enum ip_conntrack_info ctinfo; + const struct nf_conn *ct; + int connections; + + ct = nf_ct_get(skb, &ctinfo); + if (ct != NULL) + tuple_ptr = &ct->tuplehash[0].tuple; + else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), + match->family, &tuple)) + goto hotdrop; + + if (match->family == AF_INET6) { + const struct ipv6hdr *iph = ipv6_hdr(skb); + memcpy(&addr.ip6, &iph->saddr, sizeof(iph->saddr)); + memcpy(&mask.ip6, info->v6_mask, sizeof(info->v6_mask)); + } else { + const struct iphdr *iph = ip_hdr(skb); + addr.ip = iph->saddr; + mask.ip = info->v4_mask; + } + + spin_lock_bh(&info->data->lock); + connections = count_them(info->data, tuple_ptr, &addr, &mask, match); + spin_unlock_bh(&info->data->lock); + + if (connections < 0) { + /* kmalloc failed, drop it entirely */ + *hotdrop = true; + return false; + } + + return (connections > info->limit) ^ info->inverse; + + hotdrop: + *hotdrop = true; + return false; +} + +static bool connlimit_check(const char *tablename, const void *ip, + const struct xt_match *match, void *matchinfo, + unsigned int hook_mask) +{ + struct xt_connlimit_info *info = matchinfo; + unsigned int i; + + if (nf_ct_l3proto_try_module_get(match->family) < 0) { + printk(KERN_WARNING "cannot load conntrack support for " + "address family 
%u\n", match->family); + return false; + } + + /* init private data */ + info->data = kmalloc(sizeof(struct xt_connlimit_data), GFP_KERNEL); + if (info->data == NULL) { + nf_ct_l3proto_module_put(match->family); + return false; + } + + spin_lock_init(&info->data->lock); + for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) + INIT_LIST_HEAD(&info->data->iphash[i]); + + return true; +} + +static void connlimit_destroy(const struct xt_match *match, void *matchinfo) +{ + struct xt_connlimit_info *info = matchinfo; + struct xt_connlimit_conn *conn; + struct xt_connlimit_conn *tmp; + struct list_head *hash = info->data->iphash; + unsigned int i; + + nf_ct_l3proto_module_put(match->family); + + for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) { + list_for_each_entry_safe(conn, tmp, &hash[i], list) { + list_del(&conn->list); + kfree(conn); + } + } + + kfree(info->data); +} + +static struct xt_match connlimit_reg[] __read_mostly = { + { + .name = "connlimit", + .family = AF_INET, + .checkentry = connlimit_check, + .match = connlimit_match, + .matchsize = sizeof(struct xt_connlimit_info), + .destroy = connlimit_destroy, + .me = THIS_MODULE, + }, + { + .name = "connlimit", + .family = AF_INET6, + .checkentry = connlimit_check, + .match = connlimit_match, + .matchsize = sizeof(struct xt_connlimit_info), + .destroy = connlimit_destroy, + .me = THIS_MODULE, + }, +}; + +static int __init xt_connlimit_init(void) +{ + return xt_register_matches(connlimit_reg, ARRAY_SIZE(connlimit_reg)); +} + +static void __exit xt_connlimit_exit(void) +{ + xt_unregister_matches(connlimit_reg, ARRAY_SIZE(connlimit_reg)); +} + +module_init(xt_connlimit_init); +module_exit(xt_connlimit_exit); +MODULE_AUTHOR("Jan Engelhardt "); +MODULE_DESCRIPTION("netfilter xt_connlimit match module"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_connlimit"); +MODULE_ALIAS("ip6t_connlimit"); -- cgit v0.10.2 From 61075af51f252913401c41fbe94075b46c94e9f1 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 14 
Jul 2007 20:48:19 -0700 Subject: [NETFILTER]: nf_conntrack: mark protocols __read_mostly Also remove two unnecessary EXPORT_SYMBOLs and move the nf_conntrack_l3proto_ipv4 declaration to the correct file. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index 3ed4e14..7a67160 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -12,6 +12,8 @@ /* Returns new sk_buff, or NULL */ struct sk_buff *nf_ct_ipv4_ct_gather_frags(struct sk_buff *skb); +extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; + extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp; diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index e3708a6..3c58a2c 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -89,8 +89,6 @@ extern struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto); extern void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p); /* Existing built-in protocols */ -extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; -extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6; extern struct nf_conntrack_l3proto nf_conntrack_l3proto_generic; static inline struct nf_conntrack_l3proto * diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index ee29f4e..64552af 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -405,7 +405,7 @@ static struct nf_sockopt_ops so_getorigdst = { .get = &getorigdst, }; -struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = { +struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 
__read_mostly = { .l3proto = PF_INET, .name = "ipv4", .pkt_to_tuple = ipv4_pkt_to_tuple, diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index f965733..6593fd2 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -312,7 +312,7 @@ static struct ctl_table icmp_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ -struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp = +struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = { .l3proto = PF_INET, .l4proto = IPPROTO_ICMP, @@ -338,4 +338,3 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp = #endif #endif }; -EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_icmp); diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 9b7eaaa..36df221 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -373,7 +373,7 @@ static int ipv6_nfattr_to_tuple(struct nfattr *tb[], } #endif -struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = { +struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { .l3proto = PF_INET6, .name = "ipv6", .pkt_to_tuple = ipv6_pkt_to_tuple, diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 0fca7e8..ab154fb 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -276,7 +276,7 @@ static struct ctl_table icmpv6_sysctl_table[] = { }; #endif /* CONFIG_SYSCTL */ -struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 = +struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = { .l3proto = PF_INET6, .l4proto = IPPROTO_ICMPV6, @@ -297,5 +297,3 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 = .ctl_table = icmpv6_sysctl_table, #endif }; - 
-EXPORT_SYMBOL(nf_conntrack_l4proto_icmpv6); diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c index 0691642..991c52c 100644 --- a/net/netfilter/nf_conntrack_l3proto_generic.c +++ b/net/netfilter/nf_conntrack_l3proto_generic.c @@ -69,7 +69,7 @@ static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, } -struct nf_conntrack_l3proto nf_conntrack_l3proto_generic = { +struct nf_conntrack_l3proto nf_conntrack_l3proto_generic __read_mostly = { .l3proto = PF_UNSPEC, .name = "unknown", .pkt_to_tuple = generic_pkt_to_tuple, diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 6faf1be..d8b5018 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -98,7 +98,7 @@ static struct ctl_table generic_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ -struct nf_conntrack_l4proto nf_conntrack_l4proto_generic = +struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly = { .l3proto = PF_UNSPEC, .l4proto = 0, diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 771c4c2..bdbead8 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -261,7 +261,7 @@ static void gre_destroy(struct nf_conn *ct) } /* protocol helper struct */ -static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = { +static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = { .l3proto = AF_INET, .l4proto = IPPROTO_GRE, .name = "gre", diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index debfe61..04192ac 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -601,7 +601,7 @@ static struct ctl_table sctp_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ 
#endif -struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = { +static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { .l3proto = PF_INET, .l4proto = IPPROTO_SCTP, .name = "sctp", @@ -622,7 +622,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = { #endif }; -struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = { +static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { .l3proto = PF_INET6, .l4proto = IPPROTO_SCTP, .name = "sctp", diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 1c8206e..87ad3cc 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1372,7 +1372,7 @@ static struct ctl_table tcp_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ -struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 = +struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = { .l3proto = PF_INET, .l4proto = IPPROTO_TCP, @@ -1401,7 +1401,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 = }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4); -struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 = +struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly = { .l3proto = PF_INET6, .l4proto = IPPROTO_TCP, diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 3620ecc..13d94a0 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -191,7 +191,7 @@ static struct ctl_table udp_compat_sysctl_table[] = { #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ -struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 = +struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = { .l3proto = PF_INET, .l4proto = IPPROTO_UDP, @@ -218,7 +218,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 = }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp4); 
-struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 = +struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = { .l3proto = PF_INET6, .l4proto = IPPROTO_UDP, -- cgit v0.10.2 From 59eecdfb166f6846ae356ddc744abed5820ad965 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 14 Jul 2007 20:48:44 -0700 Subject: [NETFILTER]: nf_conntrack: UDPLITE support Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 9415b9a..3ac39f1 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -102,6 +102,16 @@ config NF_CT_PROTO_SCTP If you want to compile it as a module, say M here and read . If unsure, say `N'. +config NF_CT_PROTO_UDPLITE + tristate 'UDP-Lite protocol connection tracking support (EXPERIMENTAL)' + depends on EXPERIMENTAL && NF_CONNTRACK + help + With this option enabled, the layer 3 independent connection + tracking code will be able to do state tracking on UDP-Lite + connections. + + To compile it as a module, choose M here. If unsure, say N. 
+ config NF_CONNTRACK_AMANDA tristate "Amanda backup protocol support" depends on NF_CONNTRACK diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 3e4a16a..0c054bf 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o # SCTP protocol connection tracking obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o obj-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o +obj-$(CONFIG_NF_CT_PROTO_UDPLITE) += nf_conntrack_proto_udplite.o # netlink interface for nf_conntrack obj-$(CONFIG_NF_CT_NETLINK) += nf_conntrack_netlink.o diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c new file mode 100644 index 0000000..93e747b --- /dev/null +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -0,0 +1,266 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team + * (C) 2007 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +static unsigned int nf_ct_udplite_timeout __read_mostly = 30*HZ; +static unsigned int nf_ct_udplite_timeout_stream __read_mostly = 180*HZ; + +static int udplite_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct nf_conntrack_tuple *tuple) +{ + struct udphdr _hdr, *hp; + + hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); + if (hp == NULL) + return 0; + + tuple->src.u.udp.port = hp->source; + tuple->dst.u.udp.port = hp->dest; + return 1; +} + +static int udplite_invert_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig) +{ + tuple->src.u.udp.port = orig->dst.u.udp.port; + tuple->dst.u.udp.port = orig->src.u.udp.port; + return 1; +} + +/* Print out the per-protocol part of the tuple. */ +static int udplite_print_tuple(struct seq_file *s, + const struct nf_conntrack_tuple *tuple) +{ + return seq_printf(s, "sport=%hu dport=%hu ", + ntohs(tuple->src.u.udp.port), + ntohs(tuple->dst.u.udp.port)); +} + +/* Print out the private part of the conntrack. */ +static int udplite_print_conntrack(struct seq_file *s, + const struct nf_conn *conntrack) +{ + return 0; +} + +/* Returns verdict for packet, and may modify conntracktype */ +static int udplite_packet(struct nf_conn *conntrack, + const struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + int pf, + unsigned int hooknum) +{ + /* If we've seen traffic both ways, this is some kind of UDP + stream. Extend timeout. 
*/ + if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { + nf_ct_refresh_acct(conntrack, ctinfo, skb, + nf_ct_udplite_timeout_stream); + /* Also, more likely to be important, and not a probe */ + if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status)) + nf_conntrack_event_cache(IPCT_STATUS, skb); + } else + nf_ct_refresh_acct(conntrack, ctinfo, skb, + nf_ct_udplite_timeout); + + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol found. */ +static int udplite_new(struct nf_conn *conntrack, const struct sk_buff *skb, + unsigned int dataoff) +{ + return 1; +} + +static int udplite_error(struct sk_buff *skb, unsigned int dataoff, + enum ip_conntrack_info *ctinfo, + int pf, + unsigned int hooknum) +{ + unsigned int udplen = skb->len - dataoff; + struct udphdr _hdr, *hdr; + unsigned int cscov; + + /* Header is too small? */ + hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); + if (hdr == NULL) { + if (LOG_INVALID(IPPROTO_UDPLITE)) + nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + "nf_ct_udplite: short packet "); + return -NF_ACCEPT; + } + + cscov = ntohs(hdr->len); + if (cscov == 0) + cscov = udplen; + else if (cscov < sizeof(*hdr) || cscov > udplen) { + if (LOG_INVALID(IPPROTO_UDPLITE)) + nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + "nf_ct_udplite: invalid checksum coverage "); + return -NF_ACCEPT; + } + + /* UDPLITE mandates checksums */ + if (!hdr->check) { + if (LOG_INVALID(IPPROTO_UDPLITE)) + nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + "nf_ct_udplite: checksum missing "); + return -NF_ACCEPT; + } + + /* Checksum invalid? Ignore. 
*/ + if (nf_conntrack_checksum && !skb_csum_unnecessary(skb) && + ((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) || + (pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING))) { + if (pf == PF_INET) { + struct iphdr *iph = ip_hdr(skb); + + skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, + udplen, IPPROTO_UDPLITE, 0); + } else { + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + __wsum hsum = skb_checksum(skb, 0, dataoff, 0); + + skb->csum = ~csum_unfold( + csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, + udplen, IPPROTO_UDPLITE, + csum_sub(0, hsum))); + } + + skb->ip_summed = CHECKSUM_NONE; + if (__skb_checksum_complete_head(skb, dataoff + cscov)) { + if (LOG_INVALID(IPPROTO_UDPLITE)) + nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + "nf_ct_udplite: bad UDPLite " + "checksum "); + return -NF_ACCEPT; + } + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + + return NF_ACCEPT; +} + +#ifdef CONFIG_SYSCTL +static unsigned int udplite_sysctl_table_users; +static struct ctl_table_header *udplite_sysctl_header; +static struct ctl_table udplite_sysctl_table[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nf_conntrack_udplite_timeout", + .data = &nf_ct_udplite_timeout, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nf_conntrack_udplite_timeout_stream", + .data = &nf_ct_udplite_timeout_stream, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = 0 + } +}; +#endif /* CONFIG_SYSCTL */ + +static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = +{ + .l3proto = PF_INET, + .l4proto = IPPROTO_UDPLITE, + .name = "udplite", + .pkt_to_tuple = udplite_pkt_to_tuple, + .invert_tuple = udplite_invert_tuple, + .print_tuple = udplite_print_tuple, + .print_conntrack = udplite_print_conntrack, + .packet = udplite_packet, + .new = udplite_new, + .error = udplite_error, +#if defined(CONFIG_NF_CT_NETLINK) || 
defined(CONFIG_NF_CT_NETLINK_MODULE) + .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, + .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, +#endif +#ifdef CONFIG_SYSCTL + .ctl_table_users = &udplite_sysctl_table_users, + .ctl_table_header = &udplite_sysctl_header, + .ctl_table = udplite_sysctl_table, +#endif +}; + +static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = +{ + .l3proto = PF_INET6, + .l4proto = IPPROTO_UDPLITE, + .name = "udplite", + .pkt_to_tuple = udplite_pkt_to_tuple, + .invert_tuple = udplite_invert_tuple, + .print_tuple = udplite_print_tuple, + .print_conntrack = udplite_print_conntrack, + .packet = udplite_packet, + .new = udplite_new, + .error = udplite_error, +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + .tuple_to_nfattr = nf_ct_port_tuple_to_nfattr, + .nfattr_to_tuple = nf_ct_port_nfattr_to_tuple, +#endif +#ifdef CONFIG_SYSCTL + .ctl_table_users = &udplite_sysctl_table_users, + .ctl_table_header = &udplite_sysctl_header, + .ctl_table = udplite_sysctl_table, +#endif +}; + +static int __init nf_conntrack_proto_udplite_init(void) +{ + int err; + + err = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udplite4); + if (err < 0) + goto err1; + err = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udplite6); + if (err < 0) + goto err2; + return 0; +err2: + nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udplite4); +err1: + return err; +} + +static void __exit nf_conntrack_proto_udplite_exit(void) +{ + nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udplite6); + nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udplite4); +} + +module_init(nf_conntrack_proto_udplite_init); +module_exit(nf_conntrack_proto_udplite_exit); + +MODULE_LICENSE("GPL"); -- cgit v0.10.2 From 0621ed2e4edbe2f6f83dafbf85eecefae7aaf2e8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 14 Jul 2007 20:49:26 -0700 Subject: [NET_SCHED]: Revert "avoid transmit softirq on watchdog wakeup" optimization As 
noticed by Ranko Zivojnovic , calling qdisc_run from the timer handler can result in deadlock: > CPU#0 > > qdisc_watchdog() fires and gets dev->queue_lock > qdisc_run()...qdisc_restart()... > -> releases dev->queue_lock and enters dev_hard_start_xmit() > > CPU#1 > > tc del qdisc dev ... > qdisc_graft()...dev_graft_qdisc()...dev_deactivate()... > -> grabs dev->queue_lock ... > > qdisc_reset()...{cbq,hfsc,htb,netem,tbf}_reset()...qdisc_watchdog_cancel()... > -> hrtimer_cancel() - waiting for the qdisc_watchdog() to exit, while still > holding dev->queue_lock > > CPU#0 > > dev_hard_start_xmit() returns ... > -> wants to get dev->queue_lock(!) > > DEADLOCK! The entire optimization is a bit questionable IMO, it moves potentially large parts of NET_TX_SOFTIRQ work to TIMER_SOFTIRQ/HRTIMER_SOFTIRQ, which kind of defeats the separation of them. Signed-off-by: Patrick McHardy Acked-by: Ranko Zivojnovic Signed-off-by: David S. Miller diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index d92ea26..4fd0bec 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -278,11 +278,7 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) wd->qdisc->flags &= ~TCQ_F_THROTTLED; smp_wmb(); - if (spin_trylock(&dev->queue_lock)) { - qdisc_run(dev); - spin_unlock(&dev->queue_lock); - } else - netif_schedule(dev); + netif_schedule(dev); return HRTIMER_NORESTART; } -- cgit v0.10.2 From 1b1ac759d7c6bba6e5f4731ef6ea720b6636e27c Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sat, 14 Jul 2007 20:51:44 -0700 Subject: [IPV4]: Cleanup call to __neigh_lookup() Back in the times of Linux 2.2, negative values for the creat parameter of __neigh_lookup() had a particular meaning, but no longer, so we should pass 1 instead. Signed-off-by: Jean Delvare Signed-off-by: David S. 
Miller diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index e00767e..9ab9d53 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -885,7 +885,7 @@ static int arp_process(struct sk_buff *skb) if (n == NULL && arp->ar_op == htons(ARPOP_REPLY) && inet_addr_type(sip) == RTN_UNICAST) - n = __neigh_lookup(&arp_tbl, &sip, dev, -1); + n = __neigh_lookup(&arp_tbl, &sip, dev, 1); } if (n) { -- cgit v0.10.2 From f13ec93fba60d339dc1663eb47b2fb801225d2d2 Mon Sep 17 00:00:00 2001 From: Dmitry Butskoy Date: Sat, 14 Jul 2007 23:53:08 -0700 Subject: [IPV6]: MSG_ERRQUEUE messages do not pass to connected raw sockets From: Dmitry Butskoy Taken from http://bugzilla.kernel.org/show_bug.cgi?id=8747 Problem Description: It is related to the possibility to obtain MSG_ERRQUEUE messages from the udp and raw sockets, both connected and unconnected. There is a little typo in net/ipv6/icmp.c code, which prevents such messages to be delivered to the errqueue of the correspond raw socket, when the socket is CONNECTED. The typo is due to swap of local/remote addresses. Consider __raw_v6_lookup() function from net/ipv6/raw.c. When a raw socket is looked up usual way, it is something like: sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, IP6CB(skb)->iif); where "daddr" is a destination address of the incoming packet (IOW our local address), "saddr" is a source address of the incoming packet (the remote end). But when the raw socket is looked up for some icmp error report, in net/ipv6/icmp.c:icmpv6_notify() , daddr/saddr are obtained from the echoed fragment of the "bad" packet, i.e. "daddr" is the original destination address of that packet, "saddr" is our local address. Hence, for icmpv6_notify() must use "saddr, daddr" in its arguments, not "daddr, saddr" ... Steps to reproduce: Create some raw socket, connect it to an address, and cause some error situation: f.e. set ttl=1 where the remote address is more than 1 hop to reach. Set IPV6_RECVERR . Then send something and wait for the error (f.e. 
poll() with POLLERR|POLLIN). You should receive "time exceeded" icmp message (because of "ttl=1"), but the socket do not receive it. If you do not connect your raw socket, you will receive MSG_ERRQUEUE successfully. (The reason is that for unconnected socket there are no actual checks for local/remote addresses). Signed-off-by: Andrew Morton Signed-off-by: David S. Miller diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 4765a29..6a6714d 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -604,7 +604,7 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info) read_lock(&raw_v6_lock); if ((sk = sk_head(&raw_v6_htable[hash])) != NULL) { - while((sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, + while ((sk = __raw_v6_lookup(sk, nexthdr, saddr, daddr, IP6CB(skb)->iif))) { rawv6_err(sk, skb, NULL, type, code, inner_offset, info); sk = sk_next(sk); -- cgit v0.10.2 From b0188d4dbe5f4285372dd033acf7c92a97006629 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 15 Jul 2007 00:01:25 -0700 Subject: [NET_SCHED]: sch_atm: Lindent Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 54b92d2..9b458c4 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -2,7 +2,6 @@ /* Written 1998-2000 by Werner Almesberger, EPFL ICA */ - #include #include #include @@ -11,12 +10,11 @@ #include #include #include -#include /* for fput */ +#include /* for fput */ #include #include - -extern struct socket *sockfd_lookup(int fd, int *err); /* @@@ fix this */ +extern struct socket *sockfd_lookup(int fd, int *err); /* @@@ fix this */ #if 0 /* control */ #define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) @@ -30,7 +28,6 @@ extern struct socket *sockfd_lookup(int fd, int *err); /* @@@ fix this */ #define D2PRINTK(format,args...) 
#endif - /* * The ATM queuing discipline provides a framework for invoking classifiers * (aka "filters"), which in turn select classes of this queuing discipline. @@ -52,16 +49,15 @@ extern struct socket *sockfd_lookup(int fd, int *err); /* @@@ fix this */ * - should lock the flow while there is data in the queue (?) */ - #define PRIV(sch) qdisc_priv(sch) #define VCC2FLOW(vcc) ((struct atm_flow_data *) ((vcc)->user_back)) - struct atm_flow_data { - struct Qdisc *q; /* FIFO, TBF, etc. */ + struct Qdisc *q; /* FIFO, TBF, etc. */ struct tcf_proto *filter_list; - struct atm_vcc *vcc; /* VCC; NULL if VCC is closed */ - void (*old_pop)(struct atm_vcc *vcc,struct sk_buff *skb); /* chaining */ + struct atm_vcc *vcc; /* VCC; NULL if VCC is closed */ + void (*old_pop)(struct atm_vcc *vcc, + struct sk_buff * skb); /* chaining */ struct atm_qdisc_data *parent; /* parent qdisc */ struct socket *sock; /* for closing */ u32 classid; /* x:y type ID */ @@ -82,76 +78,74 @@ struct atm_qdisc_data { struct tasklet_struct task; /* requeue tasklet */ }; - /* ------------------------- Class/flow operations ------------------------- */ - -static int find_flow(struct atm_qdisc_data *qdisc,struct atm_flow_data *flow) +static int find_flow(struct atm_qdisc_data *qdisc, struct atm_flow_data *flow) { struct atm_flow_data *walk; - DPRINTK("find_flow(qdisc %p,flow %p)\n",qdisc,flow); + DPRINTK("find_flow(qdisc %p,flow %p)\n", qdisc, flow); for (walk = qdisc->flows; walk; walk = walk->next) - if (walk == flow) return 1; + if (walk == flow) + return 1; DPRINTK("find_flow: not found\n"); return 0; } - -static __inline__ struct atm_flow_data *lookup_flow(struct Qdisc *sch, - u32 classid) +static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid) { struct atm_qdisc_data *p = PRIV(sch); struct atm_flow_data *flow; for (flow = p->flows; flow; flow = flow->next) - if (flow->classid == classid) break; + if (flow->classid == classid) + break; return flow; } - -static int 
atm_tc_graft(struct Qdisc *sch,unsigned long arg, - struct Qdisc *new,struct Qdisc **old) +static int atm_tc_graft(struct Qdisc *sch, unsigned long arg, + struct Qdisc *new, struct Qdisc **old) { struct atm_qdisc_data *p = PRIV(sch); - struct atm_flow_data *flow = (struct atm_flow_data *) arg; - - DPRINTK("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n",sch, - p,flow,new,old); - if (!find_flow(p,flow)) return -EINVAL; - if (!new) new = &noop_qdisc; - *old = xchg(&flow->q,new); - if (*old) qdisc_reset(*old); + struct atm_flow_data *flow = (struct atm_flow_data *)arg; + + DPRINTK("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n", + sch, p, flow, new, old); + if (!find_flow(p, flow)) + return -EINVAL; + if (!new) + new = &noop_qdisc; + *old = xchg(&flow->q, new); + if (*old) + qdisc_reset(*old); return 0; } - -static struct Qdisc *atm_tc_leaf(struct Qdisc *sch,unsigned long cl) +static struct Qdisc *atm_tc_leaf(struct Qdisc *sch, unsigned long cl) { - struct atm_flow_data *flow = (struct atm_flow_data *) cl; + struct atm_flow_data *flow = (struct atm_flow_data *)cl; - DPRINTK("atm_tc_leaf(sch %p,flow %p)\n",sch,flow); + DPRINTK("atm_tc_leaf(sch %p,flow %p)\n", sch, flow); return flow ? 
flow->q : NULL; } - -static unsigned long atm_tc_get(struct Qdisc *sch,u32 classid) +static unsigned long atm_tc_get(struct Qdisc *sch, u32 classid) { - struct atm_qdisc_data *p __attribute__((unused)) = PRIV(sch); + struct atm_qdisc_data *p __maybe_unused = PRIV(sch); struct atm_flow_data *flow; - DPRINTK("atm_tc_get(sch %p,[qdisc %p],classid %x)\n",sch,p,classid); - flow = lookup_flow(sch,classid); - if (flow) flow->ref++; - DPRINTK("atm_tc_get: flow %p\n",flow); - return (unsigned long) flow; + DPRINTK("atm_tc_get(sch %p,[qdisc %p],classid %x)\n", sch, p, classid); + flow = lookup_flow(sch, classid); + if (flow) + flow->ref++; + DPRINTK("atm_tc_get: flow %p\n", flow); + return (unsigned long)flow; } - static unsigned long atm_tc_bind_filter(struct Qdisc *sch, - unsigned long parent, u32 classid) + unsigned long parent, u32 classid) { - return atm_tc_get(sch,classid); + return atm_tc_get(sch, classid); } /* @@ -159,72 +153,75 @@ static unsigned long atm_tc_bind_filter(struct Qdisc *sch, * requested (atm_tc_destroy, etc.). The assumption here is that we never drop * anything that still seems to be in use. 
*/ - static void atm_tc_put(struct Qdisc *sch, unsigned long cl) { struct atm_qdisc_data *p = PRIV(sch); - struct atm_flow_data *flow = (struct atm_flow_data *) cl; + struct atm_flow_data *flow = (struct atm_flow_data *)cl; struct atm_flow_data **prev; - DPRINTK("atm_tc_put(sch %p,[qdisc %p],flow %p)\n",sch,p,flow); - if (--flow->ref) return; + DPRINTK("atm_tc_put(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); + if (--flow->ref) + return; DPRINTK("atm_tc_put: destroying\n"); for (prev = &p->flows; *prev; prev = &(*prev)->next) - if (*prev == flow) break; + if (*prev == flow) + break; if (!*prev) { - printk(KERN_CRIT "atm_tc_put: class %p not found\n",flow); + printk(KERN_CRIT "atm_tc_put: class %p not found\n", flow); return; } *prev = flow->next; - DPRINTK("atm_tc_put: qdisc %p\n",flow->q); + DPRINTK("atm_tc_put: qdisc %p\n", flow->q); qdisc_destroy(flow->q); tcf_destroy_chain(flow->filter_list); if (flow->sock) { DPRINTK("atm_tc_put: f_count %d\n", - file_count(flow->sock->file)); + file_count(flow->sock->file)); flow->vcc->pop = flow->old_pop; sockfd_put(flow->sock); } - if (flow->excess) atm_tc_put(sch,(unsigned long) flow->excess); - if (flow != &p->link) kfree(flow); + if (flow->excess) + atm_tc_put(sch, (unsigned long)flow->excess); + if (flow != &p->link) + kfree(flow); /* * If flow == &p->link, the qdisc no longer works at this point and * needs to be removed. (By the caller of atm_tc_put.) 
*/ } - -static void sch_atm_pop(struct atm_vcc *vcc,struct sk_buff *skb) +static void sch_atm_pop(struct atm_vcc *vcc, struct sk_buff *skb) { struct atm_qdisc_data *p = VCC2FLOW(vcc)->parent; - D2PRINTK("sch_atm_pop(vcc %p,skb %p,[qdisc %p])\n",vcc,skb,p); - VCC2FLOW(vcc)->old_pop(vcc,skb); + D2PRINTK("sch_atm_pop(vcc %p,skb %p,[qdisc %p])\n", vcc, skb, p); + VCC2FLOW(vcc)->old_pop(vcc, skb); tasklet_schedule(&p->task); } static const u8 llc_oui_ip[] = { - 0xaa, /* DSAP: non-ISO */ - 0xaa, /* SSAP: non-ISO */ - 0x03, /* Ctrl: Unnumbered Information Command PDU */ - 0x00, /* OUI: EtherType */ + 0xaa, /* DSAP: non-ISO */ + 0xaa, /* SSAP: non-ISO */ + 0x03, /* Ctrl: Unnumbered Information Command PDU */ + 0x00, /* OUI: EtherType */ 0x00, 0x00, - 0x08, 0x00 }; /* Ethertype IP (0800) */ + 0x08, 0x00 +}; /* Ethertype IP (0800) */ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, - struct rtattr **tca, unsigned long *arg) + struct rtattr **tca, unsigned long *arg) { struct atm_qdisc_data *p = PRIV(sch); - struct atm_flow_data *flow = (struct atm_flow_data *) *arg; + struct atm_flow_data *flow = (struct atm_flow_data *)*arg; struct atm_flow_data *excess = NULL; - struct rtattr *opt = tca[TCA_OPTIONS-1]; + struct rtattr *opt = tca[TCA_OPTIONS - 1]; struct rtattr *tb[TCA_ATM_MAX]; struct socket *sock; - int fd,error,hdr_len; + int fd, error, hdr_len; void *hdr; DPRINTK("atm_tc_change(sch %p,[qdisc %p],classid %x,parent %x," - "flow %p,opt %p)\n",sch,p,classid,parent,flow,opt); + "flow %p,opt %p)\n", sch, p, classid, parent, flow, opt); /* * The concept of parents doesn't apply for this qdisc. */ @@ -237,33 +234,36 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, * class needs to be removed and a new one added. (This may be changed * later.) 
*/ - if (flow) return -EBUSY; + if (flow) + return -EBUSY; if (opt == NULL || rtattr_parse_nested(tb, TCA_ATM_MAX, opt)) return -EINVAL; - if (!tb[TCA_ATM_FD-1] || RTA_PAYLOAD(tb[TCA_ATM_FD-1]) < sizeof(fd)) + if (!tb[TCA_ATM_FD - 1] || RTA_PAYLOAD(tb[TCA_ATM_FD - 1]) < sizeof(fd)) return -EINVAL; - fd = *(int *) RTA_DATA(tb[TCA_ATM_FD-1]); - DPRINTK("atm_tc_change: fd %d\n",fd); - if (tb[TCA_ATM_HDR-1]) { - hdr_len = RTA_PAYLOAD(tb[TCA_ATM_HDR-1]); - hdr = RTA_DATA(tb[TCA_ATM_HDR-1]); - } - else { + fd = *(int *)RTA_DATA(tb[TCA_ATM_FD - 1]); + DPRINTK("atm_tc_change: fd %d\n", fd); + if (tb[TCA_ATM_HDR - 1]) { + hdr_len = RTA_PAYLOAD(tb[TCA_ATM_HDR - 1]); + hdr = RTA_DATA(tb[TCA_ATM_HDR - 1]); + } else { hdr_len = RFC1483LLC_LEN; - hdr = NULL; /* default LLC/SNAP for IP */ + hdr = NULL; /* default LLC/SNAP for IP */ } - if (!tb[TCA_ATM_EXCESS-1]) excess = NULL; + if (!tb[TCA_ATM_EXCESS - 1]) + excess = NULL; else { - if (RTA_PAYLOAD(tb[TCA_ATM_EXCESS-1]) != sizeof(u32)) + if (RTA_PAYLOAD(tb[TCA_ATM_EXCESS - 1]) != sizeof(u32)) return -EINVAL; - excess = (struct atm_flow_data *) atm_tc_get(sch, - *(u32 *) RTA_DATA(tb[TCA_ATM_EXCESS-1])); - if (!excess) return -ENOENT; + excess = (struct atm_flow_data *) + atm_tc_get(sch, *(u32 *)RTA_DATA(tb[TCA_ATM_EXCESS - 1])); + if (!excess) + return -ENOENT; } DPRINTK("atm_tc_change: type %d, payload %d, hdr_len %d\n", - opt->rta_type,RTA_PAYLOAD(opt),hdr_len); - if (!(sock = sockfd_lookup(fd,&error))) return error; /* f_count++ */ - DPRINTK("atm_tc_change: f_count %d\n",file_count(sock->file)); + opt->rta_type, RTA_PAYLOAD(opt), hdr_len); + if (!(sock = sockfd_lookup(fd, &error))) + return error; /* f_count++ */ + DPRINTK("atm_tc_change: f_count %d\n", file_count(sock->file)); if (sock->ops->family != PF_ATMSVC && sock->ops->family != PF_ATMPVC) { error = -EPROTOTYPE; goto err_out; @@ -276,37 +276,37 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, error = -EINVAL; goto err_out; } - if 
(find_flow(p,flow)) { + if (find_flow(p, flow)) { error = -EEXIST; goto err_out; } - } - else { + } else { int i; unsigned long cl; for (i = 1; i < 0x8000; i++) { - classid = TC_H_MAKE(sch->handle,0x8000 | i); - if (!(cl = atm_tc_get(sch,classid))) break; - atm_tc_put(sch,cl); + classid = TC_H_MAKE(sch->handle, 0x8000 | i); + if (!(cl = atm_tc_get(sch, classid))) + break; + atm_tc_put(sch, cl); } } - DPRINTK("atm_tc_change: new id %x\n",classid); - flow = kmalloc(sizeof(struct atm_flow_data)+hdr_len,GFP_KERNEL); - DPRINTK("atm_tc_change: flow %p\n",flow); + DPRINTK("atm_tc_change: new id %x\n", classid); + flow = kmalloc(sizeof(struct atm_flow_data) + hdr_len, GFP_KERNEL); + DPRINTK("atm_tc_change: flow %p\n", flow); if (!flow) { error = -ENOBUFS; goto err_out; } - memset(flow,0,sizeof(*flow)); + memset(flow, 0, sizeof(*flow)); flow->filter_list = NULL; - if (!(flow->q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops,classid))) + if (!(flow->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid))) flow->q = &noop_qdisc; - DPRINTK("atm_tc_change: qdisc %p\n",flow->q); + DPRINTK("atm_tc_change: qdisc %p\n", flow->q); flow->sock = sock; - flow->vcc = ATM_SD(sock); /* speedup */ + flow->vcc = ATM_SD(sock); /* speedup */ flow->vcc->user_back = flow; - DPRINTK("atm_tc_change: vcc %p\n",flow->vcc); + DPRINTK("atm_tc_change: vcc %p\n", flow->vcc); flow->old_pop = flow->vcc->pop; flow->parent = p; flow->vcc->pop = sch_atm_pop; @@ -317,50 +317,53 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, p->link.next = flow; flow->hdr_len = hdr_len; if (hdr) - memcpy(flow->hdr,hdr,hdr_len); + memcpy(flow->hdr, hdr, hdr_len); else - memcpy(flow->hdr,llc_oui_ip,sizeof(llc_oui_ip)); - *arg = (unsigned long) flow; + memcpy(flow->hdr, llc_oui_ip, sizeof(llc_oui_ip)); + *arg = (unsigned long)flow; return 0; err_out: - if (excess) atm_tc_put(sch,(unsigned long) excess); + if (excess) + atm_tc_put(sch, (unsigned long)excess); sockfd_put(sock); return error; } - -static 
int atm_tc_delete(struct Qdisc *sch,unsigned long arg) +static int atm_tc_delete(struct Qdisc *sch, unsigned long arg) { struct atm_qdisc_data *p = PRIV(sch); - struct atm_flow_data *flow = (struct atm_flow_data *) arg; + struct atm_flow_data *flow = (struct atm_flow_data *)arg; - DPRINTK("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n",sch,p,flow); - if (!find_flow(PRIV(sch),flow)) return -EINVAL; - if (flow->filter_list || flow == &p->link) return -EBUSY; + DPRINTK("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); + if (!find_flow(PRIV(sch), flow)) + return -EINVAL; + if (flow->filter_list || flow == &p->link) + return -EBUSY; /* * Reference count must be 2: one for "keepalive" (set at class * creation), and one for the reference held when calling delete. */ if (flow->ref < 2) { - printk(KERN_ERR "atm_tc_delete: flow->ref == %d\n",flow->ref); + printk(KERN_ERR "atm_tc_delete: flow->ref == %d\n", flow->ref); return -EINVAL; } - if (flow->ref > 2) return -EBUSY; /* catch references via excess, etc.*/ - atm_tc_put(sch,arg); + if (flow->ref > 2) + return -EBUSY; /* catch references via excess, etc. 
*/ + atm_tc_put(sch, arg); return 0; } - -static void atm_tc_walk(struct Qdisc *sch,struct qdisc_walker *walker) +static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker) { struct atm_qdisc_data *p = PRIV(sch); struct atm_flow_data *flow; - DPRINTK("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n",sch,p,walker); - if (walker->stop) return; + DPRINTK("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker); + if (walker->stop) + return; for (flow = p->flows; flow; flow = flow->next) { if (walker->count >= walker->skip) - if (walker->fn(sch,(unsigned long) flow,walker) < 0) { + if (walker->fn(sch, (unsigned long)flow, walker) < 0) { walker->stop = 1; break; } @@ -368,73 +371,74 @@ static void atm_tc_walk(struct Qdisc *sch,struct qdisc_walker *walker) } } - -static struct tcf_proto **atm_tc_find_tcf(struct Qdisc *sch,unsigned long cl) +static struct tcf_proto **atm_tc_find_tcf(struct Qdisc *sch, unsigned long cl) { struct atm_qdisc_data *p = PRIV(sch); - struct atm_flow_data *flow = (struct atm_flow_data *) cl; + struct atm_flow_data *flow = (struct atm_flow_data *)cl; - DPRINTK("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n",sch,p,flow); + DPRINTK("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); return flow ? 
&flow->filter_list : &p->link.filter_list; } - /* --------------------------- Qdisc operations ---------------------------- */ - -static int atm_tc_enqueue(struct sk_buff *skb,struct Qdisc *sch) +static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct atm_qdisc_data *p = PRIV(sch); - struct atm_flow_data *flow = NULL ; /* @@@ */ + struct atm_flow_data *flow = NULL; /* @@@ */ struct tcf_result res; int result; int ret = NET_XMIT_POLICED; - D2PRINTK("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n",skb,sch,p); - result = TC_POLICE_OK; /* be nice to gcc */ + D2PRINTK("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); + result = TC_POLICE_OK; /* be nice to gcc */ if (TC_H_MAJ(skb->priority) != sch->handle || - !(flow = (struct atm_flow_data *) atm_tc_get(sch,skb->priority))) + !(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority))) for (flow = p->flows; flow; flow = flow->next) if (flow->filter_list) { - result = tc_classify(skb,flow->filter_list, - &res); - if (result < 0) continue; - flow = (struct atm_flow_data *) res.class; - if (!flow) flow = lookup_flow(sch,res.classid); + result = tc_classify(skb, flow->filter_list, + &res); + if (result < 0) + continue; + flow = (struct atm_flow_data *)res.class; + if (!flow) + flow = lookup_flow(sch, res.classid); break; } - if (!flow) flow = &p->link; + if (!flow) + flow = &p->link; else { if (flow->vcc) ATM_SKB(skb)->atm_options = flow->vcc->atm_options; - /*@@@ looks good ... but it's not supposed to work :-)*/ + /*@@@ looks good ... 
but it's not supposed to work :-) */ #ifdef CONFIG_NET_CLS_POLICE switch (result) { - case TC_POLICE_SHOT: - kfree_skb(skb); - break; - case TC_POLICE_RECLASSIFY: - if (flow->excess) flow = flow->excess; - else { - ATM_SKB(skb)->atm_options |= - ATM_ATMOPT_CLP; - break; - } - /* fall through */ - case TC_POLICE_OK: - /* fall through */ - default: + case TC_POLICE_SHOT: + kfree_skb(skb); + break; + case TC_POLICE_RECLASSIFY: + if (flow->excess) + flow = flow->excess; + else { + ATM_SKB(skb)->atm_options |= ATM_ATMOPT_CLP; break; + } + /* fall through */ + case TC_POLICE_OK: + /* fall through */ + default: + break; } #endif } if ( #ifdef CONFIG_NET_CLS_POLICE - result == TC_POLICE_SHOT || + result == TC_POLICE_SHOT || #endif - (ret = flow->q->enqueue(skb,flow->q)) != 0) { + (ret = flow->q->enqueue(skb, flow->q)) != 0) { sch->qstats.drops++; - if (flow) flow->qstats.drops++; + if (flow) + flow->qstats.drops++; return ret; } sch->bstats.bytes += skb->len; @@ -458,7 +462,6 @@ static int atm_tc_enqueue(struct sk_buff *skb,struct Qdisc *sch) return NET_XMIT_BYPASS; } - /* * Dequeue packets and send them over ATM. Note that we quite deliberately * avoid checking net_device's flow control here, simply because sch_atm @@ -466,167 +469,163 @@ static int atm_tc_enqueue(struct sk_buff *skb,struct Qdisc *sch) * non-ATM interfaces. */ - static void sch_atm_dequeue(unsigned long data) { - struct Qdisc *sch = (struct Qdisc *) data; + struct Qdisc *sch = (struct Qdisc *)data; struct atm_qdisc_data *p = PRIV(sch); struct atm_flow_data *flow; struct sk_buff *skb; - D2PRINTK("sch_atm_dequeue(sch %p,[qdisc %p])\n",sch,p); + D2PRINTK("sch_atm_dequeue(sch %p,[qdisc %p])\n", sch, p); for (flow = p->link.next; flow; flow = flow->next) /* * If traffic is properly shaped, this won't generate nasty * little bursts. Otherwise, it may ... 
(but that's okay) */ while ((skb = flow->q->dequeue(flow->q))) { - if (!atm_may_send(flow->vcc,skb->truesize)) { - (void) flow->q->ops->requeue(skb,flow->q); + if (!atm_may_send(flow->vcc, skb->truesize)) { + (void)flow->q->ops->requeue(skb, flow->q); break; } - D2PRINTK("atm_tc_dequeue: sending on class %p\n",flow); + D2PRINTK("atm_tc_dequeue: sending on class %p\n", flow); /* remove any LL header somebody else has attached */ skb_pull(skb, skb_network_offset(skb)); if (skb_headroom(skb) < flow->hdr_len) { struct sk_buff *new; - new = skb_realloc_headroom(skb,flow->hdr_len); + new = skb_realloc_headroom(skb, flow->hdr_len); dev_kfree_skb(skb); - if (!new) continue; + if (!new) + continue; skb = new; } D2PRINTK("sch_atm_dequeue: ip %p, data %p\n", skb_network_header(skb), skb->data); ATM_SKB(skb)->vcc = flow->vcc; - memcpy(skb_push(skb,flow->hdr_len),flow->hdr, - flow->hdr_len); + memcpy(skb_push(skb, flow->hdr_len), flow->hdr, + flow->hdr_len); atomic_add(skb->truesize, &sk_atm(flow->vcc)->sk_wmem_alloc); /* atm.atm_options are already set by atm_tc_enqueue */ - (void) flow->vcc->send(flow->vcc,skb); + flow->vcc->send(flow->vcc, skb); } } - static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch) { struct atm_qdisc_data *p = PRIV(sch); struct sk_buff *skb; - D2PRINTK("atm_tc_dequeue(sch %p,[qdisc %p])\n",sch,p); + D2PRINTK("atm_tc_dequeue(sch %p,[qdisc %p])\n", sch, p); tasklet_schedule(&p->task); skb = p->link.q->dequeue(p->link.q); - if (skb) sch->q.qlen--; + if (skb) + sch->q.qlen--; return skb; } - -static int atm_tc_requeue(struct sk_buff *skb,struct Qdisc *sch) +static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch) { struct atm_qdisc_data *p = PRIV(sch); int ret; - D2PRINTK("atm_tc_requeue(skb %p,sch %p,[qdisc %p])\n",skb,sch,p); - ret = p->link.q->ops->requeue(skb,p->link.q); + D2PRINTK("atm_tc_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); + ret = p->link.q->ops->requeue(skb, p->link.q); if (!ret) { - sch->q.qlen++; - 
sch->qstats.requeues++; - } else { + sch->q.qlen++; + sch->qstats.requeues++; + } else { sch->qstats.drops++; p->link.qstats.drops++; } return ret; } - static unsigned int atm_tc_drop(struct Qdisc *sch) { struct atm_qdisc_data *p = PRIV(sch); struct atm_flow_data *flow; unsigned int len; - DPRINTK("atm_tc_drop(sch %p,[qdisc %p])\n",sch,p); + DPRINTK("atm_tc_drop(sch %p,[qdisc %p])\n", sch, p); for (flow = p->flows; flow; flow = flow->next) if (flow->q->ops->drop && (len = flow->q->ops->drop(flow->q))) return len; return 0; } - -static int atm_tc_init(struct Qdisc *sch,struct rtattr *opt) +static int atm_tc_init(struct Qdisc *sch, struct rtattr *opt) { struct atm_qdisc_data *p = PRIV(sch); - DPRINTK("atm_tc_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt); + DPRINTK("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); p->flows = &p->link; - if(!(p->link.q = qdisc_create_dflt(sch->dev,&pfifo_qdisc_ops, - sch->handle))) + if (!(p->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, + sch->handle))) p->link.q = &noop_qdisc; - DPRINTK("atm_tc_init: link (%p) qdisc %p\n",&p->link,p->link.q); + DPRINTK("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q); p->link.filter_list = NULL; p->link.vcc = NULL; p->link.sock = NULL; p->link.classid = sch->handle; p->link.ref = 1; p->link.next = NULL; - tasklet_init(&p->task,sch_atm_dequeue,(unsigned long) sch); + tasklet_init(&p->task, sch_atm_dequeue, (unsigned long)sch); return 0; } - static void atm_tc_reset(struct Qdisc *sch) { struct atm_qdisc_data *p = PRIV(sch); struct atm_flow_data *flow; - DPRINTK("atm_tc_reset(sch %p,[qdisc %p])\n",sch,p); - for (flow = p->flows; flow; flow = flow->next) qdisc_reset(flow->q); + DPRINTK("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p); + for (flow = p->flows; flow; flow = flow->next) + qdisc_reset(flow->q); sch->q.qlen = 0; } - static void atm_tc_destroy(struct Qdisc *sch) { struct atm_qdisc_data *p = PRIV(sch); struct atm_flow_data *flow; - DPRINTK("atm_tc_destroy(sch %p,[qdisc 
%p])\n",sch,p); + DPRINTK("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p); /* races ? */ while ((flow = p->flows)) { tcf_destroy_chain(flow->filter_list); flow->filter_list = NULL; if (flow->ref > 1) - printk(KERN_ERR "atm_destroy: %p->ref = %d\n",flow, - flow->ref); - atm_tc_put(sch,(unsigned long) flow); + printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow, + flow->ref); + atm_tc_put(sch, (unsigned long)flow); if (p->flows == flow) { printk(KERN_ERR "atm_destroy: putting flow %p didn't " - "kill it\n",flow); - p->flows = flow->next; /* brute force */ + "kill it\n", flow); + p->flows = flow->next; /* brute force */ break; } } tasklet_kill(&p->task); } - static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, - struct sk_buff *skb, struct tcmsg *tcm) + struct sk_buff *skb, struct tcmsg *tcm) { struct atm_qdisc_data *p = PRIV(sch); - struct atm_flow_data *flow = (struct atm_flow_data *) cl; + struct atm_flow_data *flow = (struct atm_flow_data *)cl; unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta; DPRINTK("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n", - sch,p,flow,skb,tcm); - if (!find_flow(p,flow)) return -EINVAL; + sch, p, flow, skb, tcm); + if (!find_flow(p, flow)) + return -EINVAL; tcm->tcm_handle = flow->classid; tcm->tcm_info = flow->q->handle; - rta = (struct rtattr *) b; - RTA_PUT(skb,TCA_OPTIONS,0,NULL); - RTA_PUT(skb,TCA_ATM_HDR,flow->hdr_len,flow->hdr); + rta = (struct rtattr *)b; + RTA_PUT(skb, TCA_OPTIONS, 0, NULL); + RTA_PUT(skb, TCA_ATM_HDR, flow->hdr_len, flow->hdr); if (flow->vcc) { struct sockaddr_atmpvc pvc; int state; @@ -635,16 +634,16 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, pvc.sap_addr.itf = flow->vcc->dev ? 
flow->vcc->dev->number : -1; pvc.sap_addr.vpi = flow->vcc->vpi; pvc.sap_addr.vci = flow->vcc->vci; - RTA_PUT(skb,TCA_ATM_ADDR,sizeof(pvc),&pvc); + RTA_PUT(skb, TCA_ATM_ADDR, sizeof(pvc), &pvc); state = ATM_VF2VS(flow->vcc->flags); - RTA_PUT(skb,TCA_ATM_STATE,sizeof(state),&state); + RTA_PUT(skb, TCA_ATM_STATE, sizeof(state), &state); } if (flow->excess) - RTA_PUT(skb,TCA_ATM_EXCESS,sizeof(u32),&flow->classid); + RTA_PUT(skb, TCA_ATM_EXCESS, sizeof(u32), &flow->classid); else { static u32 zero; - RTA_PUT(skb,TCA_ATM_EXCESS,sizeof(zero),&zero); + RTA_PUT(skb, TCA_ATM_EXCESS, sizeof(zero), &zero); } rta->rta_len = skb_tail_pointer(skb) - b; return skb->len; @@ -655,9 +654,9 @@ rtattr_failure: } static int atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg, - struct gnet_dump *d) + struct gnet_dump *d) { - struct atm_flow_data *flow = (struct atm_flow_data *) arg; + struct atm_flow_data *flow = (struct atm_flow_data *)arg; flow->qstats.qlen = flow->q->q.qlen; @@ -674,38 +673,35 @@ static int atm_tc_dump(struct Qdisc *sch, struct sk_buff *skb) } static struct Qdisc_class_ops atm_class_ops = { - .graft = atm_tc_graft, - .leaf = atm_tc_leaf, - .get = atm_tc_get, - .put = atm_tc_put, - .change = atm_tc_change, - .delete = atm_tc_delete, - .walk = atm_tc_walk, - .tcf_chain = atm_tc_find_tcf, - .bind_tcf = atm_tc_bind_filter, - .unbind_tcf = atm_tc_put, - .dump = atm_tc_dump_class, - .dump_stats = atm_tc_dump_class_stats, + .graft = atm_tc_graft, + .leaf = atm_tc_leaf, + .get = atm_tc_get, + .put = atm_tc_put, + .change = atm_tc_change, + .delete = atm_tc_delete, + .walk = atm_tc_walk, + .tcf_chain = atm_tc_find_tcf, + .bind_tcf = atm_tc_bind_filter, + .unbind_tcf = atm_tc_put, + .dump = atm_tc_dump_class, + .dump_stats = atm_tc_dump_class_stats, }; static struct Qdisc_ops atm_qdisc_ops = { - .next = NULL, - .cl_ops = &atm_class_ops, - .id = "atm", - .priv_size = sizeof(struct atm_qdisc_data), - .enqueue = atm_tc_enqueue, - .dequeue = atm_tc_dequeue, - .requeue = 
atm_tc_requeue, - .drop = atm_tc_drop, - .init = atm_tc_init, - .reset = atm_tc_reset, - .destroy = atm_tc_destroy, - .change = NULL, - .dump = atm_tc_dump, - .owner = THIS_MODULE, + .cl_ops = &atm_class_ops, + .id = "atm", + .priv_size = sizeof(struct atm_qdisc_data), + .enqueue = atm_tc_enqueue, + .dequeue = atm_tc_dequeue, + .requeue = atm_tc_requeue, + .drop = atm_tc_drop, + .init = atm_tc_init, + .reset = atm_tc_reset, + .destroy = atm_tc_destroy, + .dump = atm_tc_dump, + .owner = THIS_MODULE, }; - static int __init atm_init(void) { return register_qdisc(&atm_qdisc_ops); -- cgit v0.10.2 From 9210080445b0c51a73b488750a26eb17177d8684 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 15 Jul 2007 00:01:49 -0700 Subject: [NET_SCHED]: sch_atm: act_api support Handle act_api classification results. The ATM scheduler behaves slightly different than other schedulers in that it only handles policer results for successful classifications, this behaviour is retained for the act_api case. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 9b458c4..ccee10d 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -411,11 +411,21 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (flow->vcc) ATM_SKB(skb)->atm_options = flow->vcc->atm_options; /*@@@ looks good ... 
but it's not supposed to work :-) */ -#ifdef CONFIG_NET_CLS_POLICE +#ifdef CONFIG_NET_CLS_ACT + switch (result) { + case TC_ACT_QUEUED: + case TC_ACT_STOLEN: + kfree_skb(skb); + return NET_XMIT_SUCCESS; + case TC_ACT_SHOT: + kfree_skb(skb); + goto drop; + } +#elif defined(CONFIG_NET_CLS_POLICE) switch (result) { case TC_POLICE_SHOT: kfree_skb(skb); - break; + goto drop; case TC_POLICE_RECLASSIFY: if (flow->excess) flow = flow->excess; @@ -431,11 +441,8 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) } #endif } - if ( -#ifdef CONFIG_NET_CLS_POLICE - result == TC_POLICE_SHOT || -#endif - (ret = flow->q->enqueue(skb, flow->q)) != 0) { + if ((ret = flow->q->enqueue(skb, flow->q)) != 0) { +drop: __maybe_unused sch->qstats.drops++; if (flow) flow->qstats.drops++; -- cgit v0.10.2 From f6853e2df3de82c1dac8f62ddcf3a8dfa302419e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 15 Jul 2007 00:02:10 -0700 Subject: [NET_SCHED]: sch_dsmark: act_api support Handle act_api classification results. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 4d2c233..2d7e891 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -237,25 +237,34 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch) D2PRINTK("result %d class 0x%04x\n", result, res.classid); switch (result) { -#ifdef CONFIG_NET_CLS_POLICE - case TC_POLICE_SHOT: - kfree_skb(skb); - sch->qstats.drops++; - return NET_XMIT_POLICED; +#ifdef CONFIG_NET_CLS_ACT + case TC_ACT_QUEUED: + case TC_ACT_STOLEN: + kfree_skb(skb); + return NET_XMIT_SUCCESS; + case TC_ACT_SHOT: + kfree_skb(skb); + sch->qstats.drops++; + return NET_XMIT_BYPASS; +#elif defined(CONFIG_NET_CLS_POLICE) + case TC_POLICE_SHOT: + kfree_skb(skb); + sch->qstats.drops++; + return NET_XMIT_POLICED; #if 0 - case TC_POLICE_RECLASSIFY: - /* FIXME: what to do here ??? */ + case TC_POLICE_RECLASSIFY: + /* FIXME: what to do here ??? 
*/ #endif #endif - case TC_POLICE_OK: - skb->tc_index = TC_H_MIN(res.classid); - break; - case TC_POLICE_UNSPEC: - /* fall through */ - default: - if (p->default_index != NO_DEFAULT_INDEX) - skb->tc_index = p->default_index; - break; + case TC_POLICE_OK: + skb->tc_index = TC_H_MIN(res.classid); + break; + case TC_POLICE_UNSPEC: + /* fall through */ + default: + if (p->default_index != NO_DEFAULT_INDEX) + skb->tc_index = p->default_index; + break; } } -- cgit v0.10.2 From 73ca4918fbb98311421259d82ef4ab44feeace43 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 15 Jul 2007 00:02:31 -0700 Subject: [NET_SCHED]: act_api: qdisc internal reclassify support The behaviour of NET_CLS_POLICE for TC_POLICE_RECLASSIFY was to return it to the qdisc, which could handle it internally or ignore it. With NET_CLS_ACT however, tc_classify starts over at the first classifier and never returns it to the qdisc. This makes it impossible to support qdisc-internal reclassification, which in turn makes it impossible to remove the old NET_CLS_POLICE code without breaking compatibility since we have two qdiscs (CBQ and ATM) that support this. This patch adds a tc_classify_compat function that handles reclassification the old way and changes CBQ and ATM to use it. This again is of course not fully backwards compatible with the previous NET_CLS_ACT behaviour. Unfortunately there is no way to fully maintain compatibility *and* support qdisc internal reclassification with NET_CLS_ACT, but this seems like the better choice over keeping the two incompatible options around forever. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 5754d53..9e22526 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -89,8 +89,10 @@ static inline void qdisc_run(struct net_device *dev) __qdisc_run(dev); } +extern int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp, + struct tcf_result *res); extern int tc_classify(struct sk_buff *skb, struct tcf_proto *tp, - struct tcf_result *res); + struct tcf_result *res); /* Calculate maximal size of packet seen by hard_start_xmit routine of this device. diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 1b8e351..0153cd9 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -290,7 +290,7 @@ static inline int qdisc_reshape_fail(struct sk_buff *skb, struct Qdisc *sch) { sch->qstats.drops++; -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) if (sch->reshape_fail == NULL || sch->reshape_fail(skb, sch)) goto drop; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 4fd0bec..13c09bc 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1145,47 +1145,57 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) to this qdisc, (optionally) tests for protocol and asks specific classifiers. 
*/ +int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp, + struct tcf_result *res) +{ + __be16 protocol = skb->protocol; + int err = 0; + + for (; tp; tp = tp->next) { + if ((tp->protocol == protocol || + tp->protocol == htons(ETH_P_ALL)) && + (err = tp->classify(skb, tp, res)) >= 0) { +#ifdef CONFIG_NET_CLS_ACT + if (err != TC_ACT_RECLASSIFY && skb->tc_verd) + skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0); +#endif + return err; + } + } + return -1; +} +EXPORT_SYMBOL(tc_classify_compat); + int tc_classify(struct sk_buff *skb, struct tcf_proto *tp, - struct tcf_result *res) + struct tcf_result *res) { int err = 0; - __be16 protocol = skb->protocol; + __be16 protocol; #ifdef CONFIG_NET_CLS_ACT struct tcf_proto *otp = tp; reclassify: #endif protocol = skb->protocol; - for ( ; tp; tp = tp->next) { - if ((tp->protocol == protocol || - tp->protocol == htons(ETH_P_ALL)) && - (err = tp->classify(skb, tp, res)) >= 0) { + err = tc_classify_compat(skb, tp, res); #ifdef CONFIG_NET_CLS_ACT - if ( TC_ACT_RECLASSIFY == err) { - __u32 verd = (__u32) G_TC_VERD(skb->tc_verd); - tp = otp; - - if (MAX_REC_LOOP < verd++) { - printk("rule prio %d protocol %02x reclassify is buggy packet dropped\n", - tp->prio&0xffff, ntohs(tp->protocol)); - return TC_ACT_SHOT; - } - skb->tc_verd = SET_TC_VERD(skb->tc_verd,verd); - goto reclassify; - } else { - if (skb->tc_verd) - skb->tc_verd = SET_TC_VERD(skb->tc_verd,0); - return err; - } -#else - - return err; -#endif + if (err == TC_ACT_RECLASSIFY) { + u32 verd = G_TC_VERD(skb->tc_verd); + tp = otp; + + if (verd++ >= MAX_REC_LOOP) { + printk("rule prio %u protocol %02x reclassify loop, " + "packet dropped\n", + tp->prio&0xffff, ntohs(tp->protocol)); + return TC_ACT_SHOT; } - + skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd); + goto reclassify; } - return -1; +#endif + return err; } +EXPORT_SYMBOL(tc_classify); void tcf_destroy(struct tcf_proto *tp) { @@ -1252,4 +1262,3 @@ EXPORT_SYMBOL(qdisc_get_rtab); EXPORT_SYMBOL(qdisc_put_rtab); 
EXPORT_SYMBOL(register_qdisc); EXPORT_SYMBOL(unregister_qdisc); -EXPORT_SYMBOL(tc_classify); diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index ccee10d..37ae6d1 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -396,8 +396,9 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) !(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority))) for (flow = p->flows; flow; flow = flow->next) if (flow->filter_list) { - result = tc_classify(skb, flow->filter_list, - &res); + result = tc_classify_compat(skb, + flow->filter_list, + &res); if (result < 0) continue; flow = (struct atm_flow_data *)res.class; @@ -420,6 +421,12 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) case TC_ACT_SHOT: kfree_skb(skb); goto drop; + case TC_POLICE_RECLASSIFY: + if (flow->excess) + flow = flow->excess; + else + ATM_SKB(skb)->atm_options |= ATM_ATMOPT_CLP; + break; } #elif defined(CONFIG_NET_CLS_POLICE) switch (result) { diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index b184c35..77381f1 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -82,7 +82,7 @@ struct cbq_class unsigned char priority2; /* priority to be used after overlimit */ unsigned char ewma_log; /* time constant for idle time calculation */ unsigned char ovl_strategy; -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) unsigned char police; #endif @@ -154,7 +154,7 @@ struct cbq_sched_data struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes with backlog */ -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) struct cbq_class *rx_class; #endif struct cbq_class *tx_class; @@ -196,7 +196,7 @@ cbq_class_lookup(struct cbq_sched_data *q, u32 classid) return NULL; } -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) static struct cbq_class * cbq_reclassify(struct sk_buff *skb, struct cbq_class *this) @@ -247,7 
+247,8 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) /* * Step 2+n. Apply classifier. */ - if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0) + if (!head->filter_list || + (result = tc_classify_compat(skb, head->filter_list, &res)) < 0) goto fallback; if ((cl = (void*)res.class) == NULL) { @@ -267,6 +268,8 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) *qerr = NET_XMIT_SUCCESS; case TC_ACT_SHOT: return NULL; + case TC_ACT_RECLASSIFY: + return cbq_reclassify(skb, cl); } #elif defined(CONFIG_NET_CLS_POLICE) switch (result) { @@ -389,7 +392,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) int ret; struct cbq_class *cl = cbq_classify(skb, sch, &ret); -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) q->rx_class = cl; #endif if (cl == NULL) { @@ -399,7 +402,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) cl->q->__parent = sch; #endif if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) { @@ -434,7 +437,7 @@ cbq_requeue(struct sk_buff *skb, struct Qdisc *sch) cbq_mark_toplevel(q, cl); -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) q->rx_class = cl; cl->q->__parent = sch; #endif @@ -670,7 +673,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) } -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child) { @@ -1364,7 +1367,7 @@ static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl) return 0; } -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p) { cl->police = p->police; @@ -1532,7 +1535,7 @@ rtattr_failure: return 
-1; } -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl) { unsigned char *b = skb_tail_pointer(skb); @@ -1558,7 +1561,7 @@ static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl) cbq_dump_rate(skb, cl) < 0 || cbq_dump_wrr(skb, cl) < 0 || cbq_dump_ovl(skb, cl) < 0 || -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) cbq_dump_police(skb, cl) < 0 || #endif cbq_dump_fopt(skb, cl) < 0) @@ -1653,7 +1656,7 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, cl->classid)) == NULL) return -ENOBUFS; } else { -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) if (cl->police == TC_POLICE_RECLASSIFY) new->reshape_fail = cbq_reshape_fail; #endif @@ -1718,7 +1721,7 @@ cbq_destroy(struct Qdisc* sch) struct cbq_class *cl; unsigned h; -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) q->rx_class = NULL; #endif /* @@ -1747,7 +1750,7 @@ static void cbq_put(struct Qdisc *sch, unsigned long arg) struct cbq_class *cl = (struct cbq_class*)arg; if (--cl->refcnt == 0) { -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) struct cbq_sched_data *q = qdisc_priv(sch); spin_lock_bh(&sch->dev->queue_lock); @@ -1795,7 +1798,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt)) return -EINVAL; -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) if (tb[TCA_CBQ_POLICE-1] && RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police)) return -EINVAL; @@ -1838,7 +1841,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t if (tb[TCA_CBQ_OVL_STRATEGY-1]) cbq_set_overlimit(cl, 
RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1])); -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) if (tb[TCA_CBQ_POLICE-1]) cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1])); #endif @@ -1931,7 +1934,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t cl->overlimit = cbq_ovl_classic; if (tb[TCA_CBQ_OVL_STRATEGY-1]) cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1])); -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) if (tb[TCA_CBQ_POLICE-1]) cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1])); #endif @@ -1975,7 +1978,7 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg) q->tx_class = NULL; q->tx_borrowed = NULL; } -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) if (q->rx_class == cl) q->rx_class = NULL; #endif diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 22e431d..b8b3345 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -125,7 +125,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) if (skb->len > q->max_size) { sch->qstats.drops++; -#ifdef CONFIG_NET_CLS_POLICE +#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) if (sch->reshape_fail == NULL || sch->reshape_fail(skb, sch)) #endif kfree_skb(skb); -- cgit v0.10.2 From c3bc7cff8fddb6ff9715be8bfc3d911378c4d69d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 15 Jul 2007 00:03:05 -0700 Subject: [NET_SCHED]: Kill CONFIG_NET_CLS_POLICE The NET_CLS_ACT option is now a full replacement for NET_CLS_POLICE, remove the old code. The config option will be kept around to select the equivalent NET_CLS_ACT options for a short time to allow easier upgrades. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller diff --git a/include/net/act_api.h b/include/net/act_api.h index 2f0273f..68b4eaf 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -121,34 +121,4 @@ extern int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, in extern int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int); extern int tcf_action_copy_stats (struct sk_buff *,struct tc_action *, int); #endif /* CONFIG_NET_CLS_ACT */ - -extern int tcf_police(struct sk_buff *skb, struct tcf_police *p); -extern void tcf_police_destroy(struct tcf_police *p); -extern struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est); -extern int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p); -extern int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *p); - -static inline int -tcf_police_release(struct tcf_police *p, int bind) -{ - int ret = 0; -#ifdef CONFIG_NET_CLS_ACT - if (p) { - if (bind) - p->tcf_bindcnt--; - - p->tcf_refcnt--; - if (p->tcf_refcnt <= 0 && !p->tcf_bindcnt) { - tcf_police_destroy(p); - ret = 1; - } - } -#else - if (p && --p->tcf_refcnt == 0) - tcf_police_destroy(p); - -#endif /* CONFIG_NET_CLS_ACT */ - return ret; -} - #endif diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 6c29920..7968b1d 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -65,8 +65,6 @@ struct tcf_exts { #ifdef CONFIG_NET_CLS_ACT struct tc_action *action; -#elif defined CONFIG_NET_CLS_POLICE - struct tcf_police *police; #endif }; @@ -91,8 +89,6 @@ tcf_exts_is_predicative(struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT return !!exts->action; -#elif defined CONFIG_NET_CLS_POLICE - return !!exts->police; #else return 0; #endif @@ -129,11 +125,7 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, #ifdef CONFIG_NET_CLS_ACT if (exts->action) return tcf_action_exec(skb, exts->action, res); -#elif defined CONFIG_NET_CLS_POLICE - if (exts->police) - return tcf_police(skb, exts->police); #endif - return 
0; } diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 0153cd9..8a67f24 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -290,7 +290,7 @@ static inline int qdisc_reshape_fail(struct sk_buff *skb, struct Qdisc *sch) { sch->qstats.drops++; -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT if (sch->reshape_fail == NULL || sch->reshape_fail(skb, sch)) goto drop; diff --git a/net/sched/Kconfig b/net/sched/Kconfig index b466288..d3f7c3f 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -472,12 +472,12 @@ config NET_ACT_SIMP config NET_CLS_POLICE bool "Traffic Policing (obsolete)" - depends on NET_CLS_ACT!=y + select NET_CLS_ACT + select NET_ACT_POLICE ---help--- Say Y here if you want to do traffic policing, i.e. strict - bandwidth limiting. This option is obsoleted by the traffic - policer implemented as action, it stays here for compatibility - reasons. + bandwidth limiting. This option is obsolete and just selects + the option replacing it. It will be removed in the future. 
config NET_CLS_IND bool "Incoming device classification" diff --git a/net/sched/Makefile b/net/sched/Makefile index 020767a..b67c36f 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -8,7 +8,6 @@ obj-$(CONFIG_NET_SCHED) += sch_api.o sch_blackhole.o obj-$(CONFIG_NET_CLS) += cls_api.o obj-$(CONFIG_NET_CLS_ACT) += act_api.o obj-$(CONFIG_NET_ACT_POLICE) += act_police.o -obj-$(CONFIG_NET_CLS_POLICE) += act_police.o obj-$(CONFIG_NET_ACT_GACT) += act_gact.o obj-$(CONFIG_NET_ACT_MIRRED) += act_mirred.o obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o diff --git a/net/sched/act_police.c b/net/sched/act_police.c index d204038..bf90e60f 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -50,7 +50,6 @@ struct tc_police_compat /* Each policer is serialized by its individual spinlock */ -#ifdef CONFIG_NET_CLS_ACT static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb, int type, struct tc_action *a) { @@ -96,9 +95,8 @@ rtattr_failure: nlmsg_trim(skb, r); goto done; } -#endif -void tcf_police_destroy(struct tcf_police *p) +static void tcf_police_destroy(struct tcf_police *p) { unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK); struct tcf_common **p1p; @@ -121,7 +119,6 @@ void tcf_police_destroy(struct tcf_police *p) BUG_TRAP(0); } -#ifdef CONFIG_NET_CLS_ACT static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est, struct tc_action *a, int ovr, int bind) { @@ -247,10 +244,19 @@ failure: static int tcf_act_police_cleanup(struct tc_action *a, int bind) { struct tcf_police *p = a->priv; + int ret = 0; - if (p != NULL) - return tcf_police_release(p, bind); - return 0; + if (p != NULL) { + if (bind) + p->tcf_bindcnt--; + + p->tcf_refcnt--; + if (p->tcf_refcnt <= 0 && !p->tcf_bindcnt) { + tcf_police_destroy(p); + ret = 1; + } + } + return ret; } static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, @@ -372,229 +378,3 @@ police_cleanup_module(void) module_init(police_init_module); 
module_exit(police_cleanup_module); - -#else /* CONFIG_NET_CLS_ACT */ - -static struct tcf_common *tcf_police_lookup(u32 index) -{ - struct tcf_hashinfo *hinfo = &police_hash_info; - struct tcf_common *p; - - read_lock(hinfo->lock); - for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p; - p = p->tcfc_next) { - if (p->tcfc_index == index) - break; - } - read_unlock(hinfo->lock); - - return p; -} - -static u32 tcf_police_new_index(void) -{ - u32 *idx_gen = &police_idx_gen; - u32 val = *idx_gen; - - do { - if (++val == 0) - val = 1; - } while (tcf_police_lookup(val)); - - return (*idx_gen = val); -} - -struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est) -{ - unsigned int h; - struct tcf_police *police; - struct rtattr *tb[TCA_POLICE_MAX]; - struct tc_police *parm; - int size; - - if (rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0) - return NULL; - - if (tb[TCA_POLICE_TBF-1] == NULL) - return NULL; - size = RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]); - if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat)) - return NULL; - - parm = RTA_DATA(tb[TCA_POLICE_TBF-1]); - - if (parm->index) { - struct tcf_common *pc; - - pc = tcf_police_lookup(parm->index); - if (pc) { - police = to_police(pc); - police->tcf_refcnt++; - return police; - } - } - police = kzalloc(sizeof(*police), GFP_KERNEL); - if (unlikely(!police)) - return NULL; - - police->tcf_refcnt = 1; - spin_lock_init(&police->tcf_lock); - if (parm->rate.rate) { - police->tcfp_R_tab = - qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]); - if (police->tcfp_R_tab == NULL) - goto failure; - if (parm->peakrate.rate) { - police->tcfp_P_tab = - qdisc_get_rtab(&parm->peakrate, - tb[TCA_POLICE_PEAKRATE-1]); - if (police->tcfp_P_tab == NULL) - goto failure; - } - } - if (tb[TCA_POLICE_RESULT-1]) { - if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) - goto failure; - police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); - } - if (tb[TCA_POLICE_AVRATE-1]) { - if 
(RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32)) - goto failure; - police->tcfp_ewma_rate = - *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); - } - police->tcfp_toks = police->tcfp_burst = parm->burst; - police->tcfp_mtu = parm->mtu; - if (police->tcfp_mtu == 0) { - police->tcfp_mtu = ~0; - if (police->tcfp_R_tab) - police->tcfp_mtu = 255<<police->tcfp_R_tab->rate.cell_log; - } - if (police->tcfp_P_tab) - police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu); - police->tcfp_t_c = psched_get_time(); - police->tcf_index = parm->index ? parm->index : - tcf_police_new_index(); - police->tcf_action = parm->action; - if (est) - gen_new_estimator(&police->tcf_bstats, &police->tcf_rate_est, - &police->tcf_lock, est); - h = tcf_hash(police->tcf_index, POL_TAB_MASK); - write_lock_bh(&police_lock); - police->tcf_next = tcf_police_ht[h]; - tcf_police_ht[h] = &police->common; - write_unlock_bh(&police_lock); - return police; - -failure: - if (police->tcfp_R_tab) - qdisc_put_rtab(police->tcfp_R_tab); - kfree(police); - return NULL; -} - -int tcf_police(struct sk_buff *skb, struct tcf_police *police) -{ - psched_time_t now; - long toks; - long ptoks = 0; - - spin_lock(&police->tcf_lock); - - police->tcf_bstats.bytes += skb->len; - police->tcf_bstats.packets++; - - if (police->tcfp_ewma_rate && - police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { - police->tcf_qstats.overlimits++; - spin_unlock(&police->tcf_lock); - return police->tcf_action; - } - if (skb->len <= police->tcfp_mtu) { - if (police->tcfp_R_tab == NULL) { - spin_unlock(&police->tcf_lock); - return police->tcfp_result; - } - - now = psched_get_time(); - toks = psched_tdiff_bounded(now, police->tcfp_t_c, - police->tcfp_burst); - if (police->tcfp_P_tab) { - ptoks = toks + police->tcfp_ptoks; - if (ptoks > (long)L2T_P(police, police->tcfp_mtu)) - ptoks = (long)L2T_P(police, police->tcfp_mtu); - ptoks -= L2T_P(police, skb->len); - } - toks += police->tcfp_toks; - if (toks > (long)police->tcfp_burst) - toks = police->tcfp_burst; -
toks -= L2T(police, skb->len); - if ((toks|ptoks) >= 0) { - police->tcfp_t_c = now; - police->tcfp_toks = toks; - police->tcfp_ptoks = ptoks; - spin_unlock(&police->tcf_lock); - return police->tcfp_result; - } - } - - police->tcf_qstats.overlimits++; - spin_unlock(&police->tcf_lock); - return police->tcf_action; -} -EXPORT_SYMBOL(tcf_police); - -int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police) -{ - unsigned char *b = skb_tail_pointer(skb); - struct tc_police opt; - - opt.index = police->tcf_index; - opt.action = police->tcf_action; - opt.mtu = police->tcfp_mtu; - opt.burst = police->tcfp_burst; - if (police->tcfp_R_tab) - opt.rate = police->tcfp_R_tab->rate; - else - memset(&opt.rate, 0, sizeof(opt.rate)); - if (police->tcfp_P_tab) - opt.peakrate = police->tcfp_P_tab->rate; - else - memset(&opt.peakrate, 0, sizeof(opt.peakrate)); - RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); - if (police->tcfp_result) - RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), - &police->tcfp_result); - if (police->tcfp_ewma_rate) - RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate); - return skb->len; - -rtattr_failure: - nlmsg_trim(skb, b); - return -1; -} - -int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *police) -{ - struct gnet_dump d; - - if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, - TCA_XSTATS, &police->tcf_lock, - &d) < 0) - goto errout; - - if (gnet_stats_copy_basic(&d, &police->tcf_bstats) < 0 || - gnet_stats_copy_rate_est(&d, &police->tcf_rate_est) < 0 || - gnet_stats_copy_queue(&d, &police->tcf_qstats) < 0) - goto errout; - - if (gnet_stats_finish_copy(&d) < 0) - goto errout; - - return 0; - -errout: - return -1; -} - -#endif /* CONFIG_NET_CLS_ACT */ diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 36b72aa..5f0fbca 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -458,11 +458,6 @@ tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts) tcf_action_destroy(exts->action, TCA_ACT_UNBIND); 
exts->action = NULL; } -#elif defined CONFIG_NET_CLS_POLICE - if (exts->police) { - tcf_police_release(exts->police, TCA_ACT_UNBIND); - exts->police = NULL; - } #endif } @@ -496,17 +491,6 @@ tcf_exts_validate(struct tcf_proto *tp, struct rtattr **tb, exts->action = act; } } -#elif defined CONFIG_NET_CLS_POLICE - if (map->police && tb[map->police-1]) { - struct tcf_police *p; - - p = tcf_police_locate(tb[map->police-1], rate_tlv); - if (p == NULL) - return -EINVAL; - - exts->police = p; - } else if (map->action && tb[map->action-1]) - return -EOPNOTSUPP; #else if ((map->action && tb[map->action-1]) || (map->police && tb[map->police-1])) @@ -529,15 +513,6 @@ tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst, if (act) tcf_action_destroy(act, TCA_ACT_UNBIND); } -#elif defined CONFIG_NET_CLS_POLICE - if (src->police) { - struct tcf_police *p; - tcf_tree_lock(tp); - p = xchg(&dst->police, src->police); - tcf_tree_unlock(tp); - if (p) - tcf_police_release(p, TCA_ACT_UNBIND); - } #endif } @@ -566,17 +541,6 @@ tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts, p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta; } } -#elif defined CONFIG_NET_CLS_POLICE - if (map->police && exts->police) { - struct rtattr *p_rta = (struct rtattr *)skb_tail_pointer(skb); - - RTA_PUT(skb, map->police, 0, NULL); - - if (tcf_police_dump(skb, exts->police) < 0) - goto rtattr_failure; - - p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta; - } #endif return 0; rtattr_failure: __attribute__ ((unused)) @@ -591,10 +555,6 @@ tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts, if (exts->action) if (tcf_action_copy_stats(skb, exts->action, 1) < 0) goto rtattr_failure; -#elif defined CONFIG_NET_CLS_POLICE - if (exts->police) - if (tcf_police_dump_stats(skb, exts->police) < 0) - goto rtattr_failure; #endif return 0; rtattr_failure: __attribute__ ((unused)) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 77961e2..8dbe369 100644 --- a/net/sched/cls_u32.c +++ 
b/net/sched/cls_u32.c @@ -782,9 +782,6 @@ static int __init init_u32(void) #ifdef CONFIG_CLS_U32_PERF printk(" Performance counters on\n"); #endif -#ifdef CONFIG_NET_CLS_POLICE - printk(" OLD policer on \n"); -#endif #ifdef CONFIG_NET_CLS_IND printk(" input device check on \n"); #endif diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 37ae6d1..417ec8f 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -428,26 +428,9 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) ATM_SKB(skb)->atm_options |= ATM_ATMOPT_CLP; break; } -#elif defined(CONFIG_NET_CLS_POLICE) - switch (result) { - case TC_POLICE_SHOT: - kfree_skb(skb); - goto drop; - case TC_POLICE_RECLASSIFY: - if (flow->excess) - flow = flow->excess; - else { - ATM_SKB(skb)->atm_options |= ATM_ATMOPT_CLP; - break; - } - /* fall through */ - case TC_POLICE_OK: - /* fall through */ - default: - break; - } #endif } + if ((ret = flow->q->enqueue(skb, flow->q)) != 0) { drop: __maybe_unused sch->qstats.drops++; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 77381f1..e38c283 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -82,7 +82,7 @@ struct cbq_class unsigned char priority2; /* priority to be used after overlimit */ unsigned char ewma_log; /* time constant for idle time calculation */ unsigned char ovl_strategy; -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT unsigned char police; #endif @@ -154,7 +154,7 @@ struct cbq_sched_data struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes with backlog */ -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT struct cbq_class *rx_class; #endif struct cbq_class *tx_class; @@ -196,7 +196,7 @@ cbq_class_lookup(struct cbq_sched_data *q, u32 classid) return NULL; } -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT static struct cbq_class * cbq_reclassify(struct sk_buff *skb, 
struct cbq_class *this) @@ -271,15 +271,6 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) case TC_ACT_RECLASSIFY: return cbq_reclassify(skb, cl); } -#elif defined(CONFIG_NET_CLS_POLICE) - switch (result) { - case TC_POLICE_RECLASSIFY: - return cbq_reclassify(skb, cl); - case TC_POLICE_SHOT: - return NULL; - default: - break; - } #endif if (cl->level == 0) return cl; @@ -392,7 +383,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) int ret; struct cbq_class *cl = cbq_classify(skb, sch, &ret); -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT q->rx_class = cl; #endif if (cl == NULL) { @@ -402,7 +393,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT cl->q->__parent = sch; #endif if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) { @@ -437,7 +428,7 @@ cbq_requeue(struct sk_buff *skb, struct Qdisc *sch) cbq_mark_toplevel(q, cl); -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT q->rx_class = cl; cl->q->__parent = sch; #endif @@ -672,9 +663,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) return HRTIMER_NORESTART; } - -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) - +#ifdef CONFIG_NET_CLS_ACT static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child) { int len = skb->len; @@ -1367,7 +1356,7 @@ static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl) return 0; } -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p) { cl->police = p->police; @@ -1535,7 +1524,7 @@ rtattr_failure: return -1; } -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl) 
{ unsigned char *b = skb_tail_pointer(skb); @@ -1561,7 +1550,7 @@ static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl) cbq_dump_rate(skb, cl) < 0 || cbq_dump_wrr(skb, cl) < 0 || cbq_dump_ovl(skb, cl) < 0 || -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT cbq_dump_police(skb, cl) < 0 || #endif cbq_dump_fopt(skb, cl) < 0) @@ -1656,7 +1645,7 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, cl->classid)) == NULL) return -ENOBUFS; } else { -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT if (cl->police == TC_POLICE_RECLASSIFY) new->reshape_fail = cbq_reshape_fail; #endif @@ -1721,7 +1710,7 @@ cbq_destroy(struct Qdisc* sch) struct cbq_class *cl; unsigned h; -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT q->rx_class = NULL; #endif /* @@ -1750,7 +1739,7 @@ static void cbq_put(struct Qdisc *sch, unsigned long arg) struct cbq_class *cl = (struct cbq_class*)arg; if (--cl->refcnt == 0) { -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT struct cbq_sched_data *q = qdisc_priv(sch); spin_lock_bh(&sch->dev->queue_lock); @@ -1798,7 +1787,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt)) return -EINVAL; -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT if (tb[TCA_CBQ_POLICE-1] && RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police)) return -EINVAL; @@ -1841,7 +1830,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t if (tb[TCA_CBQ_OVL_STRATEGY-1]) cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1])); -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT if (tb[TCA_CBQ_POLICE-1]) cbq_set_police(cl, 
RTA_DATA(tb[TCA_CBQ_POLICE-1])); #endif @@ -1934,7 +1923,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t cl->overlimit = cbq_ovl_classic; if (tb[TCA_CBQ_OVL_STRATEGY-1]) cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1])); -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT if (tb[TCA_CBQ_POLICE-1]) cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1])); #endif @@ -1978,7 +1967,7 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg) q->tx_class = NULL; q->tx_borrowed = NULL; } -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT if (q->rx_class == cl) q->rx_class = NULL; #endif diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 2d7e891..60f8919 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -246,21 +246,10 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch) kfree_skb(skb); sch->qstats.drops++; return NET_XMIT_BYPASS; -#elif defined(CONFIG_NET_CLS_POLICE) - case TC_POLICE_SHOT: - kfree_skb(skb); - sch->qstats.drops++; - return NET_XMIT_POLICED; -#if 0 - case TC_POLICE_RECLASSIFY: - /* FIXME: what to do here ??? 
*/ -#endif #endif - case TC_POLICE_OK: + case TC_ACT_OK: skb->tc_index = TC_H_MIN(res.classid); break; - case TC_POLICE_UNSPEC: - /* fall through */ default: if (p->default_index != NO_DEFAULT_INDEX) skb->tc_index = p->default_index; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 874452c..55e7e45 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1174,9 +1174,6 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) case TC_ACT_SHOT: return NULL; } -#elif defined(CONFIG_NET_CLS_POLICE) - if (result == TC_POLICE_SHOT) - return NULL; #endif if ((cl = (struct hfsc_class *)res.class) == NULL) { if ((cl = hfsc_find_class(res.classid, sch)) == NULL) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index b417a95..246a2f9 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -249,9 +249,6 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, case TC_ACT_SHOT: return NULL; } -#elif defined(CONFIG_NET_CLS_POLICE) - if (result == TC_POLICE_SHOT) - return HTB_DIRECT; #endif if ((cl = (void *)res.class) == NULL) { if (res.classid == sch->handle) diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index cd0aab6..51f16b0 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -164,31 +164,12 @@ static int ingress_enqueue(struct sk_buff *skb,struct Qdisc *sch) result = TC_ACT_OK; break; } -/* backward compat */ -#else -#ifdef CONFIG_NET_CLS_POLICE - switch (result) { - case TC_POLICE_SHOT: - result = NF_DROP; - sch->qstats.drops++; - break; - case TC_POLICE_RECLASSIFY: /* DSCP remarking here ? 
*/ - case TC_POLICE_OK: - case TC_POLICE_UNSPEC: - default: - sch->bstats.packets++; - sch->bstats.bytes += skb->len; - result = NF_ACCEPT; - break; - } - #else D2PRINTK("Overriding result to ACCEPT\n"); result = NF_ACCEPT; sch->bstats.packets++; sch->bstats.bytes += skb->len; #endif -#endif return result; } diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index b8b3345..8c2639a 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -125,7 +125,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) if (skb->len > q->max_size) { sch->qstats.drops++; -#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE) +#ifdef CONFIG_NET_CLS_ACT if (sch->reshape_fail == NULL || sch->reshape_fail(skb, sch)) #endif kfree_skb(skb); -- cgit v0.10.2 From 063ed369c97f8de4cce23bf93bebd7ffacb542ff Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Sun, 15 Jul 2007 00:16:35 -0700 Subject: [IPV6]: Call inet6addr_chain notifiers on link down Currently if the link is brought down via ip link or ifconfig down, the inet6addr_chain notifiers are not called even though all the addresses are removed from the interface. This caused SCTP to add duplicate addresses to its list. Signed-off-by: Vlad Yasevich Signed-off-by: David S.
Miller diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 24424c3..0601292 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2475,6 +2475,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) write_unlock_bh(&idev->lock); __ipv6_ifa_notify(RTM_DELADDR, ifa); + atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); in6_ifa_put(ifa); write_lock_bh(&idev->lock); -- cgit v0.10.2 From 0a9f2a467d8dacaf7e97469dba99ed2d07287d80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sun, 15 Jul 2007 00:19:29 -0700 Subject: [TCP]: Verify the presence of RETRANS bit when leaving FRTO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For a yet unknown reason, something cleared the SACKED_RETRANS bit underneath FRTO. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 69f9f1e..4e5884a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1398,7 +1398,9 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) * waiting for the first ACK and did not get it)... */ if ((tp->frto_counter == 1) && !(flag&FLAG_DATA_ACKED)) { - tp->retrans_out += tcp_skb_pcount(skb); + /* For some reason this R-bit might get cleared? */ + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) + tp->retrans_out += tcp_skb_pcount(skb); /* ...enter this if branch just for the first segment */ flag |= FLAG_DATA_ACKED; } else { -- cgit v0.10.2