diff options
author | Scott Wood <scottwood@freescale.com> | 2015-02-13 22:30:07 (GMT) |
---|---|---|
committer | Scott Wood <scottwood@freescale.com> | 2015-02-13 22:30:07 (GMT) |
commit | e5fc662387b40984ed4b7a14477cc659f4d80b94 (patch) | |
tree | cf60ec2e653100003e59216306e241a4485c7d4f | |
parent | fa2b857e85998a6843c1a6843446ff00067fa3a4 (diff) | |
parent | 38bff34aef9a92ba18386e801604824856883b32 (diff) | |
download | linux-fsl-qoriq-e5fc662387b40984ed4b7a14477cc659f4d80b94.tar.xz |
Merge branch 'rtmerge'
Signed-off-by: Scott Wood <scottwood@freescale.com>
Conflicts:
arch/arm/kvm/mmu.c
arch/arm/mm/proc-v7-3level.S
arch/powerpc/kernel/vdso32/getcpu.S
drivers/crypto/caam/error.c
drivers/crypto/caam/sg_sw_sec4.h
drivers/usb/host/ehci-fsl.c
1974 files changed, 26317 insertions, 12605 deletions
diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy index f1c5cc9..4c3efe4 100644 --- a/Documentation/ABI/testing/ima_policy +++ b/Documentation/ABI/testing/ima_policy @@ -23,7 +23,7 @@ Description: [fowner]] lsm: [[subj_user=] [subj_role=] [subj_type=] [obj_user=] [obj_role=] [obj_type=]] - option: [[appraise_type=]] + option: [[appraise_type=]] [permit_directio] base: func:= [BPRM_CHECK][MMAP_CHECK][FILE_CHECK][MODULE_CHECK] mask:= [MAY_READ] [MAY_WRITE] [MAY_APPEND] [MAY_EXEC] diff --git a/Documentation/DMA-API-HOWTO.txt b/Documentation/DMA-API-HOWTO.txt index 14129f1..5e98303 100644 --- a/Documentation/DMA-API-HOWTO.txt +++ b/Documentation/DMA-API-HOWTO.txt @@ -101,14 +101,23 @@ style to do this even if your device holds the default setting, because this shows that you did think about these issues wrt. your device. -The query is performed via a call to dma_set_mask(): +The query is performed via a call to dma_set_mask_and_coherent(): - int dma_set_mask(struct device *dev, u64 mask); + int dma_set_mask_and_coherent(struct device *dev, u64 mask); -The query for consistent allocations is performed via a call to -dma_set_coherent_mask(): +which will query the mask for both streaming and coherent APIs together. +If you have some special requirements, then the following two separate +queries can be used instead: - int dma_set_coherent_mask(struct device *dev, u64 mask); + The query for streaming mappings is performed via a call to + dma_set_mask(): + + int dma_set_mask(struct device *dev, u64 mask); + + The query for consistent allocations is performed via a call + to dma_set_coherent_mask(): + + int dma_set_coherent_mask(struct device *dev, u64 mask); Here, dev is a pointer to the device struct of your device, and mask is a bit mask describing which bits of an address your device @@ -137,7 +146,7 @@ exactly why. 
The standard 32-bit addressing device would do something like this: - if (dma_set_mask(dev, DMA_BIT_MASK(32))) { + if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32))) { printk(KERN_WARNING "mydev: No suitable DMA available.\n"); goto ignore_this_device; @@ -171,22 +180,20 @@ the case would look like this: int using_dac, consistent_using_dac; - if (!dma_set_mask(dev, DMA_BIT_MASK(64))) { + if (!dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64))) { using_dac = 1; consistent_using_dac = 1; - dma_set_coherent_mask(dev, DMA_BIT_MASK(64)); - } else if (!dma_set_mask(dev, DMA_BIT_MASK(32))) { + } else if (!dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32))) { using_dac = 0; consistent_using_dac = 0; - dma_set_coherent_mask(dev, DMA_BIT_MASK(32)); } else { printk(KERN_WARNING "mydev: No suitable DMA available.\n"); goto ignore_this_device; } -dma_set_coherent_mask() will always be able to set the same or a -smaller mask as dma_set_mask(). However for the rare case that a +The coherent mask will always be able to set the same or a +smaller mask as the streaming mask. However for the rare case that a device driver only uses consistent allocations, one would have to check the return value from dma_set_coherent_mask(). @@ -199,9 +206,9 @@ address you might do something like: goto ignore_this_device; } -When dma_set_mask() is successful, and returns zero, the kernel saves -away this mask you have provided. The kernel will use this -information later when you make DMA mappings. +When dma_set_mask() or dma_set_mask_and_coherent() is successful, and +returns zero, the kernel saves away this mask you have provided. The +kernel will use this information later when you make DMA mappings. There is a case which we are aware of at this time, which is worth mentioning in this documentation. 
If your device supports multiple diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index 78a6c56..e865279 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt @@ -142,6 +142,14 @@ internal API for use by the platform than an external API for use by driver writers. int +dma_set_mask_and_coherent(struct device *dev, u64 mask) + +Checks to see if the mask is possible and updates the device +streaming and coherent DMA mask parameters if it is. + +Returns: 0 if successful and a negative error if not. + +int dma_set_mask(struct device *dev, u64 mask) Checks to see if the mask is possible and updates the device diff --git a/Documentation/DocBook/media/Makefile b/Documentation/DocBook/media/Makefile index f9fd615..1d27f0a 100644 --- a/Documentation/DocBook/media/Makefile +++ b/Documentation/DocBook/media/Makefile @@ -195,7 +195,7 @@ DVB_DOCUMENTED = \ # install_media_images = \ - $(Q)cp $(OBJIMGFILES) $(MEDIA_SRC_DIR)/v4l/*.svg $(MEDIA_OBJ_DIR)/media_api + $(Q)-cp $(OBJIMGFILES) $(MEDIA_SRC_DIR)/v4l/*.svg $(MEDIA_OBJ_DIR)/media_api $(MEDIA_OBJ_DIR)/%: $(MEDIA_SRC_DIR)/%.b64 $(Q)base64 -d $< >$@ diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches index 26b1e31..1ec219a 100644 --- a/Documentation/SubmittingPatches +++ b/Documentation/SubmittingPatches @@ -119,6 +119,20 @@ Example: platform_set_drvdata(), but left the variable "dev" unused, delete it. +If your patch fixes a bug in a specific commit, e.g. you found an issue using +git-bisect, please use the 'Fixes:' tag with the first 12 characters of the +SHA-1 ID, and the one line summary. +Example: + + Fixes: e21d2170f366 ("video: remove unnecessary platform_set_drvdata()") + +The following git-config settings can be used to add a pretty format for +outputting the above style in the git log or git show commands + + [core] + abbrev = 12 + [pretty] + fixes = Fixes: %h (\"%s\") 3) Separate your changes. @@ -430,7 +444,7 @@ person it names. 
This tag documents that potentially interested parties have been included in the discussion -14) Using Reported-by:, Tested-by:, Reviewed-by: and Suggested-by: +14) Using Reported-by:, Tested-by:, Reviewed-by:, Suggested-by: and Fixes: If this patch fixes a problem reported by somebody else, consider adding a Reported-by: tag to credit the reporter for their contribution. Please @@ -485,6 +499,12 @@ idea was not posted in a public forum. That said, if we diligently credit our idea reporters, they will, hopefully, be inspired to help us again in the future. +A Fixes: tag indicates that the patch fixes an issue in a previous commit. It +is used to make it easy to determine where a bug originated, which can help +review a bug fix. This tag also assists the stable kernel team in determining +which stable kernel versions should receive your fix. This is the preferred +method for indicating a bug fixed by the patch. See #2 above for more details. + 15) The canonical patch format diff --git a/Documentation/devicetree/bindings/spi/efm32-spi.txt b/Documentation/devicetree/bindings/spi/efm32-spi.txt index a590ca5..f762e11 100644 --- a/Documentation/devicetree/bindings/spi/efm32-spi.txt +++ b/Documentation/devicetree/bindings/spi/efm32-spi.txt @@ -3,7 +3,7 @@ Required properties: - #address-cells: see spi-bus.txt - #size-cells: see spi-bus.txt -- compatible: should be "efm32,spi" +- compatible: should be "energymicro,efm32-spi" - reg: Offset and length of the register set for the controller - interrupts: pair specifying rx and tx irq - clocks: phandle to the spi clock @@ -15,7 +15,7 @@ Example: spi1: spi@0x4000c400 { /* USART1 */ #address-cells = <1>; #size-cells = <0>; - compatible = "efm32,spi"; + compatible = "energymicro,efm32-spi"; reg = <0x4000c400 0x400>; interrupts = <15 16>; clocks = <&cmu 20>; diff --git a/Documentation/input/elantech.txt b/Documentation/input/elantech.txt index 5602eb7..e1ae127 100644 --- a/Documentation/input/elantech.txt +++ 
b/Documentation/input/elantech.txt @@ -504,9 +504,12 @@ byte 5: * reg_10 bit 7 6 5 4 3 2 1 0 - 0 0 0 0 0 0 0 A + 0 0 0 0 R F T A A: 1 = enable absolute tracking + T: 1 = enable two finger mode auto correct + F: 1 = disable ABS Position Filter + R: 1 = enable real hardware resolution 6.2 Native absolute mode 6 byte packet format ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/Documentation/ja_JP/HOWTO b/Documentation/ja_JP/HOWTO index 8148a47..52cf275 100644 --- a/Documentation/ja_JP/HOWTO +++ b/Documentation/ja_JP/HOWTO @@ -315,7 +315,7 @@ Andrew Morton ㌠Linux-kernel メーリングリストã«ã‚«ãƒ¼ãƒãƒ«ãƒªãƒªãƒ¼ã ã‚‚ã—ã€3.x.y カーãƒãƒ«ãŒå˜åœ¨ã—ãªã„å ´åˆã«ã¯ã€ç•ªå·ãŒä¸€ç•ªå¤§ãã„ 3.x ㌠最新ã®å®‰å®šç‰ˆã‚«ãƒ¼ãƒãƒ«ã§ã™ã€‚ -3.x.y 㯠"stable" ãƒãƒ¼ãƒ <stable@kernel.org> ã§ãƒ¡ãƒ³ãƒ†ã•ã‚Œã¦ãŠã‚Šã€å¿… +3.x.y 㯠"stable" ãƒãƒ¼ãƒ <stable@vger.kernel.org> ã§ãƒ¡ãƒ³ãƒ†ã•ã‚Œã¦ãŠã‚Šã€å¿… è¦ã«å¿œã˜ã¦ãƒªãƒªãƒ¼ã‚¹ã•ã‚Œã¾ã™ã€‚通常ã®ãƒªãƒªãƒ¼ã‚¹æœŸé–“㯠2週間毎ã§ã™ãŒã€å·®ã—迫㣠ãŸå•é¡ŒãŒãªã‘ã‚Œã°ã‚‚ã†å°‘ã—é•·ããªã‚‹ã“ã¨ã‚‚ã‚ã‚Šã¾ã™ã€‚ã‚»ã‚ュリティ関連ã®å•é¡Œ ã®å ´åˆã¯ã“ã‚Œã«å¯¾ã—ã¦ã ã„ãŸã„ã®å ´åˆã€ã™ãã«ãƒªãƒªãƒ¼ã‚¹ãŒã•ã‚Œã¾ã™ã€‚ diff --git a/Documentation/ja_JP/stable_kernel_rules.txt b/Documentation/ja_JP/stable_kernel_rules.txt index 1426583..9dbda9b 100644 --- a/Documentation/ja_JP/stable_kernel_rules.txt +++ b/Documentation/ja_JP/stable_kernel_rules.txt @@ -50,16 +50,16 @@ linux-2.6.29/Documentation/stable_kernel_rules.txt -stable ツリーã«ãƒ‘ッãƒã‚’é€ä»˜ã™ã‚‹æ‰‹ç¶šã- - - 上記ã®è¦å‰‡ã«å¾“ã£ã¦ã„ã‚‹ã‹ã‚’確èªã—ãŸå¾Œã«ã€stable@kernel.org ã«ãƒ‘ッム+ - 上記ã®è¦å‰‡ã«å¾“ã£ã¦ã„ã‚‹ã‹ã‚’確èªã—ãŸå¾Œã«ã€stable@vger.kernel.org ã«ãƒ‘ッムをé€ã‚‹ã€‚ - é€ä¿¡è€…ã¯ãƒ‘ッãƒãŒã‚ューã«å—ã‘付ã‘られãŸéš›ã«ã¯ ACK ã‚’ã€å´ä¸‹ã•ã‚ŒãŸå ´åˆ ã«ã¯ NAK ã‚’å—ã‘å–る。ã“ã®åå¿œã¯é–‹ç™ºè€…ãŸã¡ã®ã‚¹ã‚±ã‚¸ãƒ¥ãƒ¼ãƒ«ã«ã‚ˆã£ã¦ã€æ•° æ—¥ã‹ã‹ã‚‹å ´åˆãŒã‚る。 - ã‚‚ã—å—ã‘å–られãŸã‚‰ã€ãƒ‘ッãƒã¯ä»–ã®é–‹ç™ºè€…ãŸã¡ã¨é–¢é€£ã™ã‚‹ã‚µãƒ–システム㮠メンテナーã«ã‚ˆã‚‹ãƒ¬ãƒ“ューã®ãŸã‚ã« -stable ã‚ューã«è¿½åŠ ã•ã‚Œã‚‹ã€‚ - - パッãƒã« stable@kernel.org ã®ã‚¢ãƒ‰ãƒ¬ã‚¹ãŒä»˜åŠ ã•ã‚Œã¦ã„ã‚‹ã¨ãã«ã¯ã€ãã‚Œ + - パッãƒã« 
stable@vger.kernel.org ã®ã‚¢ãƒ‰ãƒ¬ã‚¹ãŒä»˜åŠ ã•ã‚Œã¦ã„ã‚‹ã¨ãã«ã¯ã€ãã‚Œ ㌠Linus ã®ãƒ„リーã«å…¥ã‚‹æ™‚ã«è‡ªå‹•çš„ã« stable ãƒãƒ¼ãƒ ã« email ã•ã‚Œã‚‹ã€‚ - - ã‚»ã‚ュリティパッãƒã¯ã“ã®ã‚¨ã‚¤ãƒªã‚¢ã‚¹ (stable@kernel.org) ã«é€ã‚‰ã‚Œã‚‹ã¹ + - ã‚»ã‚ュリティパッãƒã¯ã“ã®ã‚¨ã‚¤ãƒªã‚¢ã‚¹ (stable@vger.kernel.org) ã«é€ã‚‰ã‚Œã‚‹ã¹ ãã§ã¯ãªãã€ä»£ã‚ã‚Šã« security@kernel.org ã®ã‚¢ãƒ‰ãƒ¬ã‚¹ã«é€ã‚‰ã‚Œã‚‹ã€‚ レビューサイクル- diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index f5a6087..ebd2c25 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -343,6 +343,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. no: ACPI OperationRegions are not marked as reserved, no further checks are performed. + acpi_no_memhotplug [ACPI] Disable memory hotplug. Useful for kdump + kernels. + add_efi_memmap [EFI; X86] Include EFI memory map in kernel's map of available physical RAM. @@ -1119,6 +1122,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. i8042.notimeout [HW] Ignore timeout condition signalled by controller i8042.reset [HW] Reset the controller during init and cleanup i8042.unlock [HW] Unlock (ignore) the keylock + i8042.kbdreset [HW] Reset device connected to KBD port i810= [HW,DRM] diff --git a/Documentation/lzo.txt b/Documentation/lzo.txt new file mode 100644 index 0000000..ea45dd3 --- /dev/null +++ b/Documentation/lzo.txt @@ -0,0 +1,164 @@ + +LZO stream format as understood by Linux's LZO decompressor +=========================================================== + +Introduction + + This is not a specification. No specification seems to be publicly available + for the LZO stream format. This document describes what input format the LZO + decompressor as implemented in the Linux kernel understands. The file subject + of this analysis is lib/lzo/lzo1x_decompress_safe.c. 
No analysis was made on + the compressor nor on any other implementations though it seems likely that + the format matches the standard one. The purpose of this document is to + better understand what the code does in order to propose more efficient fixes + for future bug reports. + +Description + + The stream is composed of a series of instructions, operands, and data. The + instructions consist in a few bits representing an opcode, and bits forming + the operands for the instruction, whose size and position depend on the + opcode and on the number of literals copied by previous instruction. The + operands are used to indicate : + + - a distance when copying data from the dictionary (past output buffer) + - a length (number of bytes to copy from dictionary) + - the number of literals to copy, which is retained in variable "state" + as a piece of information for next instructions. + + Optionally depending on the opcode and operands, extra data may follow. These + extra data can be a complement for the operand (eg: a length or a distance + encoded on larger values), or a literal to be copied to the output buffer. + + The first byte of the block follows a different encoding from other bytes, it + seems to be optimized for literal use only, since there is no dictionary yet + prior to that byte. + + Lengths are always encoded on a variable size starting with a small number + of bits in the operand. If the number of bits isn't enough to represent the + length, up to 255 may be added in increments by consuming more bytes with a + rate of at most 255 per extra byte (thus the compression ratio cannot exceed + around 255:1). The variable length encoding using #bits is always the same : + + length = byte & ((1 << #bits) - 1) + if (!length) { + length = ((1 << #bits) - 1) + length += 255*(number of zero bytes) + length += first-non-zero-byte + } + length += constant (generally 2 or 3) + + For references to the dictionary, distances are relative to the output + pointer. 
Distances are encoded using very few bits belonging to certain + ranges, resulting in multiple copy instructions using different encodings. + Certain encodings involve one extra byte, others involve two extra bytes + forming a little-endian 16-bit quantity (marked LE16 below). + + After any instruction except the large literal copy, 0, 1, 2 or 3 literals + are copied before starting the next instruction. The number of literals that + were copied may change the meaning and behaviour of the next instruction. In + practice, only one instruction needs to know whether 0, less than 4, or more + literals were copied. This is the information stored in the <state> variable + in this implementation. This number of immediate literals to be copied is + generally encoded in the last two bits of the instruction but may also be + taken from the last two bits of an extra operand (eg: distance). + + End of stream is declared when a block copy of distance 0 is seen. Only one + instruction may encode this distance (0001HLLL), it takes one LE16 operand + for the distance, thus requiring 3 bytes. + + IMPORTANT NOTE : in the code some length checks are missing because certain + instructions are called under the assumption that a certain number of bytes + follow because it has already been guaranteed before parsing the instructions. + They just have to "refill" this credit if they consume extra bytes. This is + an implementation design choice independent of the algorithm or encoding. + +Byte sequences + + First byte encoding : + + 0..17 : follow regular instruction encoding, see below. It is worth + noting that codes 16 and 17 will represent a block copy from + the dictionary which is empty, and that they will always be + invalid at this place. 
+ + 18..21 : copy 0..3 literals + state = (byte - 17) = 0..3 [ copy <state> literals ] + skip byte + + 22..255 : copy literal string + length = (byte - 17) = 4..238 + state = 4 [ don't copy extra literals ] + skip byte + + Instruction encoding : + + 0 0 0 0 X X X X (0..15) + Depends on the number of literals copied by the last instruction. + If last instruction did not copy any literal (state == 0), this + encoding will be a copy of 4 or more literal, and must be interpreted + like this : + + 0 0 0 0 L L L L (0..15) : copy long literal string + length = 3 + (L ?: 15 + (zero_bytes * 255) + non_zero_byte) + state = 4 (no extra literals are copied) + + If last instruction used to copy between 1 to 3 literals (encoded in + the instruction's opcode or distance), the instruction is a copy of a + 2-byte block from the dictionary within a 1kB distance. It is worth + noting that this instruction provides little savings since it uses 2 + bytes to encode a copy of 2 other bytes but it encodes the number of + following literals for free. 
It must be interpreted like this : + + 0 0 0 0 D D S S (0..15) : copy 2 bytes from <= 1kB distance + length = 2 + state = S (copy S literals after this block) + Always followed by exactly one byte : H H H H H H H H + distance = (H << 2) + D + 1 + + If last instruction used to copy 4 or more literals (as detected by + state == 4), the instruction becomes a copy of a 3-byte block from the + dictionary from a 2..3kB distance, and must be interpreted like this : + + 0 0 0 0 D D S S (0..15) : copy 3 bytes from 2..3 kB distance + length = 3 + state = S (copy S literals after this block) + Always followed by exactly one byte : H H H H H H H H + distance = (H << 2) + D + 2049 + + 0 0 0 1 H L L L (16..31) + Copy of a block within 16..48kB distance (preferably less than 10B) + length = 2 + (L ?: 7 + (zero_bytes * 255) + non_zero_byte) + Always followed by exactly one LE16 : D D D D D D D D : D D D D D D S S + distance = 16384 + (H << 14) + D + state = S (copy S literals after this block) + End of stream is reached if distance == 16384 + + 0 0 1 L L L L L (32..63) + Copy of small block within 16kB distance (preferably less than 34B) + length = 2 + (L ?: 31 + (zero_bytes * 255) + non_zero_byte) + Always followed by exactly one LE16 : D D D D D D D D : D D D D D D S S + distance = D + 1 + state = S (copy S literals after this block) + + 0 1 L D D D S S (64..127) + Copy 3-4 bytes from block within 2kB distance + state = S (copy S literals after this block) + length = 3 + L + Always followed by exactly one byte : H H H H H H H H + distance = (H << 3) + D + 1 + + 1 L L D D D S S (128..255) + Copy 5-8 bytes from block within 2kB distance + state = S (copy S literals after this block) + length = 5 + L + Always followed by exactly one byte : H H H H H H H H + distance = (H << 3) + D + 1 + +Authors + + This document was written by Willy Tarreau <w@1wt.eu> on 2014/07/19 during an + analysis of the decompression code available in Linux 3.16-rc5. 
The code is + tricky, it is possible that this document contains mistakes or that a few + corner cases were overlooked. In any case, please report any doubt, fix, or + proposed updates to the author(s) so that the document can be updated. diff --git a/Documentation/ramoops.txt b/Documentation/ramoops.txt index 69b3cac..5d86756 100644 --- a/Documentation/ramoops.txt +++ b/Documentation/ramoops.txt @@ -14,11 +14,19 @@ survive after a restart. 1. Ramoops concepts -Ramoops uses a predefined memory area to store the dump. The start and size of -the memory area are set using two variables: +Ramoops uses a predefined memory area to store the dump. The start and size +and type of the memory area are set using three variables: * "mem_address" for the start * "mem_size" for the size. The memory size will be rounded down to a power of two. + * "mem_type" to specify the memory type (default is pgprot_writecombine). + +Typically the default value of mem_type=0 should be used as that sets the pstore +mapping to pgprot_writecombine. Setting mem_type=1 attempts to use +pgprot_noncached, which only works on some platforms. This is because pstore +depends on atomic operations. At least on ARM, pgprot_noncached causes the +memory to be mapped strongly ordered, and atomic operations on strongly ordered +memory are implementation defined, and won't work on many ARMs such as omaps. 
The memory area is divided into "record_size" chunks (also rounded down to power of two) and each oops/panic writes a "record_size" chunk of @@ -55,6 +63,7 @@ Setting the ramoops parameters can be done in 2 different manners: static struct ramoops_platform_data ramoops_data = { .mem_size = <...>, .mem_address = <...>, + .mem_type = <...>, .record_size = <...>, .dump_oops = <...>, .ecc = <...>, diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index 95731a0..8f08b2a 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt @@ -2026,8 +2026,8 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. ------------------- Module for sound cards based on the Asus AV66/AV100/AV200 chips, - i.e., Xonar D1, DX, D2, D2X, DS, Essence ST (Deluxe), Essence STX, - HDAV1.3 (Deluxe), and HDAV1.3 Slim. + i.e., Xonar D1, DX, D2, D2X, DS, DSX, Essence ST (Deluxe), + Essence STX (II), HDAV1.3 (Deluxe), and HDAV1.3 Slim. This module supports autoprobe and multiple cards. diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 79a797e..138fe43 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -664,7 +664,8 @@ The batch value of each per cpu pagelist is also updated as a result. It is set to pcp->high/4. The upper limit of batch is (PAGE_SHIFT * 8) The initial value is zero. Kernel does not use this value at boot time to set -the high water marks for each per cpu page list. +the high water marks for each per cpu page list. If the user writes '0' to this +sysctl, it will revert to this default behavior. 
============================================================== diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt index 1e6b653..d2ba80b 100644 --- a/Documentation/video4linux/gspca.txt +++ b/Documentation/video4linux/gspca.txt @@ -55,6 +55,7 @@ zc3xx 0458:700f Genius VideoCam Web V2 sonixj 0458:7025 Genius Eye 311Q sn9c20x 0458:7029 Genius Look 320s sonixj 0458:702e Genius Slim 310 NB +sn9c20x 0458:7045 Genius Look 1320 V2 sn9c20x 0458:704a Genius Slim 1320 sn9c20x 0458:704c Genius i-Look 1321 sn9c20x 045e:00f4 LifeCam VX-6000 (SN9C20x + OV9650) diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt index 2908941..53838d9 100644 --- a/Documentation/virtual/kvm/mmu.txt +++ b/Documentation/virtual/kvm/mmu.txt @@ -425,6 +425,20 @@ fault through the slow path. Since only 19 bits are used to store generation-number on mmio spte, all pages are zapped when there is an overflow. +Unfortunately, a single memory access might access kvm_memslots(kvm) multiple +times, the last one happening when the generation number is retrieved and +stored into the MMIO spte. Thus, the MMIO spte might be created based on +out-of-date information, but with an up-to-date generation number. + +To avoid this, the generation number is incremented again after synchronize_srcu +returns; thus, the low bit of kvm_memslots(kvm)->generation is only 1 during a +memslot update, while some SRCU readers might be using the old copy. We do not +want to use an MMIO sptes created with an odd generation number, and we can do +this without losing a bit in the MMIO spte. The low bit of the generation +is not stored in MMIO spte, and presumed zero when it is extracted out of the +spte. If KVM is unlucky and creates an MMIO spte while the low bit is 1, +the next access to the spte will always be a cache miss. 
+ Further reading =============== diff --git a/Documentation/vm/hwpoison.txt b/Documentation/vm/hwpoison.txt index 5500684..6ae89a9 100644 --- a/Documentation/vm/hwpoison.txt +++ b/Documentation/vm/hwpoison.txt @@ -84,6 +84,11 @@ PR_MCE_KILL PR_MCE_KILL_EARLY: Early kill PR_MCE_KILL_LATE: Late kill PR_MCE_KILL_DEFAULT: Use system global default + Note that if you want to have a dedicated thread which handles + the SIGBUS(BUS_MCEERR_AO) on behalf of the process, you should + call prctl(PR_MCE_KILL_EARLY) on the designated thread. Otherwise, + the SIGBUS is sent to the main thread. + PR_MCE_KILL_GET return current mode diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 881582f..bd43704 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -12,6 +12,8 @@ ffffc90000000000 - ffffe8ffffffffff (=45 bits) vmalloc/ioremap space ffffe90000000000 - ffffe9ffffffffff (=40 bits) hole ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB) ... unused hole ... +ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks +... unused hole ... 
ffffffff80000000 - ffffffffa0000000 (=512 MB) kernel text mapping, from phys 0 ffffffffa0000000 - ffffffffff5fffff (=1525 MB) module mapping space ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls diff --git a/Documentation/zh_CN/HOWTO b/Documentation/zh_CN/HOWTO index 7fba5aa..7599eb3 100644 --- a/Documentation/zh_CN/HOWTO +++ b/Documentation/zh_CN/HOWTO @@ -237,7 +237,7 @@ kernel.org网站的pub/linux/kernel/v2.6/目录下找到它。它的开å‘éµå¾ª 如果没有2.6.x.yç‰ˆæœ¬å†…æ ¸å˜åœ¨ï¼Œé‚£ä¹ˆæœ€æ–°çš„2.6.xç‰ˆæœ¬å†…æ ¸å°±ç›¸å½“äºŽæ˜¯å½“å‰çš„稳定 ç‰ˆå†…æ ¸ã€‚ -2.6.x.y版本由“稳定版â€å°ç»„(邮件地å€<stable@kernel.org>ï¼‰ç»´æŠ¤ï¼Œä¸€èˆ¬éš”å‘¨å‘ +2.6.x.y版本由“稳定版â€å°ç»„(邮件地å€<stable@vger.kernel.org>ï¼‰ç»´æŠ¤ï¼Œä¸€èˆ¬éš”å‘¨å‘ å¸ƒæ–°ç‰ˆæœ¬ã€‚ å†…æ ¸æºç ä¸çš„Documentation/stable_kernel_rules.txt文件具体æ述了å¯è¢«ç¨³å®š diff --git a/Documentation/zh_CN/stable_kernel_rules.txt b/Documentation/zh_CN/stable_kernel_rules.txt index b5b9b0a..26ea5ed 100644 --- a/Documentation/zh_CN/stable_kernel_rules.txt +++ b/Documentation/zh_CN/stable_kernel_rules.txt @@ -42,7 +42,7 @@ Documentation/stable_kernel_rules.txt çš„ä¸æ–‡ç¿»è¯‘ å‘稳定版代ç æ ‘æ交补ä¸çš„过程: - - 在确认了补ä¸ç¬¦åˆä»¥ä¸Šçš„规则åŽï¼Œå°†è¡¥ä¸å‘é€åˆ°stable@kernel.org。 + - 在确认了补ä¸ç¬¦åˆä»¥ä¸Šçš„规则åŽï¼Œå°†è¡¥ä¸å‘é€åˆ°stable@vger.kernel.org。 - 如果补ä¸è¢«æŽ¥å—到队列里,å‘é€è€…会收到一个ACK回å¤ï¼Œå¦‚果没有被接å—,收 到的是NAK回å¤ã€‚回å¤éœ€è¦å‡ 天的时间,这å–决于开å‘者的时间安排。 - 被接å—çš„è¡¥ä¸ä¼šè¢«åŠ 到稳定版本队列里,ç‰å¾…其他开å‘者的审查。 @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 12 -SUBLEVEL = 19 +SUBLEVEL = 37 EXTRAVERSION = NAME = One Giant Leap for Frogkind @@ -620,6 +620,8 @@ KBUILD_CFLAGS += -fomit-frame-pointer endif endif +KBUILD_CFLAGS += $(call cc-option, -fno-var-tracking-assignments) + ifdef CONFIG_DEBUG_INFO KBUILD_CFLAGS += -g KBUILD_AFLAGS += -gdwarf-2 diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index e4abdaa..b7d4dab 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -61,4 +61,31 @@ extern void read_decode_cache_bcr(void); #endif /* !__ASSEMBLY__ */ +/* Instruction 
cache related Auxiliary registers */ +#define ARC_REG_IC_BCR 0x77 /* Build Config reg */ +#define ARC_REG_IC_IVIC 0x10 +#define ARC_REG_IC_CTRL 0x11 +#define ARC_REG_IC_IVIL 0x19 +#if defined(CONFIG_ARC_MMU_V3) || defined (CONFIG_ARC_MMU_V4) +#define ARC_REG_IC_PTAG 0x1E +#endif + +/* Bit val in IC_CTRL */ +#define IC_CTRL_CACHE_DISABLE 0x1 + +/* Data cache related Auxiliary registers */ +#define ARC_REG_DC_BCR 0x72 /* Build Config reg */ +#define ARC_REG_DC_IVDC 0x47 +#define ARC_REG_DC_CTRL 0x48 +#define ARC_REG_DC_IVDL 0x4A +#define ARC_REG_DC_FLSH 0x4B +#define ARC_REG_DC_FLDL 0x4C +#if defined(CONFIG_ARC_MMU_V3) || defined (CONFIG_ARC_MMU_V4) +#define ARC_REG_DC_PTAG 0x5C +#endif + +/* Bit val in DC_CTRL */ +#define DC_CTRL_INV_MODE_FLUSH 0x40 +#define DC_CTRL_FLUSH_STATUS 0x100 + #endif /* _ASM_CACHE_H */ diff --git a/arch/arc/include/uapi/asm/ptrace.h b/arch/arc/include/uapi/asm/ptrace.h index 2618cc1..76a7739 100644 --- a/arch/arc/include/uapi/asm/ptrace.h +++ b/arch/arc/include/uapi/asm/ptrace.h @@ -11,6 +11,7 @@ #ifndef _UAPI__ASM_ARC_PTRACE_H #define _UAPI__ASM_ARC_PTRACE_H +#define PTRACE_GET_THREAD_AREA 25 #ifndef __ASSEMBLY__ /* diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index b908dde..15588b0 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -610,11 +610,13 @@ resume_user_mode_begin: resume_kernel_mode: -#ifdef CONFIG_PREEMPT - - ; This is a must for preempt_schedule_irq() + ; Disable Interrupts from this point on + ; CONFIG_PREEMPT: This is a must for preempt_schedule_irq() + ; !CONFIG_PREEMPT: To ensure restore_regs is intr safe IRQ_DISABLE r9 +#ifdef CONFIG_PREEMPT + ; Can't preempt if preemption disabled GET_CURR_THR_INFO_FROM_SP r10 ld r8, [r10, THREAD_INFO_PREEMPT_COUNT] diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 0f944f0..a2bca37 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -12,10 +12,42 @@ * to skip certain things during boot on simulator */ +#include 
<linux/linkage.h> #include <asm/asm-offsets.h> #include <asm/entry.h> -#include <linux/linkage.h> #include <asm/arcregs.h> +#include <asm/cache.h> + +.macro CPU_EARLY_SETUP + + ; Setting up Vectror Table (in case exception happens in early boot + sr @_int_vec_base_lds, [AUX_INTR_VEC_BASE] + + ; Disable I-cache/D-cache if kernel so configured + lr r5, [ARC_REG_IC_BCR] + breq r5, 0, 1f ; I$ doesn't exist + lr r5, [ARC_REG_IC_CTRL] +#ifdef CONFIG_ARC_HAS_ICACHE + bclr r5, r5, 0 ; 0 - Enable, 1 is Disable +#else + bset r5, r5, 0 ; I$ exists, but is not used +#endif + sr r5, [ARC_REG_IC_CTRL] + +1: + lr r5, [ARC_REG_DC_BCR] + breq r5, 0, 1f ; D$ doesn't exist + lr r5, [ARC_REG_DC_CTRL] + bclr r5, r5, 6 ; Invalidate (discard w/o wback) +#ifdef CONFIG_ARC_HAS_DCACHE + bclr r5, r5, 0 ; Enable (+Inv) +#else + bset r5, r5, 0 ; Disable (+Inv) +#endif + sr r5, [ARC_REG_DC_CTRL] + +1: +.endm .cpu A7 @@ -24,13 +56,13 @@ .globl stext stext: ;------------------------------------------------------------------- - ; Don't clobber r0-r4 yet. It might have bootloader provided info + ; Don't clobber r0-r2 yet. It might have bootloader provided info ;------------------------------------------------------------------- - sr @_int_vec_base_lds, [AUX_INTR_VEC_BASE] + CPU_EARLY_SETUP #ifdef CONFIG_SMP - ; Only Boot (Master) proceeds. Others wait in platform dependent way + ; Ensure Boot (Master) proceeds. Others wait in platform dependent way ; IDENTITY Reg [ 3 2 1 0 ] ; (cpu-id) ^^^ => Zero for UP ARC700 ; => #Core-ID if SMP (Master 0) @@ -39,7 +71,8 @@ stext: ; need to make sure only boot cpu takes this path. 
GET_CPU_ID r5 cmp r5, 0 - jnz arc_platform_smp_wait_to_boot + mov.ne r0, r5 + jne arc_platform_smp_wait_to_boot #endif ; Clear BSS before updating any globals ; XXX: use ZOL here @@ -101,7 +134,7 @@ stext: first_lines_of_secondary: - sr @_int_vec_base_lds, [AUX_INTR_VEC_BASE] + CPU_EARLY_SETUP ; setup per-cpu idle task as "current" on this CPU ld r0, [@secondary_idle_tsk] diff --git a/arch/arc/kernel/ptrace.c b/arch/arc/kernel/ptrace.c index 5d76706..13b3ffb 100644 --- a/arch/arc/kernel/ptrace.c +++ b/arch/arc/kernel/ptrace.c @@ -146,6 +146,10 @@ long arch_ptrace(struct task_struct *child, long request, pr_debug("REQ=%ld: ADDR =0x%lx, DATA=0x%lx)\n", request, addr, data); switch (request) { + case PTRACE_GET_THREAD_AREA: + ret = put_user(task_thread_info(child)->thr_ptr, + (unsigned long __user *)data); + break; default: ret = ptrace_request(child, request, addr, data); break; diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c index 5a1259c..7801034 100644 --- a/arch/arc/mm/cache_arc700.c +++ b/arch/arc/mm/cache_arc700.c @@ -73,37 +73,9 @@ #include <asm/cachectl.h> #include <asm/setup.h> -/* Instruction cache related Auxiliary registers */ -#define ARC_REG_IC_BCR 0x77 /* Build Config reg */ -#define ARC_REG_IC_IVIC 0x10 -#define ARC_REG_IC_CTRL 0x11 -#define ARC_REG_IC_IVIL 0x19 -#if (CONFIG_ARC_MMU_VER > 2) -#define ARC_REG_IC_PTAG 0x1E -#endif - -/* Bit val in IC_CTRL */ -#define IC_CTRL_CACHE_DISABLE 0x1 - -/* Data cache related Auxiliary registers */ -#define ARC_REG_DC_BCR 0x72 /* Build Config reg */ -#define ARC_REG_DC_IVDC 0x47 -#define ARC_REG_DC_CTRL 0x48 -#define ARC_REG_DC_IVDL 0x4A -#define ARC_REG_DC_FLSH 0x4B -#define ARC_REG_DC_FLDL 0x4C -#if (CONFIG_ARC_MMU_VER > 2) -#define ARC_REG_DC_PTAG 0x5C -#endif - -/* Bit val in DC_CTRL */ -#define DC_CTRL_INV_MODE_FLUSH 0x40 -#define DC_CTRL_FLUSH_STATUS 0x100 - -char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len) +char *arc_cache_mumbojumbo(int c, char *buf, int len) { int n = 0; - 
unsigned int c = smp_processor_id(); #define PR_CACHE(p, enb, str) \ { \ @@ -169,72 +141,43 @@ void read_decode_cache_bcr(void) */ void arc_cache_init(void) { - unsigned int cpu = smp_processor_id(); - struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache; - struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache; - unsigned int dcache_does_alias, temp; + unsigned int __maybe_unused cpu = smp_processor_id(); + struct cpuinfo_arc_cache __maybe_unused *ic, __maybe_unused *dc; char str[256]; printk(arc_cache_mumbojumbo(0, str, sizeof(str))); - if (!ic->ver) - goto chk_dc; - #ifdef CONFIG_ARC_HAS_ICACHE - /* 1. Confirm some of I-cache params which Linux assumes */ - if (ic->line_len != ARC_ICACHE_LINE_LEN) - panic("Cache H/W doesn't match kernel Config"); - - if (ic->ver != CONFIG_ARC_MMU_VER) - panic("Cache ver doesn't match MMU ver\n"); -#endif - - /* Enable/disable I-Cache */ - temp = read_aux_reg(ARC_REG_IC_CTRL); - -#ifdef CONFIG_ARC_HAS_ICACHE - temp &= ~IC_CTRL_CACHE_DISABLE; -#else - temp |= IC_CTRL_CACHE_DISABLE; + ic = &cpuinfo_arc700[cpu].icache; + if (ic->ver) { + if (ic->line_len != ARC_ICACHE_LINE_LEN) + panic("ICache line [%d] != kernel Config [%d]", + ic->line_len, ARC_ICACHE_LINE_LEN); + + if (ic->ver != CONFIG_ARC_MMU_VER) + panic("Cache ver [%d] doesn't match MMU ver [%d]\n", + ic->ver, CONFIG_ARC_MMU_VER); + } #endif - write_aux_reg(ARC_REG_IC_CTRL, temp); - -chk_dc: - if (!dc->ver) - return; - #ifdef CONFIG_ARC_HAS_DCACHE - if (dc->line_len != ARC_DCACHE_LINE_LEN) - panic("Cache H/W doesn't match kernel Config"); + dc = &cpuinfo_arc700[cpu].dcache; + if (dc->ver) { + unsigned int dcache_does_alias; - /* check for D-Cache aliasing */ - dcache_does_alias = (dc->sz / dc->assoc) > PAGE_SIZE; + if (dc->line_len != ARC_DCACHE_LINE_LEN) + panic("DCache line [%d] != kernel Config [%d]", + dc->line_len, ARC_DCACHE_LINE_LEN); - if (dcache_does_alias && !cache_is_vipt_aliasing()) - panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); - else if 
(!dcache_does_alias && cache_is_vipt_aliasing()) - panic("Don't need CONFIG_ARC_CACHE_VIPT_ALIASING\n"); -#endif + /* check for D-Cache aliasing */ + dcache_does_alias = (dc->sz / dc->assoc) > PAGE_SIZE; - /* Set the default Invalidate Mode to "simpy discard dirty lines" - * as this is more frequent then flush before invalidate - * Ofcourse we toggle this default behviour when desired - */ - temp = read_aux_reg(ARC_REG_DC_CTRL); - temp &= ~DC_CTRL_INV_MODE_FLUSH; - -#ifdef CONFIG_ARC_HAS_DCACHE - /* Enable D-Cache: Clear Bit 0 */ - write_aux_reg(ARC_REG_DC_CTRL, temp & ~IC_CTRL_CACHE_DISABLE); -#else - /* Flush D cache */ - write_aux_reg(ARC_REG_DC_FLSH, 0x1); - /* Disable D cache */ - write_aux_reg(ARC_REG_DC_CTRL, temp | IC_CTRL_CACHE_DISABLE); + if (dcache_does_alias && !cache_is_vipt_aliasing()) + panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + else if (!dcache_does_alias && cache_is_vipt_aliasing()) + panic("Don't need CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + } #endif - - return; } #define OP_INV 0x1 diff --git a/arch/arc/plat-arcfpga/Kconfig b/arch/arc/plat-arcfpga/Kconfig index 295cefe..33058aa 100644 --- a/arch/arc/plat-arcfpga/Kconfig +++ b/arch/arc/plat-arcfpga/Kconfig @@ -33,7 +33,6 @@ config ISS_SMP_EXTN bool "ARC SMP Extensions (ISS Models only)" default n depends on SMP - select ARC_HAS_COH_RTSC help SMP Extensions to ARC700, in a "simulation only" Model, supported in ARC ISS (Instruction Set Simulator). 
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 0d29536..69d9b63 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -5,6 +5,7 @@ config ARM select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAVE_CUSTOM_GPIO_H + select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT if MMU select CLONE_BACKWARDS @@ -893,7 +894,7 @@ config ARCH_MULTI_V5 bool "ARMv5 based platforms (ARM926T, XSCALE, PJ1, ...)" depends on !ARCH_MULTI_V6_V7 select ARCH_MULTI_V4_V5 - select CPU_ARM926T if (!CPU_ARM946E || CPU_ARM1020 || \ + select CPU_ARM926T if !(CPU_ARM946E || CPU_ARM1020 || \ CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || \ CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK || CPU_FEROCEON) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 75189f1..de5143e 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -399,8 +399,7 @@ dtb_check_done: add sp, sp, r6 #endif - tst r4, #1 - bleq cache_clean_flush + bl cache_clean_flush adr r0, BSYM(restart) add r0, r0, r6 @@ -1053,6 +1052,8 @@ cache_clean_flush: b call_cache_fn __armv4_mpu_cache_flush: + tst r4, #1 + movne pc, lr mov r2, #1 mov r3, #0 mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache @@ -1070,6 +1071,8 @@ __armv4_mpu_cache_flush: mov pc, lr __fa526_cache_flush: + tst r4, #1 + movne pc, lr mov r1, #0 mcr p15, 0, r1, c7, c14, 0 @ clean and invalidate D cache mcr p15, 0, r1, c7, c5, 0 @ flush I cache @@ -1078,13 +1081,16 @@ __fa526_cache_flush: __armv6_mmu_cache_flush: mov r1, #0 - mcr p15, 0, r1, c7, c14, 0 @ clean+invalidate D + tst r4, #1 + mcreq p15, 0, r1, c7, c14, 0 @ clean+invalidate D mcr p15, 0, r1, c7, c5, 0 @ invalidate I+BTB - mcr p15, 0, r1, c7, c15, 0 @ clean+invalidate unified + mcreq p15, 0, r1, c7, c15, 0 @ clean+invalidate unified mcr p15, 0, r1, c7, c10, 4 @ drain WB mov pc, lr __armv7_mmu_cache_flush: + tst r4, #1 + bne iflush mrc p15, 0, r10, c0, 
c1, 5 @ read ID_MMFR1 tst r10, #0xf << 16 @ hierarchical cache (ARMv7) mov r10, #0 @@ -1145,6 +1151,8 @@ iflush: mov pc, lr __armv5tej_mmu_cache_flush: + tst r4, #1 + movne pc, lr 1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate D cache bne 1b mcr p15, 0, r0, c7, c5, 0 @ flush I cache @@ -1152,6 +1160,8 @@ __armv5tej_mmu_cache_flush: mov pc, lr __armv4_mmu_cache_flush: + tst r4, #1 + movne pc, lr mov r2, #64*1024 @ default: 32K dcache size (*2) mov r11, #32 @ default: 32 byte line size mrc p15, 0, r3, c0, c0, 1 @ read cache type @@ -1185,6 +1195,8 @@ no_cache_id: __armv3_mmu_cache_flush: __armv3_mpu_cache_flush: + tst r4, #1 + movne pc, lr mov r1, #0 mcr p15, 0, r1, c7, c0, 0 @ invalidate whole cache v3 mov pc, lr diff --git a/arch/arm/boot/dts/am335x-bone-common.dtsi b/arch/arm/boot/dts/am335x-bone-common.dtsi index 2f66ded..e6e952e 100644 --- a/arch/arm/boot/dts/am335x-bone-common.dtsi +++ b/arch/arm/boot/dts/am335x-bone-common.dtsi @@ -120,7 +120,7 @@ musb: usb@47400000 { status = "okay"; - control@44e10000 { + control@44e10620 { status = "okay"; }; @@ -141,7 +141,7 @@ dr_mode = "host"; }; - dma-controller@07402000 { + dma-controller@47402000 { status = "okay"; }; }; diff --git a/arch/arm/boot/dts/am335x-evm.dts b/arch/arm/boot/dts/am335x-evm.dts index e8ec875..b2f4769 100644 --- a/arch/arm/boot/dts/am335x-evm.dts +++ b/arch/arm/boot/dts/am335x-evm.dts @@ -174,7 +174,7 @@ musb: usb@47400000 { status = "okay"; - control@44e10000 { + control@44e10620 { status = "okay"; }; @@ -195,7 +195,7 @@ dr_mode = "host"; }; - dma-controller@07402000 { + dma-controller@47402000 { status = "okay"; }; }; diff --git a/arch/arm/boot/dts/am335x-evmsk.dts b/arch/arm/boot/dts/am335x-evmsk.dts index 4f339fa..521d92a 100644 --- a/arch/arm/boot/dts/am335x-evmsk.dts +++ b/arch/arm/boot/dts/am335x-evmsk.dts @@ -211,7 +211,7 @@ musb: usb@47400000 { status = "okay"; - control@44e10000 { + control@44e10620 { status = "okay"; }; diff --git a/arch/arm/boot/dts/am33xx.dtsi 
b/arch/arm/boot/dts/am33xx.dtsi index f9c5da9..e9b6775 100644 --- a/arch/arm/boot/dts/am33xx.dtsi +++ b/arch/arm/boot/dts/am33xx.dtsi @@ -346,7 +346,7 @@ ti,hwmods = "usb_otg_hs"; status = "disabled"; - ctrl_mod: control@44e10000 { + ctrl_mod: control@44e10620 { compatible = "ti,am335x-usb-ctrl-module"; reg = <0x44e10620 0x10 0x44e10648 0x4>; @@ -449,7 +449,7 @@ "tx14", "tx15"; }; - cppi41dma: dma-controller@07402000 { + cppi41dma: dma-controller@47402000 { compatible = "ti,am3359-cppi41"; reg = <0x47400000 0x1000 0x47402000 0x1000 diff --git a/arch/arm/boot/dts/armada-370-xp.dtsi b/arch/arm/boot/dts/armada-370-xp.dtsi index 364a63d..beae26c 100644 --- a/arch/arm/boot/dts/armada-370-xp.dtsi +++ b/arch/arm/boot/dts/armada-370-xp.dtsi @@ -156,6 +156,7 @@ #size-cells = <0>; compatible = "marvell,orion-mdio"; reg = <0x72004 0x4>; + clocks = <&gateclk 4>; }; eth0: ethernet@70000 { diff --git a/arch/arm/boot/dts/armada-xp-db.dts b/arch/arm/boot/dts/armada-xp-db.dts index bcf6d79..8c2fe44 100644 --- a/arch/arm/boot/dts/armada-xp-db.dts +++ b/arch/arm/boot/dts/armada-xp-db.dts @@ -40,7 +40,7 @@ /* Device Bus parameters are required */ /* Read parameters */ - devbus,bus-width = <8>; + devbus,bus-width = <16>; devbus,turn-off-ps = <60000>; devbus,badr-skew-ps = <0>; devbus,acc-first-ps = <124000>; diff --git a/arch/arm/boot/dts/armada-xp-gp.dts b/arch/arm/boot/dts/armada-xp-gp.dts index 2298e4a..e325e62 100644 --- a/arch/arm/boot/dts/armada-xp-gp.dts +++ b/arch/arm/boot/dts/armada-xp-gp.dts @@ -49,7 +49,7 @@ /* Device Bus parameters are required */ /* Read parameters */ - devbus,bus-width = <8>; + devbus,bus-width = <16>; devbus,turn-off-ps = <60000>; devbus,badr-skew-ps = <0>; devbus,acc-first-ps = <124000>; diff --git a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts index 5695afc..d6cce8a 100644 --- a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts +++ b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts @@ -37,7 +37,7 @@ 
/* Device Bus parameters are required */ /* Read parameters */ - devbus,bus-width = <8>; + devbus,bus-width = <16>; devbus,turn-off-ps = <60000>; devbus,badr-skew-ps = <0>; devbus,acc-first-ps = <124000>; diff --git a/arch/arm/boot/dts/at91sam9263.dtsi b/arch/arm/boot/dts/at91sam9263.dtsi index d5bd65f..55bb7f3 100644 --- a/arch/arm/boot/dts/at91sam9263.dtsi +++ b/arch/arm/boot/dts/at91sam9263.dtsi @@ -506,6 +506,7 @@ compatible = "atmel,hsmci"; reg = <0xfff80000 0x600>; interrupts = <10 IRQ_TYPE_LEVEL_HIGH 0>; + pinctrl-names = "default"; #address-cells = <1>; #size-cells = <0>; status = "disabled"; @@ -515,6 +516,7 @@ compatible = "atmel,hsmci"; reg = <0xfff84000 0x600>; interrupts = <11 IRQ_TYPE_LEVEL_HIGH 0>; + pinctrl-names = "default"; #address-cells = <1>; #size-cells = <0>; status = "disabled"; diff --git a/arch/arm/boot/dts/exynos5250-arndale.dts b/arch/arm/boot/dts/exynos5250-arndale.dts index cee55fa..47fa5ab 100644 --- a/arch/arm/boot/dts/exynos5250-arndale.dts +++ b/arch/arm/boot/dts/exynos5250-arndale.dts @@ -286,6 +286,7 @@ regulator-name = "vdd_g3d"; regulator-min-microvolt = <1000000>; regulator-max-microvolt = <1000000>; + regulator-always-on; regulator-boot-on; op_mode = <1>; }; diff --git a/arch/arm/boot/dts/imx25.dtsi b/arch/arm/boot/dts/imx25.dtsi index 737ed5d..cf3300a 100644 --- a/arch/arm/boot/dts/imx25.dtsi +++ b/arch/arm/boot/dts/imx25.dtsi @@ -30,6 +30,7 @@ spi2 = &spi3; usb0 = &usbotg; usb1 = &usbhost1; + ethernet0 = &fec; }; cpus { @@ -157,7 +158,7 @@ #size-cells = <0>; compatible = "fsl,imx25-cspi", "fsl,imx35-cspi"; reg = <0x43fa4000 0x4000>; - clocks = <&clks 62>, <&clks 62>; + clocks = <&clks 78>, <&clks 78>; clock-names = "ipg", "per"; interrupts = <14>; status = "disabled"; @@ -351,7 +352,7 @@ compatible = "fsl,imx25-pwm", "fsl,imx27-pwm"; #pwm-cells = <2>; reg = <0x53fa0000 0x4000>; - clocks = <&clks 106>, <&clks 36>; + clocks = <&clks 106>, <&clks 52>; clock-names = "ipg", "per"; interrupts = <36>; }; @@ -370,7 +371,7 @@ 
compatible = "fsl,imx25-pwm", "fsl,imx27-pwm"; #pwm-cells = <2>; reg = <0x53fa8000 0x4000>; - clocks = <&clks 107>, <&clks 36>; + clocks = <&clks 107>, <&clks 52>; clock-names = "ipg", "per"; interrupts = <41>; }; @@ -411,7 +412,7 @@ pwm4: pwm@53fc8000 { compatible = "fsl,imx25-pwm", "fsl,imx27-pwm"; reg = <0x53fc8000 0x4000>; - clocks = <&clks 108>, <&clks 36>; + clocks = <&clks 108>, <&clks 52>; clock-names = "ipg", "per"; interrupts = <42>; }; @@ -457,7 +458,7 @@ compatible = "fsl,imx25-pwm", "fsl,imx27-pwm"; #pwm-cells = <2>; reg = <0x53fe0000 0x4000>; - clocks = <&clks 105>, <&clks 36>; + clocks = <&clks 105>, <&clks 52>; clock-names = "ipg", "per"; interrupts = <26>; }; diff --git a/arch/arm/boot/dts/imx27.dtsi b/arch/arm/boot/dts/imx27.dtsi index b7a1c6d..c07aea4 100644 --- a/arch/arm/boot/dts/imx27.dtsi +++ b/arch/arm/boot/dts/imx27.dtsi @@ -30,6 +30,7 @@ spi0 = &cspi1; spi1 = &cspi2; spi2 = &cspi3; + ethernet0 = &fec; }; aitc: aitc-interrupt-controller@e0000000 { diff --git a/arch/arm/boot/dts/imx51.dtsi b/arch/arm/boot/dts/imx51.dtsi index 54cee65..6d2a534 100644 --- a/arch/arm/boot/dts/imx51.dtsi +++ b/arch/arm/boot/dts/imx51.dtsi @@ -27,6 +27,7 @@ spi0 = &ecspi1; spi1 = &ecspi2; spi2 = &cspi; + ethernet0 = &fec; }; tzic: tz-interrupt-controller@e0000000 { diff --git a/arch/arm/boot/dts/imx53.dtsi b/arch/arm/boot/dts/imx53.dtsi index 4307e80..50eda50 100644 --- a/arch/arm/boot/dts/imx53.dtsi +++ b/arch/arm/boot/dts/imx53.dtsi @@ -33,6 +33,7 @@ spi0 = &ecspi1; spi1 = &ecspi2; spi2 = &cspi; + ethernet0 = &fec; }; cpus { @@ -87,7 +88,7 @@ ipu: ipu@18000000 { #crtc-cells = <1>; compatible = "fsl,imx53-ipu"; - reg = <0x18000000 0x080000000>; + reg = <0x18000000 0x08000000>; interrupts = <11 10>; clocks = <&clks 59>, <&clks 110>, <&clks 61>; clock-names = "bus", "di0", "di1"; diff --git a/arch/arm/boot/dts/kirkwood-mv88f6281gtw-ge.dts b/arch/arm/boot/dts/kirkwood-mv88f6281gtw-ge.dts index 6317e1d..e650e35 100644 --- 
a/arch/arm/boot/dts/kirkwood-mv88f6281gtw-ge.dts +++ b/arch/arm/boot/dts/kirkwood-mv88f6281gtw-ge.dts @@ -30,6 +30,16 @@ bootargs = "console=ttyS0,115200n8 earlyprintk"; }; + mbus { + pcie-controller { + status = "okay"; + + pcie@1,0 { + status = "okay"; + }; + }; + }; + ocp@f1000000 { pinctrl@10000 { pmx_usb_led: pmx-usb-led { @@ -73,14 +83,6 @@ ehci@50000 { status = "okay"; }; - - pcie-controller { - status = "okay"; - - pcie@1,0 { - status = "okay"; - }; - }; }; gpio-leds { diff --git a/arch/arm/boot/dts/kirkwood-nsa310-common.dtsi b/arch/arm/boot/dts/kirkwood-nsa310-common.dtsi index 06267a9..7c3f4bc 100644 --- a/arch/arm/boot/dts/kirkwood-nsa310-common.dtsi +++ b/arch/arm/boot/dts/kirkwood-nsa310-common.dtsi @@ -4,6 +4,16 @@ / { model = "ZyXEL NSA310"; + mbus { + pcie-controller { + status = "okay"; + + pcie@1,0 { + status = "okay"; + }; + }; + }; + ocp@f1000000 { pinctrl: pinctrl@10000 { @@ -69,14 +79,6 @@ reg = <0x5040000 0x2fc0000>; }; }; - - pcie-controller { - status = "okay"; - - pcie@1,0 { - status = "okay"; - }; - }; }; gpio_poweroff { diff --git a/arch/arm/boot/dts/ste-ccu8540.dts b/arch/arm/boot/dts/ste-ccu8540.dts index 7f3baf5..32dd55e 100644 --- a/arch/arm/boot/dts/ste-ccu8540.dts +++ b/arch/arm/boot/dts/ste-ccu8540.dts @@ -18,6 +18,7 @@ compatible = "st-ericsson,ccu8540", "st-ericsson,u8540"; memory@0 { + device_type = "memory"; reg = <0x20000000 0x1f000000>, <0xc0000000 0x3f000000>; }; diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index fe88105..f8bca69 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -116,6 +116,7 @@ CONFIG_FB_SIMPLE=y CONFIG_USB=y CONFIG_USB_XHCI_HCD=y CONFIG_USB_EHCI_HCD=y +CONFIG_USB_EHCI_EXYNOS=y CONFIG_USB_EHCI_TEGRA=y CONFIG_USB_EHCI_HCD_PLATFORM=y CONFIG_USB_ISP1760_HCD=y diff --git a/arch/arm/crypto/aes_glue.c b/arch/arm/crypto/aes_glue.c index 59f7877..e73ec2a 100644 --- a/arch/arm/crypto/aes_glue.c +++ b/arch/arm/crypto/aes_glue.c @@ 
-103,6 +103,6 @@ module_exit(aes_fini); MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm (ASM)"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("aes"); -MODULE_ALIAS("aes-asm"); +MODULE_ALIAS_CRYPTO("aes"); +MODULE_ALIAS_CRYPTO("aes-asm"); MODULE_AUTHOR("David McCullough <ucdevel@gmail.com>"); diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c index 76cd976..ace4cd6 100644 --- a/arch/arm/crypto/sha1_glue.c +++ b/arch/arm/crypto/sha1_glue.c @@ -175,5 +175,5 @@ module_exit(sha1_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm (ARM)"); -MODULE_ALIAS("sha1"); +MODULE_ALIAS_CRYPTO("sha1"); MODULE_AUTHOR("David McCullough <ucdevel@gmail.com>"); diff --git a/arch/arm/include/asm/div64.h b/arch/arm/include/asm/div64.h index 191ada6..662c7bd 100644 --- a/arch/arm/include/asm/div64.h +++ b/arch/arm/include/asm/div64.h @@ -156,7 +156,7 @@ /* Select the best insn combination to perform the */ \ /* actual __m * __n / (__p << 64) operation. */ \ if (!__c) { \ - asm ( "umull %Q0, %R0, %1, %Q2\n\t" \ + asm ( "umull %Q0, %R0, %Q1, %Q2\n\t" \ "mov %Q0, #0" \ : "=&r" (__res) \ : "r" (__m), "r" (__n) \ diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h index e42cf59..2aff798 100644 --- a/arch/arm/include/asm/futex.h +++ b/arch/arm/include/asm/futex.h @@ -3,11 +3,6 @@ #ifdef __KERNEL__ -#if defined(CONFIG_CPU_USE_DOMAINS) && defined(CONFIG_SMP) -/* ARM doesn't provide unprivileged exclusive memory accessors */ -#include <asm-generic/futex.h> -#else - #include <linux/futex.h> #include <linux/uaccess.h> #include <asm/errno.h> @@ -164,6 +159,5 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) return ret; } -#endif /* !(CPU_USE_DOMAINS && SMP) */ #endif /* __KERNEL__ */ #endif /* _ASM_ARM_FUTEX_H */ diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index f97ee02..c98c9c8 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ 
-140,6 +140,7 @@ #define L_PTE_MT_DEV_NONSHARED (_AT(pteval_t, 0x0c) << 2) /* 1100 */ #define L_PTE_MT_DEV_WC (_AT(pteval_t, 0x09) << 2) /* 1001 */ #define L_PTE_MT_DEV_CACHED (_AT(pteval_t, 0x0b) << 2) /* 1011 */ +#define L_PTE_MT_VECTORS (_AT(pteval_t, 0x0f) << 2) /* 1111 */ #define L_PTE_MT_MASK (_AT(pteval_t, 0x0f) << 2) #ifndef __ASSEMBLY__ diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 33cb511..cef6be7 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -43,16 +43,6 @@ struct cpu_context_save { __u32 extra[2]; /* Xscale 'acc' register, etc */ }; -struct arm_restart_block { - union { - /* For user cache flushing */ - struct { - unsigned long start; - unsigned long end; - } cache; - }; -}; - /* * low level task data that entry.S needs immediate access to. * __switch_to() assumes cpu_context follows immediately after cpu_domain. @@ -79,7 +69,6 @@ struct thread_info { unsigned long thumbee_state; /* ThumbEE Handler Base register */ #endif struct restart_block restart_block; - struct arm_restart_block arm_restart_block; }; #define INIT_THREAD_INFO(tsk) \ diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h index 83259b8..36172ad 100644 --- a/arch/arm/include/asm/tls.h +++ b/arch/arm/include/asm/tls.h @@ -1,6 +1,9 @@ #ifndef __ASMARM_TLS_H #define __ASMARM_TLS_H +#include <linux/compiler.h> +#include <asm/thread_info.h> + #ifdef __ASSEMBLY__ #include <asm/asm-offsets.h> .macro switch_tls_none, base, tp, tpuser, tmp1, tmp2 @@ -50,6 +53,47 @@ #endif #ifndef __ASSEMBLY__ + +static inline void set_tls(unsigned long val) +{ + struct thread_info *thread; + + thread = current_thread_info(); + + thread->tp_value[0] = val; + + /* + * This code runs with preemption enabled and therefore must + * be reentrant with respect to switch_tls. 
+ * + * We need to ensure ordering between the shadow state and the + * hardware state, so that we don't corrupt the hardware state + * with a stale shadow state during context switch. + * + * If we're preempted here, switch_tls will load TPIDRURO from + * thread_info upon resuming execution and the following mcr + * is merely redundant. + */ + barrier(); + + if (!tls_emu) { + if (has_tls_reg) { + asm("mcr p15, 0, %0, c13, c0, 3" + : : "r" (val)); + } else { + /* + * User space must never try to access this + * directly. Expect your app to break + * eventually if you do so. The user helper + * at 0xffff0fe0 must be used instead. (see + * entry-armv.S for details) + */ + *((unsigned int *)0xffff0ff0) = val; + } + + } +} + static inline unsigned long get_tpuser(void) { unsigned long reg = 0; @@ -59,5 +103,23 @@ static inline unsigned long get_tpuser(void) return reg; } + +static inline void set_tpuser(unsigned long val) +{ + /* Since TPIDRURW is fully context-switched (unlike TPIDRURO), + * we need not update thread_info. 
+ */ + if (has_tls_reg && !tls_emu) { + asm("mcr p15, 0, %0, c13, c0, 2" + : : "r" (val)); + } +} + +static inline void flush_tls(void) +{ + set_tls(0); + set_tpuser(0); +} + #endif #endif /* __ASMARM_TLS_H */ diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 72abdc5..7f3f3cc 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -171,8 +171,9 @@ extern int __put_user_8(void *, unsigned long long); #define __put_user_check(x,p) \ ({ \ unsigned long __limit = current_thread_info()->addr_limit - 1; \ + const typeof(*(p)) __user *__tmp_p = (p); \ register const typeof(*(p)) __r2 asm("r2") = (x); \ - register const typeof(*(p)) __user *__p asm("r0") = (p);\ + register const typeof(*(p)) __user *__p asm("r0") = __tmp_p; \ register unsigned long __l asm("r1") = __limit; \ register int __e asm("r0"); \ switch (sizeof(*(__p))) { \ diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index 141baa3..cbd6197 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -48,6 +48,5 @@ */ #define __IGNORE_fadvise64_64 #define __IGNORE_migrate_pages -#define __IGNORE_kcmp #endif /* __ASM_ARM_UNISTD_H */ diff --git a/arch/arm/kernel/crash_dump.c b/arch/arm/kernel/crash_dump.c index 90c50d4..5d1286d 100644 --- a/arch/arm/kernel/crash_dump.c +++ b/arch/arm/kernel/crash_dump.c @@ -39,7 +39,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, if (!csize) return 0; - vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); + vaddr = ioremap(__pfn_to_phys(pfn), PAGE_SIZE); if (!vaddr) return -ENOMEM; diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index 39f89fb..88c6bab 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -132,6 +132,10 @@ orrne r5, V7M_xPSR_FRAMEPTRALIGN biceq r5, V7M_xPSR_FRAMEPTRALIGN + @ ensure bit 0 is cleared in the PC, otherwise behaviour is + @ unpredictable + bic r4, #1 + @ write basic exception frame 
stmdb r2!, {r1, r3-r5} ldmia sp, {r1, r3-r5} diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index afa204a..4dd26a1 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -170,7 +170,7 @@ static bool migrate_one_irq(struct irq_desc *desc) c = irq_data_get_irq_chip(d); if (!c->irq_set_affinity) pr_debug("IRQ%u: unable to set affinity\n", d->irq); - else if (c->irq_set_affinity(d, affinity, true) == IRQ_SET_MASK_OK && ret) + else if (c->irq_set_affinity(d, affinity, false) == IRQ_SET_MASK_OK && ret) cpumask_copy(d->affinity, affinity); return ret; diff --git a/arch/arm/kernel/kprobes-common.c b/arch/arm/kernel/kprobes-common.c index 18a7628..380c20f 100644 --- a/arch/arm/kernel/kprobes-common.c +++ b/arch/arm/kernel/kprobes-common.c @@ -14,6 +14,7 @@ #include <linux/kernel.h> #include <linux/kprobes.h> #include <asm/system_info.h> +#include <asm/opcodes.h> #include "kprobes.h" @@ -305,7 +306,8 @@ kprobe_decode_ldmstm(kprobe_opcode_t insn, struct arch_specific_insn *asi) if (handler) { /* We can emulate the instruction in (possibly) modified form */ - asi->insn[0] = (insn & 0xfff00000) | (rn << 16) | reglist; + asi->insn[0] = __opcode_to_mem_arm((insn & 0xfff00000) | + (rn << 16) | reglist); asi->insn_handler = handler; return INSN_GOOD; } @@ -334,13 +336,14 @@ prepare_emulated_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi, #ifdef CONFIG_THUMB2_KERNEL if (thumb) { u16 *thumb_insn = (u16 *)asi->insn; - thumb_insn[1] = 0x4770; /* Thumb bx lr */ - thumb_insn[2] = 0x4770; /* Thumb bx lr */ + /* Thumb bx lr */ + thumb_insn[1] = __opcode_to_mem_thumb16(0x4770); + thumb_insn[2] = __opcode_to_mem_thumb16(0x4770); return insn; } - asi->insn[1] = 0xe12fff1e; /* ARM bx lr */ + asi->insn[1] = __opcode_to_mem_arm(0xe12fff1e); /* ARM bx lr */ #else - asi->insn[1] = 0xe1a0f00e; /* mov pc, lr */ + asi->insn[1] = __opcode_to_mem_arm(0xe1a0f00e); /* mov pc, lr */ #endif /* Make an ARM instruction unconditional */ if (insn < 0xe0000000) @@ -360,12 
+363,12 @@ set_emulated_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi, if (thumb) { u16 *ip = (u16 *)asi->insn; if (is_wide_instruction(insn)) - *ip++ = insn >> 16; - *ip++ = insn; + *ip++ = __opcode_to_mem_thumb16(insn >> 16); + *ip++ = __opcode_to_mem_thumb16(insn); return; } #endif - asi->insn[0] = insn; + asi->insn[0] = __opcode_to_mem_arm(insn); } /* diff --git a/arch/arm/kernel/kprobes-thumb.c b/arch/arm/kernel/kprobes-thumb.c index 6123daf..241222c 100644 --- a/arch/arm/kernel/kprobes-thumb.c +++ b/arch/arm/kernel/kprobes-thumb.c @@ -11,6 +11,7 @@ #include <linux/kernel.h> #include <linux/kprobes.h> #include <linux/module.h> +#include <asm/opcodes.h> #include "kprobes.h" @@ -163,9 +164,9 @@ t32_decode_ldmstm(kprobe_opcode_t insn, struct arch_specific_insn *asi) enum kprobe_insn ret = kprobe_decode_ldmstm(insn, asi); /* Fixup modified instruction to have halfwords in correct order...*/ - insn = asi->insn[0]; - ((u16 *)asi->insn)[0] = insn >> 16; - ((u16 *)asi->insn)[1] = insn & 0xffff; + insn = __mem_to_opcode_arm(asi->insn[0]); + ((u16 *)asi->insn)[0] = __opcode_to_mem_thumb16(insn >> 16); + ((u16 *)asi->insn)[1] = __opcode_to_mem_thumb16(insn & 0xffff); return ret; } @@ -1153,7 +1154,7 @@ t16_decode_hiregs(kprobe_opcode_t insn, struct arch_specific_insn *asi) { insn &= ~0x00ff; insn |= 0x001; /* Set Rdn = R1 and Rm = R0 */ - ((u16 *)asi->insn)[0] = insn; + ((u16 *)asi->insn)[0] = __opcode_to_mem_thumb16(insn); asi->insn_handler = t16_emulate_hiregs; return INSN_GOOD; } @@ -1182,8 +1183,10 @@ t16_decode_push(kprobe_opcode_t insn, struct arch_specific_insn *asi) * and call it with R9=SP and LR in the register list represented * by R8. 
*/ - ((u16 *)asi->insn)[0] = 0xe929; /* 1st half STMDB R9!,{} */ - ((u16 *)asi->insn)[1] = insn & 0x1ff; /* 2nd half (register list) */ + /* 1st half STMDB R9!,{} */ + ((u16 *)asi->insn)[0] = __opcode_to_mem_thumb16(0xe929); + /* 2nd half (register list) */ + ((u16 *)asi->insn)[1] = __opcode_to_mem_thumb16(insn & 0x1ff); asi->insn_handler = t16_emulate_push; return INSN_GOOD; } @@ -1232,8 +1235,10 @@ t16_decode_pop(kprobe_opcode_t insn, struct arch_specific_insn *asi) * and call it with R9=SP and PC in the register list represented * by R8. */ - ((u16 *)asi->insn)[0] = 0xe8b9; /* 1st half LDMIA R9!,{} */ - ((u16 *)asi->insn)[1] = insn & 0x1ff; /* 2nd half (register list) */ + /* 1st half LDMIA R9!,{} */ + ((u16 *)asi->insn)[0] = __opcode_to_mem_thumb16(0xe8b9); + /* 2nd half (register list) */ + ((u16 *)asi->insn)[1] = __opcode_to_mem_thumb16(insn & 0x1ff); asi->insn_handler = insn & 0x100 ? t16_emulate_pop_pc : t16_emulate_pop_nopc; return INSN_GOOD; diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c index 170e9f3..1c6ece5 100644 --- a/arch/arm/kernel/kprobes.c +++ b/arch/arm/kernel/kprobes.c @@ -26,6 +26,7 @@ #include <linux/stop_machine.h> #include <linux/stringify.h> #include <asm/traps.h> +#include <asm/opcodes.h> #include <asm/cacheflush.h> #include "kprobes.h" @@ -62,10 +63,10 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) #ifdef CONFIG_THUMB2_KERNEL thumb = true; addr &= ~1; /* Bit 0 would normally be set to indicate Thumb code */ - insn = ((u16 *)addr)[0]; + insn = __mem_to_opcode_thumb16(((u16 *)addr)[0]); if (is_wide_instruction(insn)) { - insn <<= 16; - insn |= ((u16 *)addr)[1]; + u16 inst2 = __mem_to_opcode_thumb16(((u16 *)addr)[1]); + insn = __opcode_thumb32_compose(insn, inst2); decode_insn = thumb32_kprobe_decode_insn; } else decode_insn = thumb16_kprobe_decode_insn; @@ -73,7 +74,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) thumb = false; if (addr & 0x3) return -EINVAL; - insn = *p->addr; + insn = 
__mem_to_opcode_arm(*p->addr); decode_insn = arm_kprobe_decode_insn; #endif diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index 57221e3..8cf0996 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -14,11 +14,12 @@ #include <asm/pgalloc.h> #include <asm/mmu_context.h> #include <asm/cacheflush.h> +#include <asm/fncpy.h> #include <asm/mach-types.h> #include <asm/smp_plat.h> #include <asm/system_misc.h> -extern const unsigned char relocate_new_kernel[]; +extern void relocate_new_kernel(void); extern const unsigned int relocate_new_kernel_size; extern unsigned long kexec_start_address; @@ -142,6 +143,8 @@ void machine_kexec(struct kimage *image) { unsigned long page_list; unsigned long reboot_code_buffer_phys; + unsigned long reboot_entry = (unsigned long)relocate_new_kernel; + unsigned long reboot_entry_phys; void *reboot_code_buffer; /* @@ -168,16 +171,23 @@ void machine_kexec(struct kimage *image) /* copy our kernel relocation code to the control code page */ - memcpy(reboot_code_buffer, - relocate_new_kernel, relocate_new_kernel_size); + reboot_entry = fncpy(reboot_code_buffer, + reboot_entry, + relocate_new_kernel_size); + reboot_entry_phys = (unsigned long)reboot_entry + + (reboot_code_buffer_phys - (unsigned long)reboot_code_buffer); - - flush_icache_range((unsigned long) reboot_code_buffer, - (unsigned long) reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE); printk(KERN_INFO "Bye!\n"); if (kexec_reinit) kexec_reinit(); - soft_restart(reboot_code_buffer_phys); + soft_restart(reboot_entry_phys); +} + +void arch_crash_save_vmcoreinfo(void) +{ +#ifdef CONFIG_ARM_LPAE + VMCOREINFO_CONFIG(ARM_LPAE); +#endif } diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index e186ee1..de5cd76 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -303,11 +303,18 @@ static irqreturn_t armpmu_dispatch_irq(int irq, void *dev) struct arm_pmu *armpmu = (struct arm_pmu *) dev; 
struct platform_device *plat_device = armpmu->plat_device; struct arm_pmu_platdata *plat = dev_get_platdata(&plat_device->dev); + int ret; + u64 start_clock, finish_clock; + start_clock = sched_clock(); if (plat && plat->handle_irq) - return plat->handle_irq(irq, dev, armpmu->handle_irq); + ret = plat->handle_irq(irq, dev, armpmu->handle_irq); else - return armpmu->handle_irq(irq, dev); + ret = armpmu->handle_irq(irq, dev); + finish_clock = sched_clock(); + + perf_sample_event_took(finish_clock - start_clock); + return ret; } static void diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 83af229..e35007d 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -334,6 +334,8 @@ void flush_thread(void) memset(&tsk->thread.debug, 0, sizeof(struct debug_info)); memset(&thread->fpstate, 0, sizeof(union fp_state)); + flush_tls(); + thread_notify(THREAD_NOTIFY_FLUSH, thread); } diff --git a/arch/arm/kernel/relocate_kernel.S b/arch/arm/kernel/relocate_kernel.S index d0cdedf..9585896 100644 --- a/arch/arm/kernel/relocate_kernel.S +++ b/arch/arm/kernel/relocate_kernel.S @@ -2,10 +2,12 @@ * relocate_kernel.S - put the kernel image in place to boot */ +#include <linux/linkage.h> #include <asm/kexec.h> - .globl relocate_new_kernel -relocate_new_kernel: + .align 3 /* not needed for this code, but keeps fncpy() happy */ + +ENTRY(relocate_new_kernel) ldr r0,kexec_indirection_page ldr r1,kexec_start_address @@ -79,6 +81,8 @@ kexec_mach_type: kexec_boot_atags: .long 0x0 +ENDPROC(relocate_new_kernel) + relocate_new_kernel_end: .globl relocate_new_kernel_size diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 2a767d2..6ebeaf4 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -1016,6 +1016,15 @@ static int c_show(struct seq_file *m, void *v) seq_printf(m, "model name\t: %s rev %d (%s)\n", cpu_name, cpuid & 15, elf_platform); +#if defined(CONFIG_SMP) + seq_printf(m, "BogoMIPS\t: %lu.%02lu\n", + 
per_cpu(cpu_data, i).loops_per_jiffy / (500000UL/HZ), + (per_cpu(cpu_data, i).loops_per_jiffy / (5000UL/HZ)) % 100); +#else + seq_printf(m, "BogoMIPS\t: %lu.%02lu\n", + loops_per_jiffy / (500000/HZ), + (loops_per_jiffy / (5000/HZ)) % 100); +#endif /* dump out the processor features */ seq_puts(m, "Features\t: "); diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 72024ea..bd1b9e6 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -383,8 +383,17 @@ asmlinkage void secondary_start_kernel(void) void __init smp_cpus_done(unsigned int max_cpus) { - printk(KERN_INFO "SMP: Total of %d processors activated.\n", - num_online_cpus()); + int cpu; + unsigned long bogosum = 0; + + for_each_online_cpu(cpu) + bogosum += per_cpu(cpu_data, cpu).loops_per_jiffy; + + printk(KERN_INFO "SMP: Total of %d processors activated " + "(%lu.%02lu BogoMIPS).\n", + num_online_cpus(), + bogosum / (500000/HZ), + (bogosum / (5000/HZ)) % 100); hyp_mode_check(); } diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c index af4e8c8..6582c4a 100644 --- a/arch/arm/kernel/stacktrace.c +++ b/arch/arm/kernel/stacktrace.c @@ -83,13 +83,16 @@ static int save_trace(struct stackframe *frame, void *d) return trace->nr_entries >= trace->max_entries; } -void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +/* This must be noinline to so that our skip calculation works correctly */ +static noinline void __save_stack_trace(struct task_struct *tsk, + struct stack_trace *trace, unsigned int nosched) { struct stack_trace_data data; struct stackframe frame; data.trace = trace; data.skip = trace->skip; + data.no_sched_functions = nosched; if (tsk != current) { #ifdef CONFIG_SMP @@ -102,7 +105,6 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) trace->entries[trace->nr_entries++] = ULONG_MAX; return; #else - data.no_sched_functions = 1; frame.fp = thread_saved_fp(tsk); frame.sp = thread_saved_sp(tsk); frame.lr = 0; /* 
recovered from the stack */ @@ -111,11 +113,12 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) } else { register unsigned long current_sp asm ("sp"); - data.no_sched_functions = 0; + /* We don't want this function nor the caller */ + data.skip += 2; frame.fp = (unsigned long)__builtin_frame_address(0); frame.sp = current_sp; frame.lr = (unsigned long)__builtin_return_address(0); - frame.pc = (unsigned long)save_stack_trace_tsk; + frame.pc = (unsigned long)__save_stack_trace; } walk_stackframe(&frame, save_trace, &data); @@ -123,9 +126,14 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) trace->entries[trace->nr_entries++] = ULONG_MAX; } +void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +{ + __save_stack_trace(tsk, trace, 1); +} + void save_stack_trace(struct stack_trace *trace) { - save_stack_trace_tsk(current, trace); + __save_stack_trace(current, trace, 0); } EXPORT_SYMBOL_GPL(save_stack_trace); #endif diff --git a/arch/arm/kernel/thumbee.c b/arch/arm/kernel/thumbee.c index 7b8403b..80f0d69 100644 --- a/arch/arm/kernel/thumbee.c +++ b/arch/arm/kernel/thumbee.c @@ -45,7 +45,7 @@ static int thumbee_notifier(struct notifier_block *self, unsigned long cmd, void switch (cmd) { case THREAD_NOTIFY_FLUSH: - thread->thumbee_state = 0; + teehbr_write(0); break; case THREAD_NOTIFY_SWITCH: current_thread_info()->thumbee_state = teehbr_read(); diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 1f735aa..8e6cd76 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -503,8 +503,6 @@ static int bad_syscall(int n, struct pt_regs *regs) return regs->ARM_r0; } -static long do_cache_op_restart(struct restart_block *); - static inline int __do_cache_op(unsigned long start, unsigned long end) { @@ -513,24 +511,8 @@ __do_cache_op(unsigned long start, unsigned long end) do { unsigned long chunk = min(PAGE_SIZE, end - start); - if (signal_pending(current)) { - 
struct thread_info *ti = current_thread_info(); - - ti->restart_block = (struct restart_block) { - .fn = do_cache_op_restart, - }; - - ti->arm_restart_block = (struct arm_restart_block) { - { - .cache = { - .start = start, - .end = end, - }, - }, - }; - - return -ERESTART_RESTARTBLOCK; - } + if (fatal_signal_pending(current)) + return 0; ret = flush_cache_user_range(start, start + chunk); if (ret) @@ -543,15 +525,6 @@ __do_cache_op(unsigned long start, unsigned long end) return 0; } -static long do_cache_op_restart(struct restart_block *unused) -{ - struct arm_restart_block *restart_block; - - restart_block = &current_thread_info()->arm_restart_block; - return __do_cache_op(restart_block->cache.start, - restart_block->cache.end); -} - static inline int do_cache_op(unsigned long start, unsigned long end, int flags) { @@ -571,7 +544,6 @@ do_cache_op(unsigned long start, unsigned long end, int flags) #define NR(x) ((__ARM_NR_##x) - __ARM_NR_BASE) asmlinkage int arm_syscall(int no, struct pt_regs *regs) { - struct thread_info *thread = current_thread_info(); siginfo_t info; if ((no >> 16) != (__ARM_NR_BASE>> 16)) @@ -622,21 +594,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs) return regs->ARM_r0; case NR(set_tls): - thread->tp_value[0] = regs->ARM_r0; - if (tls_emu) - return 0; - if (has_tls_reg) { - asm ("mcr p15, 0, %0, c13, c0, 3" - : : "r" (regs->ARM_r0)); - } else { - /* - * User space must never try to access this directly. - * Expect your app to break eventually if you do so. - * The user helper at 0xffff0fe0 must be used instead. 
- * (see entry-armv.S for details) - */ - *((unsigned int *)0xffff0ff0) = regs->ARM_r0; - } + set_tls(regs->ARM_r0); return 0; #ifdef CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 6b9e36a..6e52e54 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -42,6 +42,8 @@ static unsigned long hyp_idmap_start; static unsigned long hyp_idmap_end; static phys_addr_t hyp_idmap_vector; +#define pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) + #define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x)) static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) @@ -211,14 +213,14 @@ void free_boot_hyp_pgd(void) if (boot_hyp_pgd) { unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); - kfree(boot_hyp_pgd); + free_pages((unsigned long)boot_hyp_pgd, pgd_order); boot_hyp_pgd = NULL; } if (hyp_pgd) unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); - kfree(init_bounce_page); + free_page((unsigned long)init_bounce_page); init_bounce_page = NULL; mutex_unlock(&kvm_hyp_pgd_mutex); @@ -248,7 +250,7 @@ void free_hyp_pgds(void) for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); - kfree(hyp_pgd); + free_pages((unsigned long)hyp_pgd, pgd_order); hyp_pgd = NULL; } @@ -940,7 +942,7 @@ int kvm_mmu_init(void) size_t len = __hyp_idmap_text_end - __hyp_idmap_text_start; phys_addr_t phys_base; - init_bounce_page = kmalloc(PAGE_SIZE, GFP_KERNEL); + init_bounce_page = (void *)__get_free_page(GFP_KERNEL); if (!init_bounce_page) { kvm_err("Couldn't allocate HYP init bounce page\n"); err = -ENOMEM; @@ -966,8 +968,9 @@ int kvm_mmu_init(void) (unsigned long)phys_base); } - hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); - boot_hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); + hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order); + boot_hyp_pgd 
= (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order); + if (!hyp_pgd || !boot_hyp_pgd) { kvm_err("Hyp mode PGD not allocated\n"); err = -ENOMEM; diff --git a/arch/arm/mach-at91/clock.c b/arch/arm/mach-at91/clock.c index 6b2630a..0778e54 100644 --- a/arch/arm/mach-at91/clock.c +++ b/arch/arm/mach-at91/clock.c @@ -963,6 +963,7 @@ static int __init at91_clock_reset(void) } at91_pmc_write(AT91_PMC_SCDR, scdr); + at91_pmc_write(AT91_PMC_PCDR, pcdr); if (cpu_is_sama5d3()) at91_pmc_write(AT91_PMC_PCDR1, pcdr1); diff --git a/arch/arm/mach-at91/sysirq_mask.c b/arch/arm/mach-at91/sysirq_mask.c index 2ba694f..f8bc351 100644 --- a/arch/arm/mach-at91/sysirq_mask.c +++ b/arch/arm/mach-at91/sysirq_mask.c @@ -25,24 +25,28 @@ #include "generic.h" -#define AT91_RTC_IDR 0x24 /* Interrupt Disable Register */ -#define AT91_RTC_IMR 0x28 /* Interrupt Mask Register */ +#define AT91_RTC_IDR 0x24 /* Interrupt Disable Register */ +#define AT91_RTC_IMR 0x28 /* Interrupt Mask Register */ +#define AT91_RTC_IRQ_MASK 0x1f /* Available IRQs mask */ void __init at91_sysirq_mask_rtc(u32 rtc_base) { void __iomem *base; - u32 mask; base = ioremap(rtc_base, 64); if (!base) return; - mask = readl_relaxed(base + AT91_RTC_IMR); - if (mask) { - pr_info("AT91: Disabling rtc irq\n"); - writel_relaxed(mask, base + AT91_RTC_IDR); - (void)readl_relaxed(base + AT91_RTC_IMR); /* flush */ - } + /* + * sam9x5 SoCs have the following errata: + * "RTC: Interrupt Mask Register cannot be used + * Interrupt Mask Register read always returns 0." + * + * Hence we're not relying on IMR values to disable + * interrupts. 
+ */ + writel_relaxed(AT91_RTC_IRQ_MASK, base + AT91_RTC_IDR); + (void)readl_relaxed(base + AT91_RTC_IMR); /* flush */ iounmap(base); } diff --git a/arch/arm/mach-imx/clk-imx6q.c b/arch/arm/mach-imx/clk-imx6q.c index ef85ac4..2eed3cf 100644 --- a/arch/arm/mach-imx/clk-imx6q.c +++ b/arch/arm/mach-imx/clk-imx6q.c @@ -304,8 +304,8 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node) post_div_table[1].div = 1; post_div_table[2].div = 1; video_div_table[1].div = 1; - video_div_table[2].div = 1; - }; + video_div_table[3].div = 1; + } /* type name parent_name base div_mask */ clk[pll1_sys] = imx_clk_pllv3(IMX_PLLV3_SYS, "pll1_sys", "osc", base, 0x7f); diff --git a/arch/arm/mach-imx/devices/platform-ipu-core.c b/arch/arm/mach-imx/devices/platform-ipu-core.c index fc4dd7c..6bd7c3f 100644 --- a/arch/arm/mach-imx/devices/platform-ipu-core.c +++ b/arch/arm/mach-imx/devices/platform-ipu-core.c @@ -77,7 +77,7 @@ struct platform_device *__init imx_alloc_mx3_camera( pdev = platform_device_alloc("mx3-camera", 0); if (!pdev) - goto err; + return ERR_PTR(-ENOMEM); pdev->dev.dma_mask = kmalloc(sizeof(*pdev->dev.dma_mask), GFP_KERNEL); if (!pdev->dev.dma_mask) diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c index 58adf2f..7e0529b 100644 --- a/arch/arm/mach-mvebu/coherency.c +++ b/arch/arm/mach-mvebu/coherency.c @@ -124,6 +124,29 @@ int __init coherency_init(void) { struct device_node *np; + /* + * The coherency fabric is needed: + * - For coherency between processors on Armada XP, so only + * when SMP is enabled. + * - For coherency between the processor and I/O devices, but + * this coherency requires many pre-requisites (write + * allocate cache policy, shareable pages, SMP bit set) that + * are only meant in SMP situations. 
+ * + * Note that this means that on Armada 370, there is currently + * no way to use hardware I/O coherency, because even when + * CONFIG_SMP is enabled, is_smp() returns false due to the + * Armada 370 being a single-core processor. To lift this + * limitation, we would have to find a way to make the cache + * policy set to write-allocate (on all Armada SoCs), and to + * set the shareable attribute in page tables (on all Armada + * SoCs except the Armada 370). Unfortunately, such decisions + * are taken very early in the kernel boot process, at a point + * where we don't know yet on which SoC we are running. + */ + if (!is_smp()) + return 0; + np = of_find_matching_node(NULL, of_coherency_table); if (np) { struct resource res; @@ -150,6 +173,9 @@ static int __init coherency_late_init(void) { struct device_node *np; + if (!is_smp()) + return 0; + np = of_find_matching_node(NULL, of_coherency_table); if (np) { bus_register_notifier(&platform_bus_type, diff --git a/arch/arm/mach-omap1/board-h2.c b/arch/arm/mach-omap1/board-h2.c index fd90caf..db57072 100644 --- a/arch/arm/mach-omap1/board-h2.c +++ b/arch/arm/mach-omap1/board-h2.c @@ -343,7 +343,7 @@ static struct omap_usb_config h2_usb_config __initdata = { /* usb1 has a Mini-AB port and external isp1301 transceiver */ .otg = 2, -#ifdef CONFIG_USB_GADGET_OMAP +#if IS_ENABLED(CONFIG_USB_OMAP) .hmc_mode = 19, /* 0:host(off) 1:dev|otg 2:disabled */ /* .hmc_mode = 21,*/ /* 0:host(off) 1:dev(loopback) 2:host(loopback) */ #elif defined(CONFIG_USB_OHCI_HCD) || defined(CONFIG_USB_OHCI_HCD_MODULE) diff --git a/arch/arm/mach-omap1/board-h3.c b/arch/arm/mach-omap1/board-h3.c index 816ecd1..bfed4f9 100644 --- a/arch/arm/mach-omap1/board-h3.c +++ b/arch/arm/mach-omap1/board-h3.c @@ -366,7 +366,7 @@ static struct omap_usb_config h3_usb_config __initdata = { /* usb1 has a Mini-AB port and external isp1301 transceiver */ .otg = 2, -#ifdef CONFIG_USB_GADGET_OMAP +#if IS_ENABLED(CONFIG_USB_OMAP) .hmc_mode = 19, /* 0:host(off) 
1:dev|otg 2:disabled */ #elif defined(CONFIG_USB_OHCI_HCD) || defined(CONFIG_USB_OHCI_HCD_MODULE) /* NONSTANDARD CABLE NEEDED (B-to-Mini-B) */ diff --git a/arch/arm/mach-omap1/board-innovator.c b/arch/arm/mach-omap1/board-innovator.c index bd5f02e..c49ce83 100644 --- a/arch/arm/mach-omap1/board-innovator.c +++ b/arch/arm/mach-omap1/board-innovator.c @@ -312,7 +312,7 @@ static struct omap_usb_config h2_usb_config __initdata = { /* usb1 has a Mini-AB port and external isp1301 transceiver */ .otg = 2, -#ifdef CONFIG_USB_GADGET_OMAP +#if IS_ENABLED(CONFIG_USB_OMAP) .hmc_mode = 19, /* 0:host(off) 1:dev|otg 2:disabled */ /* .hmc_mode = 21,*/ /* 0:host(off) 1:dev(loopback) 2:host(loopback) */ #elif defined(CONFIG_USB_OHCI_HCD) || defined(CONFIG_USB_OHCI_HCD_MODULE) diff --git a/arch/arm/mach-omap1/board-osk.c b/arch/arm/mach-omap1/board-osk.c index a7ce692..006fbb5 100644 --- a/arch/arm/mach-omap1/board-osk.c +++ b/arch/arm/mach-omap1/board-osk.c @@ -280,7 +280,7 @@ static struct omap_usb_config osk_usb_config __initdata = { * be used, with a NONSTANDARD gender-bending cable/dongle, as * a peripheral. 
*/ -#ifdef CONFIG_USB_GADGET_OMAP +#if IS_ENABLED(CONFIG_USB_OMAP) .register_dev = 1, .hmc_mode = 0, #else diff --git a/arch/arm/mach-omap2/cclock3xxx_data.c b/arch/arm/mach-omap2/cclock3xxx_data.c index 334b767..9128b25 100644 --- a/arch/arm/mach-omap2/cclock3xxx_data.c +++ b/arch/arm/mach-omap2/cclock3xxx_data.c @@ -418,7 +418,8 @@ static struct clk_hw_omap dpll4_m5x2_ck_hw = { .clkdm_name = "dpll4_clkdm", }; -DEFINE_STRUCT_CLK(dpll4_m5x2_ck, dpll4_m5x2_ck_parent_names, dpll4_m5x2_ck_ops); +DEFINE_STRUCT_CLK_FLAGS(dpll4_m5x2_ck, dpll4_m5x2_ck_parent_names, + dpll4_m5x2_ck_ops, CLK_SET_RATE_PARENT); static struct clk dpll4_m5x2_ck_3630 = { .name = "dpll4_m5x2_ck", diff --git a/arch/arm/mach-omap2/control.c b/arch/arm/mach-omap2/control.c index 31e0dfe..62a392b 100644 --- a/arch/arm/mach-omap2/control.c +++ b/arch/arm/mach-omap2/control.c @@ -324,7 +324,8 @@ void omap3_save_scratchpad_contents(void) scratchpad_contents.public_restore_ptr = virt_to_phys(omap3_restore_3630); else if (omap_rev() != OMAP3430_REV_ES3_0 && - omap_rev() != OMAP3430_REV_ES3_1) + omap_rev() != OMAP3430_REV_ES3_1 && + omap_rev() != OMAP3430_REV_ES3_1_2) scratchpad_contents.public_restore_ptr = virt_to_phys(omap3_restore); else diff --git a/arch/arm/mach-omap2/irq.c b/arch/arm/mach-omap2/irq.c index e022a86..6037a9a 100644 --- a/arch/arm/mach-omap2/irq.c +++ b/arch/arm/mach-omap2/irq.c @@ -222,6 +222,7 @@ void __init ti81xx_init_irq(void) static inline void omap_intc_handle_irq(void __iomem *base_addr, struct pt_regs *regs) { u32 irqnr; + int handled_irq = 0; do { irqnr = readl_relaxed(base_addr + 0x98); @@ -249,8 +250,15 @@ out: if (irqnr) { irqnr = irq_find_mapping(domain, irqnr); handle_IRQ(irqnr, regs); + handled_irq = 1; } } while (irqnr); + + /* If an irq is masked or deasserted while active, we will + * keep ending up here with no irq handled. 
So remove it from + * the INTC with an ack.*/ + if (!handled_irq) + omap_ack_irq(NULL); } asmlinkage void __exception_irq_entry omap2_intc_handle_irq(struct pt_regs *regs) diff --git a/arch/arm/mach-omap2/mux.c b/arch/arm/mach-omap2/mux.c index f82cf87..94c2f6d 100644 --- a/arch/arm/mach-omap2/mux.c +++ b/arch/arm/mach-omap2/mux.c @@ -183,8 +183,10 @@ static int __init _omap_mux_get_by_name(struct omap_mux_partition *partition, m0_entry = mux->muxnames[0]; /* First check for full name in mode0.muxmode format */ - if (mode0_len && strncmp(muxname, m0_entry, mode0_len)) - continue; + if (mode0_len) + if (strncmp(muxname, m0_entry, mode0_len) || + (strlen(m0_entry) != mode0_len)) + continue; /* Then check for muxmode only */ for (i = 0; i < OMAP_MUX_NR_MODES; i++) { diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c index 60f2344..857e76c 100644 --- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c @@ -1968,7 +1968,7 @@ static struct omap_hwmod_irq_info omap3xxx_usb_host_hs_irqs[] = { static struct omap_hwmod omap3xxx_usb_host_hs_hwmod = { .name = "usb_host_hs", .class = &omap3xxx_usb_host_hs_hwmod_class, - .clkdm_name = "l3_init_clkdm", + .clkdm_name = "usbhost_clkdm", .mpu_irqs = omap3xxx_usb_host_hs_irqs, .main_clk = "usbhost_48m_fck", .prcm = { @@ -2053,7 +2053,7 @@ static struct omap_hwmod_irq_info omap3xxx_usb_tll_hs_irqs[] = { static struct omap_hwmod omap3xxx_usb_tll_hs_hwmod = { .name = "usb_tll_hs", .class = &omap3xxx_usb_tll_hs_hwmod_class, - .clkdm_name = "l3_init_clkdm", + .clkdm_name = "core_l4_clkdm", .mpu_irqs = omap3xxx_usb_tll_hs_irqs, .main_clk = "usbtll_fck", .prcm = { diff --git a/arch/arm/mach-omap2/omap_hwmod_54xx_data.c b/arch/arm/mach-omap2/omap_hwmod_54xx_data.c index cde4155..6bbb1e6 100644 --- a/arch/arm/mach-omap2/omap_hwmod_54xx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_54xx_data.c @@ -895,7 +895,7 @@ static struct omap_hwmod 
omap54xx_mcpdm_hwmod = { * current exception. */ - .flags = HWMOD_EXT_OPT_MAIN_CLK, + .flags = HWMOD_EXT_OPT_MAIN_CLK | HWMOD_SWSUP_SIDLE, .main_clk = "pad_clks_ck", .prcm = { .omap4 = { diff --git a/arch/arm/mach-omap2/pm.h b/arch/arm/mach-omap2/pm.h index 7bdd22a..d4d0fce 100644 --- a/arch/arm/mach-omap2/pm.h +++ b/arch/arm/mach-omap2/pm.h @@ -103,7 +103,7 @@ static inline void enable_omap3630_toggle_l2_on_restore(void) { } #define PM_OMAP4_ROM_SMP_BOOT_ERRATUM_GICD (1 << 0) -#if defined(CONFIG_ARCH_OMAP4) +#if defined(CONFIG_PM) && defined(CONFIG_ARCH_OMAP4) extern u16 pm44xx_errata; #define IS_PM44XX_ERRATUM(id) (pm44xx_errata & (id)) #else diff --git a/arch/arm/mach-omap2/pm44xx.c b/arch/arm/mach-omap2/pm44xx.c index 82f06989..6be33ce 100644 --- a/arch/arm/mach-omap2/pm44xx.c +++ b/arch/arm/mach-omap2/pm44xx.c @@ -146,26 +146,6 @@ static inline int omap4_init_static_deps(void) struct clockdomain *ducati_clkdm, *l3_2_clkdm; int ret = 0; - if (omap_rev() == OMAP4430_REV_ES1_0) { - WARN(1, "Power Management not supported on OMAP4430 ES1.0\n"); - return -ENODEV; - } - - pr_err("Power Management for TI OMAP4.\n"); - /* - * OMAP4 chip PM currently works only with certain (newer) - * versions of bootloaders. This is due to missing code in the - * kernel to properly reset and initialize some devices. - * http://www.spinics.net/lists/arm-kernel/msg218641.html - */ - pr_warn("OMAP4 PM: u-boot >= v2012.07 is required for full PM support\n"); - - ret = pwrdm_for_each(pwrdms_setup, NULL); - if (ret) { - pr_err("Failed to setup powerdomains\n"); - return ret; - } - /* * The dynamic dependency between MPUSS -> MEMIF and * MPUSS -> L4_PER/L3_* and DUCATI -> L3_* doesn't work as @@ -216,6 +196,15 @@ int __init omap4_pm_init(void) pr_info("Power Management for TI OMAP4+ devices.\n"); + /* + * OMAP4 chip PM currently works only with certain (newer) + * versions of bootloaders. This is due to missing code in the + * kernel to properly reset and initialize some devices. 
+ * http://www.spinics.net/lists/arm-kernel/msg218641.html + */ + if (cpu_is_omap44xx()) + pr_warn("OMAP4 PM: u-boot >= v2012.07 is required for full PM support\n"); + ret = pwrdm_for_each(pwrdms_setup, NULL); if (ret) { pr_err("Failed to setup powerdomains.\n"); diff --git a/arch/arm/mach-omap2/soc.h b/arch/arm/mach-omap2/soc.h index 4588df1..78f44f3 100644 --- a/arch/arm/mach-omap2/soc.h +++ b/arch/arm/mach-omap2/soc.h @@ -245,6 +245,8 @@ IS_AM_SUBCLASS(437x, 0x437) #define soc_is_omap54xx() 0 #define soc_is_omap543x() 0 #define soc_is_dra7xx() 0 +#define soc_is_dra74x() 0 +#define soc_is_dra72x() 0 #if defined(MULTI_OMAP2) # if defined(CONFIG_ARCH_OMAP2) @@ -393,7 +395,11 @@ IS_OMAP_TYPE(3430, 0x3430) #if defined(CONFIG_SOC_DRA7XX) #undef soc_is_dra7xx +#undef soc_is_dra74x +#undef soc_is_dra72x #define soc_is_dra7xx() (of_machine_is_compatible("ti,dra7")) +#define soc_is_dra74x() (of_machine_is_compatible("ti,dra74")) +#define soc_is_dra72x() (of_machine_is_compatible("ti,dra72")) #endif /* Various silicon revisions for omap2 */ diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c index ead48fa..bf83df1 100644 --- a/arch/arm/mach-omap2/timer.c +++ b/arch/arm/mach-omap2/timer.c @@ -503,11 +503,11 @@ static void __init realtime_counter_init(void) rate = clk_get_rate(sys_clk); /* Numerator/denumerator values refer TRM Realtime Counter section */ switch (rate) { - case 1200000: + case 12000000: num = 64; den = 125; break; - case 1300000: + case 13000000: num = 768; den = 1625; break; @@ -515,11 +515,11 @@ static void __init realtime_counter_init(void) num = 8; den = 25; break; - case 2600000: + case 26000000: num = 384; den = 1625; break; - case 2700000: + case 27000000: num = 256; den = 1125; break; diff --git a/arch/arm/mach-orion5x/common.h b/arch/arm/mach-orion5x/common.h index f565f99..7548db2 100644 --- a/arch/arm/mach-orion5x/common.h +++ b/arch/arm/mach-orion5x/common.h @@ -21,7 +21,7 @@ struct mv_sata_platform_data; #define 
ORION_MBUS_DEVBUS_BOOT_ATTR 0x0f #define ORION_MBUS_DEVBUS_TARGET(cs) 0x01 #define ORION_MBUS_DEVBUS_ATTR(cs) (~(1 << cs)) -#define ORION_MBUS_SRAM_TARGET 0x00 +#define ORION_MBUS_SRAM_TARGET 0x09 #define ORION_MBUS_SRAM_ATTR 0x00 /* diff --git a/arch/arm/mach-shmobile/setup-sh73a0.c b/arch/arm/mach-shmobile/setup-sh73a0.c index 22de174..a10565d 100644 --- a/arch/arm/mach-shmobile/setup-sh73a0.c +++ b/arch/arm/mach-shmobile/setup-sh73a0.c @@ -746,6 +746,7 @@ static struct platform_device ipmmu_device = { static struct renesas_intc_irqpin_config irqpin0_platform_data = { .irq_base = irq_pin(0), /* IRQ0 -> IRQ7 */ + .control_parent = true, }; static struct resource irqpin0_resources[] = { @@ -807,6 +808,7 @@ static struct platform_device irqpin1_device = { static struct renesas_intc_irqpin_config irqpin2_platform_data = { .irq_base = irq_pin(16), /* IRQ16 -> IRQ23 */ + .control_parent = true, }; static struct resource irqpin2_resources[] = { @@ -837,6 +839,7 @@ static struct platform_device irqpin2_device = { static struct renesas_intc_irqpin_config irqpin3_platform_data = { .irq_base = irq_pin(24), /* IRQ24 -> IRQ31 */ + .control_parent = true, }; static struct resource irqpin3_resources[] = { diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index cd2c88e..426f531 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -446,7 +446,6 @@ config CPU_32v5 config CPU_32v6 bool - select CPU_USE_DOMAINS if CPU_V6 && MMU select TLS_REG_EMUL if !CPU_32v6K && !MMU config CPU_32v6K @@ -671,7 +670,7 @@ config ARM_VIRT_EXT config SWP_EMULATE bool "Emulate SWP/SWPB instructions" - depends on !CPU_USE_DOMAINS && CPU_V7 + depends on CPU_V7 default y if SMP select HAVE_PROC_CPU if PROC_FS help @@ -799,6 +798,7 @@ config NEED_KUSER_HELPERS config KUSER_HELPERS bool "Enable kuser helpers in vector page" if !NEED_KUSER_HELPERS + depends on MMU default y help Warning: disabling this option may break user programs. 
diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S index 8074199..5d777a5 100644 --- a/arch/arm/mm/abort-ev6.S +++ b/arch/arm/mm/abort-ev6.S @@ -17,12 +17,6 @@ */ .align 5 ENTRY(v6_early_abort) -#ifdef CONFIG_CPU_V6 - sub r1, sp, #4 @ Get unused stack location - strex r0, r1, [r1] @ Clear the exclusive monitor -#elif defined(CONFIG_CPU_32v6K) - clrex -#endif mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR /* diff --git a/arch/arm/mm/abort-ev7.S b/arch/arm/mm/abort-ev7.S index 7033752..4812ad0 100644 --- a/arch/arm/mm/abort-ev7.S +++ b/arch/arm/mm/abort-ev7.S @@ -13,12 +13,6 @@ */ .align 5 ENTRY(v7_early_abort) - /* - * The effect of data aborts on on the exclusive access monitor are - * UNPREDICTABLE. Do a CLREX to clear the state - */ - clrex - mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 6f4585b..1fe0bf5 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -39,6 +39,7 @@ * This code is not portable to processors with late data abort handling. 
*/ #define CODING_BITS(i) (i & 0x0e000000) +#define COND_BITS(i) (i & 0xf0000000) #define LDST_I_BIT(i) (i & (1 << 26)) /* Immediate constant */ #define LDST_P_BIT(i) (i & (1 << 24)) /* Preindex */ @@ -812,6 +813,8 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) break; case 0x04000000: /* ldr or str immediate */ + if (COND_BITS(instr) == 0xf0000000) /* NEON VLDn, VSTn */ + goto bad; offset.un = OFFSET_BITS(instr); handler = do_alignment_ldrstr; break; diff --git a/arch/arm/mm/hugetlbpage.c b/arch/arm/mm/hugetlbpage.c index 54ee616..66781bf 100644 --- a/arch/arm/mm/hugetlbpage.c +++ b/arch/arm/mm/hugetlbpage.c @@ -56,8 +56,3 @@ int pmd_huge(pmd_t pmd) { return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); } - -int pmd_huge_support(void) -{ - return 1; -} diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c index 83cb3ac..c61d237 100644 --- a/arch/arm/mm/idmap.c +++ b/arch/arm/mm/idmap.c @@ -24,6 +24,13 @@ static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end, pr_warning("Failed to allocate identity pmd.\n"); return; } + /* + * Copy the original PMD to ensure that the PMD entries for + * the kernel image are preserved. + */ + if (!pud_none(*pud)) + memcpy(pmd, pmd_offset(pud, 0), + PTRS_PER_PMD * sizeof(pmd_t)); pud_populate(&init_mm, pud, pmd); pmd += pmd_index(addr); } else diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 0222ba7..b7c987d 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -464,6 +464,16 @@ static void __init build_mem_type_table(void) s2_device_pgprot = mem_types[MT_DEVICE].prot_pte_s2; /* + * We don't use domains on ARMv6 (since this causes problems with + * v6/v7 kernels), so we must use a separate memory type for user + * r/o, kernel r/w to map the vectors page. + */ +#ifndef CONFIG_ARM_LPAE + if (cpu_arch == CPU_ARCH_ARMv6) + vecs_pgprot |= L_PTE_MT_VECTORS; +#endif + + /* * ARMv6 and above have extended page tables. 
*/ if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) { diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index e3c48a3..ee1d805 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -112,13 +112,9 @@ * 100x 1 0 1 r/o no acc * 10x0 1 0 1 r/o no acc * 1011 0 0 1 r/w no acc - * 110x 0 1 0 r/w r/o - * 11x0 0 1 0 r/w r/o - * 1111 0 1 1 r/w r/w - * - * If !CONFIG_CPU_USE_DOMAINS, the following permissions are changed: * 110x 1 1 1 r/o r/o * 11x0 1 1 1 r/o r/o + * 1111 0 1 1 r/w r/w */ .macro armv6_mt_table pfx \pfx\()_mt_table: @@ -137,7 +133,7 @@ .long PTE_EXT_TEX(2) @ L_PTE_MT_DEV_NONSHARED .long 0x00 @ unused .long 0x00 @ unused - .long 0x00 @ unused + .long PTE_CACHEABLE | PTE_BUFFERABLE | PTE_EXT_APX @ L_PTE_MT_VECTORS .endm .macro armv6_set_pte_ext pfx @@ -158,24 +154,21 @@ tst r1, #L_PTE_USER orrne r3, r3, #PTE_EXT_AP1 -#ifdef CONFIG_CPU_USE_DOMAINS - @ allow kernel read/write access to read-only user pages tstne r3, #PTE_EXT_APX - bicne r3, r3, #PTE_EXT_APX | PTE_EXT_AP0 -#endif + + @ user read-only -> kernel read-only + bicne r3, r3, #PTE_EXT_AP0 tst r1, #L_PTE_XN orrne r3, r3, #PTE_EXT_XN - orr r3, r3, r2 + eor r3, r3, r2 tst r1, #L_PTE_YOUNG tstne r1, #L_PTE_PRESENT moveq r3, #0 -#ifndef CONFIG_CPU_USE_DOMAINS tstne r1, #L_PTE_NONE movne r3, #0 -#endif str r3, [r0] mcr p15, 0, r0, c7, c10, 1 @ flush_pte diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S index bdd3be4..1f52915 100644 --- a/arch/arm/mm/proc-v7-2level.S +++ b/arch/arm/mm/proc-v7-2level.S @@ -90,21 +90,14 @@ ENTRY(cpu_v7_set_pte_ext) tst r1, #L_PTE_USER orrne r3, r3, #PTE_EXT_AP1 -#ifdef CONFIG_CPU_USE_DOMAINS - @ allow kernel read/write access to read-only user pages - tstne r3, #PTE_EXT_APX - bicne r3, r3, #PTE_EXT_APX | PTE_EXT_AP0 -#endif tst r1, #L_PTE_XN orrne r3, r3, #PTE_EXT_XN tst r1, #L_PTE_YOUNG tstne r1, #L_PTE_VALID -#ifndef CONFIG_CPU_USE_DOMAINS eorne r1, r1, #L_PTE_NONE tstne r1, #L_PTE_NONE -#endif moveq r3, #0 ARM( str r3, 
[r0, #2048]! ) diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 2267b73..cec33ab 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -211,7 +211,6 @@ __v7_pj4b_setup: /* Auxiliary Debug Modes Control 1 Register */ #define PJ4B_STATIC_BP (1 << 2) /* Enable Static BP */ #define PJ4B_INTER_PARITY (1 << 8) /* Disable Internal Parity Handling */ -#define PJ4B_BCK_OFF_STREX (1 << 5) /* Enable the back off of STREX instr */ #define PJ4B_CLEAN_LINE (1 << 16) /* Disable data transfer for clean line */ /* Auxiliary Debug Modes Control 2 Register */ @@ -234,7 +233,6 @@ __v7_pj4b_setup: /* Auxiliary Debug Modes Control 1 Register */ mrc p15, 1, r0, c15, c1, 1 orr r0, r0, #PJ4B_CLEAN_LINE - orr r0, r0, #PJ4B_BCK_OFF_STREX orr r0, r0, #PJ4B_INTER_PARITY bic r0, r0, #PJ4B_STATIC_BP mcr p15, 1, r0, c15, c1, 1 diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S index d19b1cf..b34b95f 100644 --- a/arch/arm/mm/proc-xscale.S +++ b/arch/arm/mm/proc-xscale.S @@ -535,7 +535,7 @@ ENTRY(cpu_xscale_do_suspend) mrc p15, 0, r5, c15, c1, 0 @ CP access reg mrc p15, 0, r6, c13, c0, 0 @ PID mrc p15, 0, r7, c3, c0, 0 @ domain ID - mrc p15, 0, r8, c1, c1, 0 @ auxiliary control reg + mrc p15, 0, r8, c1, c0, 1 @ auxiliary control reg mrc p15, 0, r9, c1, c0, 0 @ control reg bic r4, r4, #2 @ clear frequency change bit stmia r0, {r4 - r9} @ store cp regs @@ -552,7 +552,7 @@ ENTRY(cpu_xscale_do_resume) mcr p15, 0, r6, c13, c0, 0 @ PID mcr p15, 0, r7, c3, c0, 0 @ domain ID mcr p15, 0, r1, c2, c0, 0 @ translation table base addr - mcr p15, 0, r8, c1, c1, 0 @ auxiliary control reg + mcr p15, 0, r8, c1, c0, 1 @ auxiliary control reg mov r0, r9 @ control register b cpu_resume_mmu ENDPROC(cpu_xscale_do_resume) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c044548..fe70eae 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1,6 +1,7 @@ config ARM64 def_bool y select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE + select ARCH_SUPPORTS_ATOMIC_RMW select 
ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_COMPAT_IPC_PARSE_VERSION select ARCH_WANT_FRAME_POINTERS diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index 899af80..c30a548 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -33,8 +33,8 @@ typedef s32 compat_ssize_t; typedef s32 compat_time_t; typedef s32 compat_clock_t; typedef s32 compat_pid_t; -typedef u32 __compat_uid_t; -typedef u32 __compat_gid_t; +typedef u16 __compat_uid_t; +typedef u16 __compat_gid_t; typedef u16 __compat_uid16_t; typedef u16 __compat_gid16_t; typedef u32 __compat_uid32_t; diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h index d064047..52b484b 100644 --- a/arch/arm64/include/asm/hw_breakpoint.h +++ b/arch/arm64/include/asm/hw_breakpoint.h @@ -79,7 +79,6 @@ static inline void decode_ctrl_reg(u32 reg, */ #define ARM_MAX_BRP 16 #define ARM_MAX_WRP 16 -#define ARM_MAX_HBP_SLOTS (ARM_MAX_BRP + ARM_MAX_WRP) /* Virtual debug register bases. */ #define AARCH64_DBG_REG_BVR 0 diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 20925bc..e1134d0 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -51,6 +51,8 @@ #define TASK_SIZE_32 UL(0x100000000) #define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \ TASK_SIZE_32 : TASK_SIZE_64) +#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? 
\ + TASK_SIZE_32 : TASK_SIZE_64) #else #define TASK_SIZE TASK_SIZE_64 #endif /* CONFIG_COMPAT */ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 82d95a7..05fe332 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -168,7 +168,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { if (pte_valid_user(pte)) { - if (pte_exec(pte)) + if (!pte_special(pte) && pte_exec(pte)) __sync_icache_dcache(pte, addr); if (!pte_dirty(pte)) pte = pte_wrprotect(pte); @@ -253,11 +253,11 @@ static inline int has_transparent_hugepage(void) * Mark the prot value as uncacheable and unbufferable. */ #define pgprot_noncached(prot) \ - __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRnE)) + __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRnE) | PTE_PXN | PTE_UXN) #define pgprot_writecombine(prot) \ - __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC)) + __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN) #define pgprot_dmacoherent(prot) \ - __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC)) + __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN) #define __HAVE_PHYS_MEM_ACCESS_PROT struct file; extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 3881fd1..028a1b9 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -275,7 +275,6 @@ el1_sp_pc: * Stack or PC alignment exception handling */ mrs x0, far_el1 - mov x1, x25 mov x2, sp b do_sp_pc_abort el1_undef: diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 7ae8a1f..7af6183 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -183,9 +183,27 @@ void exit_thread(void) { } +static void tls_thread_flush(void) +{ + asm ("msr 
tpidr_el0, xzr"); + + if (is_compat_task()) { + current->thread.tp_value = 0; + + /* + * We need to ensure ordering between the shadow state and the + * hardware state, so that we don't corrupt the hardware state + * with a stale shadow state during context switch. + */ + barrier(); + asm ("msr tpidrro_el0, xzr"); + } +} + void flush_thread(void) { fpsimd_flush_thread(); + tls_thread_flush(); flush_ptrace_hw_breakpoint(current); } diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index c484d56..ee79a1a 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -81,7 +81,8 @@ static void ptrace_hbptriggered(struct perf_event *bp, break; } } - for (i = ARM_MAX_BRP; i < ARM_MAX_HBP_SLOTS && !bp; ++i) { + + for (i = 0; i < ARM_MAX_WRP; ++i) { if (current->thread.debug.hbp_watch[i] == bp) { info.si_errno = -((i << 1) + 1); break; @@ -823,6 +824,7 @@ static int compat_ptrace_write_user(struct task_struct *tsk, compat_ulong_t off, compat_ulong_t val) { int ret; + mm_segment_t old_fs = get_fs(); if (off & 3 || off >= COMPAT_USER_SZ) return -EIO; @@ -830,10 +832,13 @@ static int compat_ptrace_write_user(struct task_struct *tsk, compat_ulong_t off, if (off >= sizeof(compat_elf_gregset_t)) return 0; + set_fs(KERNEL_DS); ret = copy_regset_from_user(tsk, &user_aarch32_view, REGSET_COMPAT_GPR, off, sizeof(compat_ulong_t), &val); + set_fs(old_fs); + return ret; } diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index 26e9c4e..7803992 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -79,6 +79,12 @@ long compat_arm_syscall(struct pt_regs *regs) case __ARM_NR_compat_set_tls: current->thread.tp_value = regs->regs[0]; + + /* + * Protect against register corruption from context switch. + * See comment in tls_thread_flush. 
+ */ + barrier(); asm ("msr tpidrro_el0, %0" : : "r" (regs->regs[0])); return 0; diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index 6e0ed93..c17967f 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -46,7 +46,7 @@ USER(9f, strh wzr, [x0], #2 ) sub x1, x1, #2 4: adds x1, x1, #1 b.mi 5f - strb wzr, [x0] +USER(9f, strb wzr, [x0] ) 5: mov x0, #0 ret ENDPROC(__clear_user) diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index e4193e3..0d64089 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -79,7 +79,8 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr) return; if (!test_and_set_bit(PG_dcache_clean, &page->flags)) { - __flush_dcache_area(page_address(page), PAGE_SIZE); + __flush_dcache_area(page_address(page), + PAGE_SIZE << compound_order(page)); __flush_icache_all(); } else if (icache_is_aivivt()) { __flush_icache_all(); diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 5e9aec3..023747b 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -51,12 +51,11 @@ int pmd_huge(pmd_t pmd) int pud_huge(pud_t pud) { +#ifndef __PAGETABLE_PMD_FOLDED return !(pud_val(pud) & PUD_TABLE_BIT); -} - -int pmd_huge_support(void) -{ - return 1; +#else + return 0; +#endif } static __init int setup_hugepagesz(char *opt) diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index 68232db..76069c1 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -114,11 +114,6 @@ int pud_huge(pud_t pud) return 0; } -int pmd_huge_support(void) -{ - return 0; -} - struct page * follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write) { diff --git a/arch/m68k/mm/hwtest.c b/arch/m68k/mm/hwtest.c index 2c7dde3..2a5259f 100644 --- a/arch/m68k/mm/hwtest.c +++ b/arch/m68k/mm/hwtest.c @@ -28,9 +28,11 @@ int hwreg_present( volatile void *regp ) { int ret = 0; + unsigned long flags; long save_sp, save_vbr; long 
tmp_vectors[3]; + local_irq_save(flags); __asm__ __volatile__ ( "movec %/vbr,%2\n\t" "movel #Lberr1,%4@(8)\n\t" @@ -46,6 +48,7 @@ int hwreg_present( volatile void *regp ) : "=&d" (ret), "=&r" (save_sp), "=&r" (save_vbr) : "a" (regp), "a" (tmp_vectors) ); + local_irq_restore(flags); return( ret ); } @@ -58,9 +61,11 @@ EXPORT_SYMBOL(hwreg_present); int hwreg_write( volatile void *regp, unsigned short val ) { int ret; + unsigned long flags; long save_sp, save_vbr; long tmp_vectors[3]; + local_irq_save(flags); __asm__ __volatile__ ( "movec %/vbr,%2\n\t" "movel #Lberr2,%4@(8)\n\t" @@ -78,6 +83,7 @@ int hwreg_write( volatile void *regp, unsigned short val ) : "=&d" (ret), "=&r" (save_sp), "=&r" (save_vbr) : "a" (regp), "a" (tmp_vectors), "g" (val) ); + local_irq_restore(flags); return( ret ); } diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h index c90bfc6..e355a4c 100644 --- a/arch/metag/include/asm/barrier.h +++ b/arch/metag/include/asm/barrier.h @@ -15,6 +15,7 @@ static inline void wr_fence(void) volatile int *flushptr = (volatile int *) LINSYSEVENT_WR_FENCE; barrier(); *flushptr = 0; + barrier(); } #else /* CONFIG_METAG_META21 */ @@ -35,6 +36,7 @@ static inline void wr_fence(void) *flushptr = 0; *flushptr = 0; *flushptr = 0; + barrier(); } #endif /* !CONFIG_METAG_META21 */ @@ -68,6 +70,7 @@ static inline void fence(void) volatile int *flushptr = (volatile int *) LINSYSEVENT_WR_ATOMIC_UNLOCK; barrier(); *flushptr = 0; + barrier(); } #define smp_mb() fence() #define smp_rmb() fence() diff --git a/arch/metag/include/asm/processor.h b/arch/metag/include/asm/processor.h index f16477d..3be8581 100644 --- a/arch/metag/include/asm/processor.h +++ b/arch/metag/include/asm/processor.h @@ -22,6 +22,8 @@ /* Add an extra page of padding at the top of the stack for the guard page. 
*/ #define STACK_TOP (TASK_SIZE - PAGE_SIZE) #define STACK_TOP_MAX STACK_TOP +/* Maximum virtual space for stack */ +#define STACK_SIZE_MAX (1 << 28) /* 256 MB */ /* This decides where the kernel will search for a free chunk of vm * space during mmap's. diff --git a/arch/metag/mm/hugetlbpage.c b/arch/metag/mm/hugetlbpage.c index 0424315..3c52fa6 100644 --- a/arch/metag/mm/hugetlbpage.c +++ b/arch/metag/mm/hugetlbpage.c @@ -110,11 +110,6 @@ int pud_huge(pud_t pud) return 0; } -int pmd_huge_support(void) -{ - return 1; -} - struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write) { diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c index 2c95730..d498a1f 100644 --- a/arch/mips/boot/compressed/decompress.c +++ b/arch/mips/boot/compressed/decompress.c @@ -13,6 +13,7 @@ #include <linux/types.h> #include <linux/kernel.h> +#include <linux/string.h> #include <asm/addrspace.h> diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index 25fbfae..ab7dc01 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -635,7 +635,7 @@ static void octeon_irq_cpu_offline_ciu(struct irq_data *data) cpumask_clear(&new_affinity); cpumask_set_cpu(cpumask_first(cpu_online_mask), &new_affinity); } - __irq_set_affinity_locked(data, &new_affinity); + irq_set_affinity_locked(data, &new_affinity, false); } static int octeon_irq_ciu_set_affinity(struct irq_data *data, diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c index b212ae1..8a00799 100644 --- a/arch/mips/cavium-octeon/setup.c +++ b/arch/mips/cavium-octeon/setup.c @@ -458,6 +458,18 @@ static void octeon_halt(void) octeon_kill_core(NULL); } +static char __read_mostly octeon_system_type[80]; + +static int __init init_octeon_system_type(void) +{ + snprintf(octeon_system_type, sizeof(octeon_system_type), "%s (%s)", + 
cvmx_board_type_to_string(octeon_bootinfo->board_type), + octeon_model_get_string(read_c0_prid())); + + return 0; +} +early_initcall(init_octeon_system_type); + /** * Return a string representing the system type * @@ -465,11 +477,7 @@ static void octeon_halt(void) */ const char *octeon_board_type_string(void) { - static char name[80]; - sprintf(name, "%s (%s)", - cvmx_board_type_to_string(octeon_bootinfo->board_type), - octeon_model_get_string(read_c0_prid())); - return name; + return octeon_system_type; } const char *get_system_type(void) diff --git a/arch/mips/include/asm/ftrace.h b/arch/mips/include/asm/ftrace.h index ce35c9a..370ae7c 100644 --- a/arch/mips/include/asm/ftrace.h +++ b/arch/mips/include/asm/ftrace.h @@ -24,7 +24,7 @@ do { \ asm volatile ( \ "1: " load " %[" STR(dst) "], 0(%[" STR(src) "])\n"\ " li %[" STR(error) "], 0\n" \ - "2:\n" \ + "2: .insn\n" \ \ ".section .fixup, \"ax\"\n" \ "3: li %[" STR(error) "], 1\n" \ @@ -46,7 +46,7 @@ do { \ asm volatile ( \ "1: " store " %[" STR(src) "], 0(%[" STR(dst) "])\n"\ " li %[" STR(error) "], 0\n" \ - "2:\n" \ + "2: .insn\n" \ \ ".section .fixup, \"ax\"\n" \ "3: li %[" STR(error) "], 1\n" \ diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index 61215a3..897cd58 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -134,7 +134,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_LOAD_WATCH (1<<TIF_LOAD_WATCH) #define _TIF_WORK_SYSCALL_ENTRY (_TIF_NOHZ | _TIF_SYSCALL_TRACE | \ - _TIF_SYSCALL_AUDIT) + _TIF_SYSCALL_AUDIT | _TIF_SECCOMP) /* work to do in syscall_trace_leave() */ #define _TIF_WORK_SYSCALL_EXIT (_TIF_NOHZ | _TIF_SYSCALL_TRACE | \ diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c index 5b5ddb2..78f1843 100644 --- a/arch/mips/kernel/irq-gic.c +++ b/arch/mips/kernel/irq-gic.c @@ -255,11 +255,13 @@ static void __init gic_setup_intr(unsigned int intr, unsigned int cpu, /* Setup Intr 
to Pin mapping */ if (pin & GIC_MAP_TO_NMI_MSK) { + int i; + GICWRITE(GIC_REG_ADDR(SHARED, GIC_SH_MAP_TO_PIN(intr)), pin); /* FIXME: hack to route NMI to all cpu's */ - for (cpu = 0; cpu < NR_CPUS; cpu += 32) { + for (i = 0; i < NR_CPUS; i += 32) { GICWRITE(GIC_REG_ADDR(SHARED, - GIC_SH_MAP_TO_VPE_REG_OFF(intr, cpu)), + GIC_SH_MAP_TO_VPE_REG_OFF(intr, i)), 0xffffffff); } } else { diff --git a/arch/mips/kernel/irq-msc01.c b/arch/mips/kernel/irq-msc01.c index fab40f7..ac9facc 100644 --- a/arch/mips/kernel/irq-msc01.c +++ b/arch/mips/kernel/irq-msc01.c @@ -131,7 +131,7 @@ void __init init_msc_irqs(unsigned long icubase, unsigned int irqbase, msc_irqma board_bind_eic_interrupt = &msc_bind_eic_interrupt; - for (; nirq >= 0; nirq--, imp++) { + for (; nirq > 0; nirq--, imp++) { int n = imp->im_irq; switch (imp->im_type) { diff --git a/arch/mips/kernel/mcount.S b/arch/mips/kernel/mcount.S index 539b629..8f89ff4 100644 --- a/arch/mips/kernel/mcount.S +++ b/arch/mips/kernel/mcount.S @@ -123,7 +123,11 @@ NESTED(_mcount, PT_SIZE, ra) nop #endif b ftrace_stub +#ifdef CONFIG_32BIT + addiu sp, sp, 8 +#else nop +#endif static_trace: MCOUNT_SAVE_REGS @@ -133,6 +137,9 @@ static_trace: move a1, AT /* arg2: parent's return address */ MCOUNT_RESTORE_REGS +#ifdef CONFIG_32BIT + addiu sp, sp, 8 +#endif .globl ftrace_stub ftrace_stub: RETURN_BACK @@ -177,6 +184,11 @@ NESTED(ftrace_graph_caller, PT_SIZE, ra) jal prepare_ftrace_return nop MCOUNT_RESTORE_REGS +#ifndef CONFIG_DYNAMIC_FTRACE +#ifdef CONFIG_32BIT + addiu sp, sp, 8 +#endif +#endif RETURN_BACK END(ftrace_graph_caller) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 8ae1ebe..5404cab 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -162,6 +162,7 @@ int ptrace_setfpregs(struct task_struct *child, __u32 __user *data) __get_user(fregs[i], i + (__u64 __user *) data); __get_user(child->thread.fpu.fcr31, data + 64); + child->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X; /* FIR may not be written. 
*/ @@ -452,7 +453,7 @@ long arch_ptrace(struct task_struct *child, long request, break; #endif case FPC_CSR: - child->thread.fpu.fcr31 = data; + child->thread.fpu.fcr31 = data & ~FPU_CSR_ALL_X; break; case DSP_BASE ... DSP_BASE + 5: { dspreg_t *dregs; diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c index c369a5d..b897dde 100644 --- a/arch/mips/kernel/unaligned.c +++ b/arch/mips/kernel/unaligned.c @@ -605,7 +605,6 @@ static void emulate_load_store_insn(struct pt_regs *regs, case sdc1_op: die_if_kernel("Unaligned FP access in kernel code", regs); BUG_ON(!used_math()); - BUG_ON(!is_fpu_owner()); lose_fpu(1); /* Save FPU state for the emulator. */ res = fpu_emulator_cop1Handler(regs, &current->thread.fpu, 1, diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c index a7b0445..3f3e5b2 100644 --- a/arch/mips/kvm/kvm_mips.c +++ b/arch/mips/kvm/kvm_mips.c @@ -149,9 +149,7 @@ void kvm_mips_free_vcpus(struct kvm *kvm) if (kvm->arch.guest_pmap[i] != KVM_INVALID_PAGE) kvm_mips_release_pfn_clean(kvm->arch.guest_pmap[i]); } - - if (kvm->arch.guest_pmap) - kfree(kvm->arch.guest_pmap); + kfree(kvm->arch.guest_pmap); kvm_for_each_vcpu(i, vcpu, kvm) { kvm_arch_vcpu_free(vcpu); @@ -303,7 +301,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) if (cpu_has_veic || cpu_has_vint) { size = 0x200 + VECTORSPACING * 64; } else { - size = 0x200; + size = 0x4000; } /* Save Linux EBASE */ @@ -388,12 +386,9 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) kvm_mips_dump_stats(vcpu); - if (vcpu->arch.guest_ebase) - kfree(vcpu->arch.guest_ebase); - - if (vcpu->arch.kseg0_commpage) - kfree(vcpu->arch.kseg0_commpage); - + kfree(vcpu->arch.guest_ebase); + kfree(vcpu->arch.kseg0_commpage); + kfree(vcpu); } void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) diff --git a/arch/mips/kvm/kvm_mips_emul.c b/arch/mips/kvm/kvm_mips_emul.c index 4b6274b..e75ef82 100644 --- a/arch/mips/kvm/kvm_mips_emul.c +++ b/arch/mips/kvm/kvm_mips_emul.c @@ -1571,17
+1571,17 @@ kvm_mips_handle_ri(unsigned long cause, uint32_t *opc, arch->gprs[rt] = kvm_read_c0_guest_userlocal(cop0); #else /* UserLocal not implemented */ - er = kvm_mips_emulate_ri_exc(cause, opc, run, vcpu); + er = EMULATE_FAIL; #endif break; default: - printk("RDHWR not supported\n"); + kvm_debug("RDHWR %#x not supported @ %p\n", rd, opc); er = EMULATE_FAIL; break; } } else { - printk("Emulate RI not supported @ %p: %#x\n", opc, inst); + kvm_debug("Emulate RI not supported @ %p: %#x\n", opc, inst); er = EMULATE_FAIL; } @@ -1590,6 +1590,7 @@ kvm_mips_handle_ri(unsigned long cause, uint32_t *opc, */ if (er == EMULATE_FAIL) { vcpu->arch.pc = curr_pc; + er = kvm_mips_emulate_ri_exc(cause, opc, run, vcpu); } return er; } diff --git a/arch/mips/lantiq/dts/easy50712.dts b/arch/mips/lantiq/dts/easy50712.dts index fac1f5b..143b8a3 100644 --- a/arch/mips/lantiq/dts/easy50712.dts +++ b/arch/mips/lantiq/dts/easy50712.dts @@ -8,6 +8,7 @@ }; memory@0 { + device_type = "memory"; reg = <0x0 0x2000000>; }; diff --git a/arch/mips/loongson/common/Makefile b/arch/mips/loongson/common/Makefile index 9e4484c..9005a8d6 100644 --- a/arch/mips/loongson/common/Makefile +++ b/arch/mips/loongson/common/Makefile @@ -11,7 +11,8 @@ obj-$(CONFIG_PCI) += pci.o # Serial port support # obj-$(CONFIG_EARLY_PRINTK) += early_printk.o -obj-$(CONFIG_SERIAL_8250) += serial.o +loongson-serial-$(CONFIG_SERIAL_8250) := serial.o +obj-y += $(loongson-serial-m) $(loongson-serial-y) obj-$(CONFIG_LOONGSON_UART_BASE) += uart_base.o obj-$(CONFIG_LOONGSON_MC146818) += rtc.o diff --git a/arch/mips/mm/hugetlbpage.c b/arch/mips/mm/hugetlbpage.c index 01fda44..a7fee0d 100644 --- a/arch/mips/mm/hugetlbpage.c +++ b/arch/mips/mm/hugetlbpage.c @@ -85,11 +85,6 @@ int pud_huge(pud_t pud) return (pud_val(pud) & _PAGE_HUGE) != 0; } -int pmd_huge_support(void) -{ - return 1; -} - struct page * follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write) diff --git a/arch/mips/mm/tlbex.c 
b/arch/mips/mm/tlbex.c index 9bb3a93..a39b415 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -1095,6 +1095,7 @@ static void build_update_entries(u32 **p, unsigned int tmp, unsigned int ptep) struct mips_huge_tlb_info { int huge_pte; int restore_scratch; + bool need_reload_pte; }; static struct mips_huge_tlb_info @@ -1109,6 +1110,7 @@ build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l, rv.huge_pte = scratch; rv.restore_scratch = 0; + rv.need_reload_pte = false; if (check_for_high_segbits) { UASM_i_MFC0(p, tmp, C0_BADVADDR); @@ -1297,6 +1299,7 @@ static void build_r4000_tlb_refill_handler(void) } else { htlb_info.huge_pte = K0; htlb_info.restore_scratch = 0; + htlb_info.need_reload_pte = true; vmalloc_mode = refill_noscratch; /* * create the plain linear handler @@ -1333,6 +1336,8 @@ static void build_r4000_tlb_refill_handler(void) } #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT uasm_l_tlb_huge_update(&l, p); + if (htlb_info.need_reload_pte) + UASM_i_LW(&p, htlb_info.huge_pte, 0, K1); build_huge_update_entries(&p, htlb_info.huge_pte, K1); build_huge_tlb_write_entry(&p, &l, &r, K0, tlb_random, htlb_info.restore_scratch); diff --git a/arch/mips/oprofile/backtrace.c b/arch/mips/oprofile/backtrace.c index 6854ed5..83a1dfd 100644 --- a/arch/mips/oprofile/backtrace.c +++ b/arch/mips/oprofile/backtrace.c @@ -92,7 +92,7 @@ static inline int unwind_user_frame(struct stackframe *old_frame, /* This marks the end of the previous function, which means we overran. 
*/ break; - stack_size = (unsigned) stack_adjustment; + stack_size = (unsigned long) stack_adjustment; } else if (is_ra_save_ins(&ip)) { int ra_slot = ip.i_format.simmediate; if (ra_slot < 0) diff --git a/arch/mips/power/hibernate.S b/arch/mips/power/hibernate.S index 7e0277a..32a7c82 100644 --- a/arch/mips/power/hibernate.S +++ b/arch/mips/power/hibernate.S @@ -43,6 +43,7 @@ LEAF(swsusp_arch_resume) bne t1, t3, 1b PTR_L t0, PBE_NEXT(t0) bnez t0, 0b + jal local_flush_tlb_all /* Avoid TLB mismatch after kernel resume */ PTR_LA t0, saved_regs PTR_L ra, PT_R31(t0) PTR_L sp, PT_R29(t0) diff --git a/arch/mips/ralink/dts/mt7620a_eval.dts b/arch/mips/ralink/dts/mt7620a_eval.dts index 35eb874..709f581 100644 --- a/arch/mips/ralink/dts/mt7620a_eval.dts +++ b/arch/mips/ralink/dts/mt7620a_eval.dts @@ -7,6 +7,7 @@ model = "Ralink MT7620A evaluation board"; memory@0 { + device_type = "memory"; reg = <0x0 0x2000000>; }; diff --git a/arch/mips/ralink/dts/rt2880_eval.dts b/arch/mips/ralink/dts/rt2880_eval.dts index 322d700..0a685db 100644 --- a/arch/mips/ralink/dts/rt2880_eval.dts +++ b/arch/mips/ralink/dts/rt2880_eval.dts @@ -7,6 +7,7 @@ model = "Ralink RT2880 evaluation board"; memory@0 { + device_type = "memory"; reg = <0x8000000 0x2000000>; }; diff --git a/arch/mips/ralink/dts/rt3052_eval.dts b/arch/mips/ralink/dts/rt3052_eval.dts index 0ac73ea..ec9e9a0 100644 --- a/arch/mips/ralink/dts/rt3052_eval.dts +++ b/arch/mips/ralink/dts/rt3052_eval.dts @@ -7,6 +7,7 @@ model = "Ralink RT3052 evaluation board"; memory@0 { + device_type = "memory"; reg = <0x0 0x2000000>; }; diff --git a/arch/mips/ralink/dts/rt3883_eval.dts b/arch/mips/ralink/dts/rt3883_eval.dts index 2fa6b33..e8df21a 100644 --- a/arch/mips/ralink/dts/rt3883_eval.dts +++ b/arch/mips/ralink/dts/rt3883_eval.dts @@ -7,6 +7,7 @@ model = "Ralink RT3883 evaluation board"; memory@0 { + device_type = "memory"; reg = <0x0 0x2000000>; }; diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S index d8a455e..fec8bf9 
100644 --- a/arch/openrisc/kernel/entry.S +++ b/arch/openrisc/kernel/entry.S @@ -853,37 +853,44 @@ UNHANDLED_EXCEPTION(_vector_0x1f00,0x1f00) /* ========================================================[ return ] === */ +_resume_userspace: + DISABLE_INTERRUPTS(r3,r4) + l.lwz r4,TI_FLAGS(r10) + l.andi r13,r4,_TIF_WORK_MASK + l.sfeqi r13,0 + l.bf _restore_all + l.nop + _work_pending: - /* - * if (current_thread_info->flags & _TIF_NEED_RESCHED) - * schedule(); - */ - l.lwz r5,TI_FLAGS(r10) - l.andi r3,r5,_TIF_NEED_RESCHED - l.sfnei r3,0 - l.bnf _work_notifysig + l.lwz r5,PT_ORIG_GPR11(r1) + l.sfltsi r5,0 + l.bnf 1f l.nop - l.jal schedule + l.andi r5,r5,0 +1: + l.jal do_work_pending + l.ori r3,r1,0 /* pt_regs */ + + l.sfeqi r11,0 + l.bf _restore_all l.nop - l.j _resume_userspace + l.sfltsi r11,0 + l.bnf 1f l.nop - -/* Handle pending signals and notify-resume requests. - * do_notify_resume must be passed the latest pushed pt_regs, not - * necessarily the "userspace" ones. Also, pt_regs->syscallno - * must be set so that the syscall restart functionality works. 
- */ -_work_notifysig: - l.jal do_notify_resume - l.ori r3,r1,0 /* pt_regs */ - -_resume_userspace: - DISABLE_INTERRUPTS(r3,r4) - l.lwz r3,TI_FLAGS(r10) - l.andi r3,r3,_TIF_WORK_MASK - l.sfnei r3,0 - l.bf _work_pending + l.and r11,r11,r0 + l.ori r11,r11,__NR_restart_syscall + l.j _syscall_check_trace_enter l.nop +1: + l.lwz r11,PT_ORIG_GPR11(r1) + /* Restore arg registers */ + l.lwz r3,PT_GPR3(r1) + l.lwz r4,PT_GPR4(r1) + l.lwz r5,PT_GPR5(r1) + l.lwz r6,PT_GPR6(r1) + l.lwz r7,PT_GPR7(r1) + l.j _syscall_check_trace_enter + l.lwz r8,PT_GPR8(r1) _restore_all: RESTORE_ALL diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c index ae167f7..c277ec8 100644 --- a/arch/openrisc/kernel/signal.c +++ b/arch/openrisc/kernel/signal.c @@ -28,24 +28,24 @@ #include <linux/tracehook.h> #include <asm/processor.h> +#include <asm/syscall.h> #include <asm/ucontext.h> #include <asm/uaccess.h> #define DEBUG_SIG 0 struct rt_sigframe { - struct siginfo *pinfo; - void *puc; struct siginfo info; struct ucontext uc; unsigned char retcode[16]; /* trampoline code */ }; -static int restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc) +static int restore_sigcontext(struct pt_regs *regs, + struct sigcontext __user *sc) { - unsigned int err = 0; + int err = 0; - /* Alwys make any pending restarted system call return -EINTR */ + /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; /* @@ -53,25 +53,21 @@ static int restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc) * (sc is already checked for VERIFY_READ since the sigframe was * checked in sys_sigreturn previously) */ - if (__copy_from_user(regs, sc->regs.gpr, 32 * sizeof(unsigned long))) - goto badframe; - if (__copy_from_user(&regs->pc, &sc->regs.pc, sizeof(unsigned long))) - goto badframe; - if (__copy_from_user(&regs->sr, &sc->regs.sr, sizeof(unsigned long))) - goto badframe; + err |= __copy_from_user(regs, sc->regs.gpr, 32 *
sizeof(unsigned long)); + err |= __copy_from_user(&regs->pc, &sc->regs.pc, sizeof(unsigned long)); + err |= __copy_from_user(&regs->sr, &sc->regs.sr, sizeof(unsigned long)); /* make sure the SM-bit is cleared so user-mode cannot fool us */ regs->sr &= ~SPR_SR_SM; + regs->orig_gpr11 = -1; /* Avoid syscall restart checks */ + /* TODO: the other ports use regs->orig_XX to disable syscall checks * after this completes, but we don't use that mechanism. maybe we can * use it now ? */ return err; - -badframe: - return 1; } asmlinkage long _sys_rt_sigreturn(struct pt_regs *regs) @@ -111,21 +107,18 @@ badframe: * Set up a signal frame. */ -static int setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs, - unsigned long mask) +static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) { int err = 0; /* copy the regs */ - + /* There should be no need to save callee-saved registers here... + * ...but we save them anyway. Revisit this + */ err |= __copy_to_user(sc->regs.gpr, regs, 32 * sizeof(unsigned long)); err |= __copy_to_user(&sc->regs.pc, &regs->pc, sizeof(unsigned long)); err |= __copy_to_user(&sc->regs.sr, &regs->sr, sizeof(unsigned long)); - /* then some other stuff */ - - err |= __put_user(mask, &sc->oldmask); - return err; } @@ -181,24 +174,18 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, int err = 0; frame = get_sigframe(ka, regs, sizeof(*frame)); - if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; - err |= __put_user(&frame->info, &frame->pinfo); - err |= __put_user(&frame->uc, &frame->puc); - + /* Create siginfo. */ if (ka->sa.sa_flags & SA_SIGINFO) err |= copy_siginfo_to_user(&frame->info, info); - if (err) - goto give_sigsegv; - /* Clear all the bits of the ucontext we don't use. */ - err |= __clear_user(&frame->uc, offsetof(struct ucontext, uc_mcontext)); + /* Create the ucontext.
*/ err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(NULL, &frame->uc.uc_link); err |= __save_altstack(&frame->uc.uc_stack, regs->sp); - err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0]); + err |= setup_sigcontext(regs, &frame->uc.uc_mcontext); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); @@ -207,9 +194,12 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* trampoline - the desired return ip is the retcode itself */ return_ip = (unsigned long)&frame->retcode; - /* This is l.ori r11,r0,__NR_sigreturn, l.sys 1 */ - err |= __put_user(0xa960, (short *)(frame->retcode + 0)); - err |= __put_user(__NR_rt_sigreturn, (short *)(frame->retcode + 2)); + /* This is: + l.ori r11,r0,__NR_sigreturn + l.sys 1 + */ + err |= __put_user(0xa960, (short *)(frame->retcode + 0)); + err |= __put_user(__NR_rt_sigreturn, (short *)(frame->retcode + 2)); err |= __put_user(0x20000001, (unsigned long *)(frame->retcode + 4)); err |= __put_user(0x15000000, (unsigned long *)(frame->retcode + 8)); @@ -262,82 +252,106 @@ handle_signal(unsigned long sig, * mode below. */ -void do_signal(struct pt_regs *regs) +int do_signal(struct pt_regs *regs, int syscall) { siginfo_t info; int signr; struct k_sigaction ka; - - /* - * We want the common case to go fast, which - * is why we may in certain cases get here from - * kernel mode. Just return without doing anything - * if so. - */ - if (!user_mode(regs)) - return; - - signr = get_signal_to_deliver(&info, &ka, regs, NULL); - - /* If we are coming out of a syscall then we need - * to check if the syscall was interrupted and wants to be - * restarted after handling the signal. If so, the original - * syscall number is put back into r11 and the PC rewound to - * point at the l.sys instruction that resulted in the - * original syscall. Syscall results other than the four - * below mean that the syscall executed to completion and no - * restart is necessary. 
- */ - if (regs->orig_gpr11) { - int restart = 0; - - switch (regs->gpr[11]) { + unsigned long continue_addr = 0; + unsigned long restart_addr = 0; + unsigned long retval = 0; + int restart = 0; + + if (syscall) { + continue_addr = regs->pc; + restart_addr = continue_addr - 4; + retval = regs->gpr[11]; + + /* + * Setup syscall restart here so that a debugger will + * see the already changed PC. + */ + switch (retval) { case -ERESTART_RESTARTBLOCK: + restart = -2; + /* Fall through */ case -ERESTARTNOHAND: - /* Restart if there is no signal handler */ - restart = (signr <= 0); - break; case -ERESTARTSYS: - /* Restart if there no signal handler or - * SA_RESTART flag is set */ - restart = (signr <= 0 || (ka.sa.sa_flags & SA_RESTART)); - break; case -ERESTARTNOINTR: - /* Always restart */ - restart = 1; + restart++; + regs->gpr[11] = regs->orig_gpr11; + regs->pc = restart_addr; break; } + } - if (restart) { - if (regs->gpr[11] == -ERESTART_RESTARTBLOCK) - regs->gpr[11] = __NR_restart_syscall; - else - regs->gpr[11] = regs->orig_gpr11; - regs->pc -= 4; - } else { - regs->gpr[11] = -EINTR; + /* + * Get the signal to deliver. When running under ptrace, at this + * point the debugger may change all our registers ... + */ + signr = get_signal_to_deliver(&info, &ka, regs, NULL); + /* + * Depending on the signal settings we may need to revert the + * decision to restart the system call. But skip this if a + * debugger has chosen to restart at a different PC. + */ + if (signr > 0) { + if (unlikely(restart) && regs->pc == restart_addr) { + if (retval == -ERESTARTNOHAND || + retval == -ERESTART_RESTARTBLOCK + || (retval == -ERESTARTSYS + && !(ka.sa.sa_flags & SA_RESTART))) { + /* No automatic restart */ + regs->gpr[11] = -EINTR; + regs->pc = continue_addr; + } } - } - if (signr <= 0) { - /* no signal to deliver so we just put the saved sigmask - * back */ - restore_saved_sigmask(); - } else { /* signr > 0 */ - /* Whee! Actually deliver the signal. 
*/ handle_signal(signr, &info, &ka, regs); + } else { + /* no handler */ + restore_saved_sigmask(); + /* + * Restore pt_regs PC as syscall restart will be handled by + * kernel without return to userspace + */ + if (unlikely(restart) && regs->pc == restart_addr) { + regs->pc = continue_addr; + return restart; + } } - return; + return 0; } -asmlinkage void do_notify_resume(struct pt_regs *regs) +asmlinkage int +do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) { - if (current_thread_info()->flags & _TIF_SIGPENDING) - do_signal(regs); - - if (current_thread_info()->flags & _TIF_NOTIFY_RESUME) { - clear_thread_flag(TIF_NOTIFY_RESUME); - tracehook_notify_resume(regs); - } + do { + if (likely(thread_flags & _TIF_NEED_RESCHED)) { + schedule(); + } else { + if (unlikely(!user_mode(regs))) + return 0; + local_irq_enable(); + if (thread_flags & _TIF_SIGPENDING) { + int restart = do_signal(regs, syscall); + if (unlikely(restart)) { + /* + * Restart without handlers. + * Deal with it without leaving + * the kernel space. + */ + return restart; + } + syscall = 0; + } else { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + } + } + local_irq_disable(); + thread_flags = current_thread_info()->flags; + } while (thread_flags & _TIF_WORK_MASK); + return 0; } diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index e02f665..ceadda9 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -48,7 +48,12 @@ cflags-y := -pipe # These flags should be implied by an hppa-linux configuration, but they # are not in gcc 3.2. -cflags-y += -mno-space-regs -mfast-indirect-calls +cflags-y += -mno-space-regs + +# -mfast-indirect-calls is only relevant for 32-bit kernels. +ifndef CONFIG_64BIT +cflags-y += -mfast-indirect-calls +endif # Currently we save and restore fpregs on all kernel entry/interruption paths. 
# If that gets optimized, we might need to disable the use of fpregs in the diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index cc2290a..c6ee865 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -53,6 +53,8 @@ #define STACK_TOP TASK_SIZE #define STACK_TOP_MAX DEFAULT_TASK_SIZE +#define STACK_SIZE_MAX (1 << 30) /* 1 GB */ + #endif #ifndef __ASSEMBLY__ diff --git a/arch/parisc/include/uapi/asm/shmbuf.h b/arch/parisc/include/uapi/asm/shmbuf.h index 0a3eada..f395cde 100644 --- a/arch/parisc/include/uapi/asm/shmbuf.h +++ b/arch/parisc/include/uapi/asm/shmbuf.h @@ -36,23 +36,16 @@ struct shmid64_ds { unsigned int __unused2; }; -#ifdef CONFIG_64BIT -/* The 'unsigned int' (formerly 'unsigned long') data types below will - * ensure that a 32-bit app calling shmctl(*,IPC_INFO,*) will work on - * a wide kernel, but if some of these values are meant to contain pointers - * they may need to be 'long long' instead. -PB XXX FIXME - */ -#endif struct shminfo64 { - unsigned int shmmax; - unsigned int shmmin; - unsigned int shmmni; - unsigned int shmseg; - unsigned int shmall; - unsigned int __unused1; - unsigned int __unused2; - unsigned int __unused3; - unsigned int __unused4; + unsigned long shmmax; + unsigned long shmmin; + unsigned long shmmni; + unsigned long shmseg; + unsigned long shmall; + unsigned long __unused1; + unsigned long __unused2; + unsigned long __unused3; + unsigned long __unused4; }; #endif /* _PARISC_SHMBUF_H */ diff --git a/arch/parisc/include/uapi/asm/signal.h b/arch/parisc/include/uapi/asm/signal.h index a2fa2971..f5645d6 100644 --- a/arch/parisc/include/uapi/asm/signal.h +++ b/arch/parisc/include/uapi/asm/signal.h @@ -69,8 +69,6 @@ #define SA_NOMASK SA_NODEFER #define SA_ONESHOT SA_RESETHAND -#define SA_RESTORER 0x04000000 /* obsolete -- ignored */ - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/parisc/kernel/syscall_table.S 
b/arch/parisc/kernel/syscall_table.S index 10a0c2a..b24732d 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -286,11 +286,11 @@ ENTRY_COMP(msgsnd) ENTRY_COMP(msgrcv) ENTRY_SAME(msgget) /* 190 */ - ENTRY_SAME(msgctl) - ENTRY_SAME(shmat) + ENTRY_COMP(msgctl) + ENTRY_COMP(shmat) ENTRY_SAME(shmdt) ENTRY_SAME(shmget) - ENTRY_SAME(shmctl) /* 195 */ + ENTRY_COMP(shmctl) /* 195 */ ENTRY_SAME(ni_syscall) /* streams1 */ ENTRY_SAME(ni_syscall) /* streams2 */ ENTRY_SAME(lstat64) @@ -323,7 +323,7 @@ ENTRY_SAME(epoll_ctl) /* 225 */ ENTRY_SAME(epoll_wait) ENTRY_SAME(remap_file_pages) - ENTRY_SAME(semtimedop) + ENTRY_COMP(semtimedop) ENTRY_COMP(mq_open) ENTRY_SAME(mq_unlink) /* 230 */ ENTRY_COMP(mq_timedsend) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index cafd166..aa545b5 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -140,6 +140,7 @@ config PPC select OLD_SIGSUSPEND select OLD_SIGACTION if PPC32 select HAVE_DEBUG_STACKOVERFLOW + select ARCH_SUPPORTS_ATOMIC_RMW config EARLY_PRINTK bool @@ -397,7 +398,7 @@ config KEXEC config CRASH_DUMP bool "Build a kdump crash kernel" depends on PPC64 || 6xx || FSL_BOOKE || (44x && !SMP) - select RELOCATABLE if PPC64 || 44x + select RELOCATABLE if (PPC64 && !COMPILE_TEST) || 44x select DYNAMIC_MEMSTART if FSL_BOOKE help Build a kernel suitable for use as a kdump capture kernel. 
@@ -994,6 +995,7 @@ endmenu if PPC64 config RELOCATABLE bool "Build a relocatable kernel" + depends on !COMPILE_TEST select NONSTATIC_KERNEL help This builds a kernel image that is capable of running anywhere diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 1b4973b..e029fe0 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -121,7 +121,9 @@ endif CFLAGS-$(CONFIG_TUNE_CELL) += $(call cc-option,-mtune=cell) -KBUILD_CPPFLAGS += -Iarch/$(ARCH) +asinstr := $(call as-instr,lis 9$(comma)foo@high,-DHAVE_AS_ATHIGH=1) + +KBUILD_CPPFLAGS += -Iarch/$(ARCH) $(asinstr) KBUILD_AFLAGS += -Iarch/$(ARCH) KBUILD_CFLAGS += -msoft-float -pipe -Iarch/$(ARCH) $(CFLAGS-y) CPP = $(CC) -E $(KBUILD_CFLAGS) diff --git a/arch/powerpc/crypto/sha1.c b/arch/powerpc/crypto/sha1.c index f9e8b94..b51da91 100644 --- a/arch/powerpc/crypto/sha1.c +++ b/arch/powerpc/crypto/sha1.c @@ -154,4 +154,5 @@ module_exit(sha1_powerpc_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm"); -MODULE_ALIAS("sha1-powerpc"); +MODULE_ALIAS_CRYPTO("sha1"); +MODULE_ALIAS_CRYPTO("sha1-powerpc"); diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h index a613d2c..b142b8e 100644 --- a/arch/powerpc/include/asm/compat.h +++ b/arch/powerpc/include/asm/compat.h @@ -8,7 +8,11 @@ #include <linux/sched.h> #define COMPAT_USER_HZ 100 +#ifdef __BIG_ENDIAN__ #define COMPAT_UTS_MACHINE "ppc\0\0" +#else +#define COMPAT_UTS_MACHINE "ppcle\0\0" +#endif typedef u32 compat_size_t; typedef s32 compat_ssize_t; diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index bb96a21..a3ec4f8 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -58,10 +58,10 @@ struct machdep_calls { void (*hpte_removebolted)(unsigned long ea, int psize, int ssize); void (*flush_hash_range)(unsigned long number, int local); - void (*hugepage_invalidate)(struct mm_struct *mm, + void 
(*hugepage_invalidate)(unsigned long vsid, + unsigned long addr, unsigned char *hpte_slot_array, - unsigned long addr, int psize); - + int psize, int ssize); /* special for kexec, to be called in real mode, linear mapping is * destroyed as well */ void (*hpte_clear_all)(void); diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 3fd2f1b..cefc7b4 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -60,8 +60,7 @@ struct power_pmu { #define PPMU_SIAR_VALID 0x00000010 /* Processor has SIAR Valid bit */ #define PPMU_HAS_SSLOT 0x00000020 /* Has sampled slot in MMCRA */ #define PPMU_HAS_SIER 0x00000040 /* Has SIER */ -#define PPMU_BHRB 0x00000080 /* has BHRB feature enabled */ -#define PPMU_EBB 0x00000100 /* supports event based branch */ +#define PPMU_ARCH_207S 0x00000080 /* PMC is architecture v2.07S */ /* * Values for flags to get_alternatives() diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index 46db094..832a39d 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -409,7 +409,7 @@ static inline char *get_hpte_slot_array(pmd_t *pmdp) } extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp); + pmd_t *pmdp, unsigned long old_pmd); #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot); extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot); diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 87bfd34..250106b 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -295,11 +295,16 @@ n: * ld rY,ADDROFF(name)(rX) */ #ifdef __powerpc64__ +#ifdef HAVE_AS_ATHIGH +#define __AS_ATHIGH high +#else +#define __AS_ATHIGH h +#endif #define LOAD_REG_IMMEDIATE(reg,expr) \ lis reg,(expr)@highest; \ ori 
reg,reg,(expr)@higher; \ rldicr reg,reg,32,31; \ - oris reg,reg,(expr)@h; \ + oris reg,reg,(expr)@__AS_ATHIGH; \ ori reg,reg,(expr)@l; #define LOAD_REG_ADDR(reg,name) \ diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h index d836d94..9ecede1 100644 --- a/arch/powerpc/include/asm/pte-hash64-64k.h +++ b/arch/powerpc/include/asm/pte-hash64-64k.h @@ -46,11 +46,31 @@ * in order to deal with 64K made of 4K HW pages. Thus we override the * generic accessors and iterators here */ -#define __real_pte(e,p) ((real_pte_t) { \ - (e), (pte_val(e) & _PAGE_COMBO) ? \ - (pte_val(*((p) + PTRS_PER_PTE))) : 0 }) -#define __rpte_to_hidx(r,index) ((pte_val((r).pte) & _PAGE_COMBO) ? \ - (((r).hidx >> ((index)<<2)) & 0xf) : ((pte_val((r).pte) >> 12) & 0xf)) +#define __real_pte __real_pte +static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) +{ + real_pte_t rpte; + + rpte.pte = pte; + rpte.hidx = 0; + if (pte_val(pte) & _PAGE_COMBO) { + /* + * Make sure we order the hidx load against the _PAGE_COMBO + * check. 
The store side ordering is done in __hash_page_4K + */ + smp_rmb(); + rpte.hidx = pte_val(*((ptep) + PTRS_PER_PTE)); + } + return rpte; +} + +static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index) +{ + if ((pte_val(rpte.pte) & _PAGE_COMBO)) + return (rpte.hidx >> (index<<2)) & 0xf; + return (pte_val(rpte.pte) >> 12) & 0xf; +} + #define __rpte_to_pte(r) ((r).pte) #define __rpte_sub_valid(rpte, index) \ (pte_val(rpte.pte) & (_PAGE_HPTE_SUB0 >> (index))) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 5402fb1..1addf15 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -116,6 +116,7 @@ /* Server variant */ #define MSR_ (MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF |MSR_HV) +#define MSR_IDLE (MSR_ME | MSR_SF | MSR_HV) #define MSR_KERNEL (MSR_ | MSR_64BIT) #define MSR_USER32 (MSR_ | MSR_PR | MSR_EE) #define MSR_USER64 (MSR_USER32 | MSR_64BIT) @@ -208,6 +209,7 @@ #define SPRN_ACOP 0x1F /* Available Coprocessor Register */ #define SPRN_TFIAR 0x81 /* Transaction Failure Inst Addr */ #define SPRN_TEXASR 0x82 /* Transaction EXception & Summary */ +#define TEXASR_FS __MASK(63-36) /* Transaction Failure Summary */ #define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */ #define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */ #define SPRN_CTRLF 0x088 diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index 5f54a74..826be86 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -28,8 +28,6 @@ #include <asm/synch.h> #include <asm/ppc-opcode.h> -#define arch_spin_is_locked(x) ((x)->slock != 0) - #ifdef CONFIG_PPC64 /* use 0x800000yy when locked, where yy == CPU number */ #ifdef __BIG_ENDIAN__ @@ -54,6 +52,12 @@ #define SYNC_IO #endif +static inline int arch_spin_is_locked(arch_spinlock_t *lock) +{ + smp_mb(); + return lock->slock != 0; +} + /* * This returns the old value in the lock, so we succeeded * in getting the 
lock if the return value is 0. diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index aace905..9a15f05 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -86,6 +86,8 @@ static inline void clear_task_ebb(struct task_struct *t) { #ifdef CONFIG_PPC_BOOK3S_64 /* EBB perf events are not inherited, so clear all EBB state. */ + t->thread.ebbrr = 0; + t->thread.ebbhr = 0; t->thread.bescr = 0; t->thread.mmcr2 = 0; t->thread.mmcr0 = 0; diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 43523fe..05fcdd8 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -190,7 +190,7 @@ SYSCALL_SPU(getcwd) SYSCALL_SPU(capget) SYSCALL_SPU(capset) COMPAT_SYS(sigaltstack) -COMPAT_SYS_SPU(sendfile) +SYSX_SPU(sys_sendfile64,compat_sys_sendfile,sys_sendfile) SYSCALL(ni_syscall) SYSCALL(ni_syscall) PPC_SYS(vfork) diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h index 5b76579..de2c0e4 100644 --- a/arch/powerpc/include/uapi/asm/cputable.h +++ b/arch/powerpc/include/uapi/asm/cputable.h @@ -41,5 +41,6 @@ #define PPC_FEATURE2_EBB 0x10000000 #define PPC_FEATURE2_ISEL 0x08000000 #define PPC_FEATURE2_TAR 0x04000000 +#define PPC_FEATURE2_VEC_CRYPTO 0x02000000 #endif /* _UAPI__ASM_POWERPC_CPUTABLE_H */ diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index b0550a2..bbb80db 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -105,7 +105,8 @@ extern void __restore_cpu_e6500(void); PPC_FEATURE_PSERIES_PERFMON_COMPAT) #define COMMON_USER2_POWER8 (PPC_FEATURE2_ARCH_2_07 | \ PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_DSCR | \ - PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR) + PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \ + PPC_FEATURE2_VEC_CRYPTO) #define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\ PPC_FEATURE_TRUE_LE | \ PPC_FEATURE_HAS_ALTIVEC_COMP) diff 
--git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index e11863f..df93072 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -84,6 +84,22 @@ _GLOBAL(power7_nap) std r9,_MSR(r1) std r1,PACAR1(r13) + /* + * Go to real mode to do the nap, as required by the architecture. + * Also, we need to be in real mode before setting hwthread_state, + * because as soon as we do that, another thread can switch + * the MMU context to the guest. + */ + LOAD_REG_IMMEDIATE(r5, MSR_IDLE) + li r6, MSR_RI + andc r6, r9, r6 + LOAD_REG_ADDR(r7, power7_enter_nap_mode) + mtmsrd r6, 1 /* clear RI before setting SRR0/1 */ + mtspr SPRN_SRR0, r7 + mtspr SPRN_SRR1, r5 + rfid + +power7_enter_nap_mode: #ifdef CONFIG_KVM_BOOK3S_64_HV /* Tell KVM we're napping */ li r4,KVM_HWTHREAD_IN_NAP diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 22e88dd..a531358 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -48,6 +48,9 @@ static struct __initdata of_device_id legacy_serial_parents[] = { static unsigned int legacy_serial_count; static int legacy_serial_console = -1; +static const upf_t legacy_port_flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | + UPF_SHARE_IRQ | UPF_FIXED_PORT; + static unsigned int tsi_serial_in(struct uart_port *p, int offset) { unsigned int tmp; @@ -153,8 +156,6 @@ static int __init add_legacy_soc_port(struct device_node *np, { u64 addr; const __be32 *addrp; - upf_t flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ - | UPF_FIXED_PORT; struct device_node *tsi = of_get_parent(np); /* We only support ports that have a clock frequency properly @@ -185,9 +186,11 @@ static int __init add_legacy_soc_port(struct device_node *np, * IO port value. 
It will be fixed up later along with the irq */ if (tsi && !strcmp(tsi->type, "tsi-bridge")) - return add_legacy_port(np, -1, UPIO_TSI, addr, addr, NO_IRQ, flags, 0); + return add_legacy_port(np, -1, UPIO_TSI, addr, addr, + NO_IRQ, legacy_port_flags, 0); else - return add_legacy_port(np, -1, UPIO_MEM, addr, addr, NO_IRQ, flags, 0); + return add_legacy_port(np, -1, UPIO_MEM, addr, addr, + NO_IRQ, legacy_port_flags, 0); } static int __init add_legacy_isa_port(struct device_node *np, @@ -233,7 +236,7 @@ static int __init add_legacy_isa_port(struct device_node *np, /* Add port, irq will be dealt with later */ return add_legacy_port(np, index, UPIO_PORT, be32_to_cpu(reg[1]), - taddr, NO_IRQ, UPF_BOOT_AUTOCONF, 0); + taddr, NO_IRQ, legacy_port_flags, 0); } @@ -306,7 +309,7 @@ static int __init add_legacy_pci_port(struct device_node *np, * IO port value. It will be fixed up later along with the irq */ return add_legacy_port(np, index, iotype, base, addr, NO_IRQ, - UPF_BOOT_AUTOCONF, np != pci_dev); + legacy_port_flags, np != pci_dev); } #endif diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 611acdf..263e445 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -237,7 +237,7 @@ static void wake_offline_cpus(void) if (!cpu_online(cpu)) { printk(KERN_INFO "kexec: Waking offline cpu %d.\n", cpu); - cpu_up(cpu); + WARN_ON(cpu_up(cpu)); } } } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 815b540..a4dcc7e 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -526,6 +526,31 @@ out_and_saveregs: tm_save_sprs(thr); } +extern void __tm_recheckpoint(struct thread_struct *thread, + unsigned long orig_msr); + +void tm_recheckpoint(struct thread_struct *thread, + unsigned long orig_msr) +{ + unsigned long flags; + + /* We really can't be interrupted here as the TEXASR registers can't + * change and later in the trecheckpoint 
code, we have a userspace R1. + * So let's hard disable over this region. + */ + local_irq_save(flags); + hard_irq_disable(); + + /* The TM SPRs are restored here, so that TEXASR.FS can be set + * before the trecheckpoint and no explosion occurs. + */ + tm_restore_sprs(thread); + + __tm_recheckpoint(thread, orig_msr); + + local_irq_restore(flags); +} + static inline void tm_recheckpoint_new_task(struct task_struct *new) { unsigned long msr; @@ -544,13 +569,10 @@ static inline void tm_recheckpoint_new_task(struct task_struct *new) if (!new->thread.regs) return; - /* The TM SPRs are restored here, so that TEXASR.FS can be set - * before the trecheckpoint and no explosion occurs. - */ - tm_restore_sprs(&new->thread); - - if (!MSR_TM_ACTIVE(new->thread.regs->msr)) + if (!MSR_TM_ACTIVE(new->thread.regs->msr)){ + tm_restore_sprs(&new->thread); return; + } msr = new->thread.tm_orig_msr; /* Recheckpoint to restore original checkpointed register state. */ TM_DEBUG("*** tm_recheckpoint of pid %d " diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 9ab397a..a2aa3e9 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -460,9 +460,17 @@ void __init smp_setup_cpu_maps(void) nthreads = len / sizeof(int); for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) { + bool avail; + DBG(" thread %d -> cpu %d (hard id %d)\n", j, cpu, be32_to_cpu(intserv[j])); - set_cpu_present(cpu, true); + + avail = of_device_is_available(dn); + if (!avail) + avail = !of_property_match_string(dn, + "enable-method", "spin-table"); + + set_cpu_present(cpu, avail); set_hard_smp_processor_id(cpu, be32_to_cpu(intserv[j])); set_cpu_possible(cpu, true); cpu++; diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 5d5fe63..095537b 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -874,6 +874,8 @@ static long restore_tm_user_regs(struct pt_regs *regs, * transactional 
versions should be loaded. */ tm_enable(); + /* Make sure the transaction is marked as failed */ + current->thread.tm_texasr |= TEXASR_FS; /* This loads the checkpointed FP/VEC state, if used */ tm_recheckpoint(&current->thread, msr); /* Get the top half of the MSR */ diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index df5b948..cc88b17 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -520,6 +520,8 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, } #endif tm_enable(); + /* Make sure the transaction is marked as failed */ + current->thread.tm_texasr |= TEXASR_FS; /* This loads the checkpointed FP/VEC state, if used */ tm_recheckpoint(&current->thread, msr); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index bc12e6a..1917acb5 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -515,7 +515,7 @@ void timer_interrupt(struct pt_regs * regs) __get_cpu_var(irq_stat).timer_irqs++; -#if defined(CONFIG_PPC32) && defined(CONFIG_PMAC) +#if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC) if (atomic_read(&ppc_n_lost_interrupts) != 0) do_IRQ(regs); #endif diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 761af4f..c908b56 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -307,7 +307,7 @@ dont_backup_fp: * Call with IRQs off, stacks get all out of sync for * some periods in here!
*/ -_GLOBAL(tm_recheckpoint) +_GLOBAL(__tm_recheckpoint) mfcr r5 mflr r0 std r5, 8(r1) diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index 75702e2..f7089fc 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -69,8 +69,12 @@ static void udbg_uart_putc(char c) static int udbg_uart_getc_poll(void) { - if (!udbg_uart_in || !(udbg_uart_in(UART_LSR) & LSR_DR)) + if (!udbg_uart_in) + return -1; + + if (!(udbg_uart_in(UART_LSR) & LSR_DR)) return udbg_uart_in(UART_RBR); + return -1; } diff --git a/arch/powerpc/kernel/vdso32/getcpu.S b/arch/powerpc/kernel/vdso32/getcpu.S index 23eb9a9..c62be60 100644 --- a/arch/powerpc/kernel/vdso32/getcpu.S +++ b/arch/powerpc/kernel/vdso32/getcpu.S @@ -30,8 +30,8 @@ V_FUNCTION_BEGIN(__kernel_getcpu) .cfi_startproc mfspr r5,SPRN_SPRG_VDSO_READ - cmpdi cr0,r3,0 - cmpdi cr1,r4,0 + cmpwi cr0,r3,0 + cmpwi cr1,r4,0 clrlwi r6,r5,16 rlwinm r7,r5,16,31-15,31-0 beq cr0,1f diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index e1ab62e..211974a 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -82,7 +82,7 @@ void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) /* CPU points to the first thread of the core */ if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) { -#ifdef CONFIG_KVM_XICS +#ifdef CONFIG_PPC_ICP_NATIVE int real_cpu = cpu + vcpu->arch.ptid; if (paca[real_cpu].kvm_hstate.xics_phys) xics_wake_cpu(real_cpu); @@ -1092,9 +1092,7 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu) smp_wmb(); #if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP) if (vcpu->arch.ptid) { -#ifdef CONFIG_KVM_XICS xics_wake_cpu(cpu); -#endif ++vc->n_woken; } #endif diff --git a/arch/powerpc/lib/crtsavres.S b/arch/powerpc/lib/crtsavres.S index b2c68ce..a5b30c7 100644 --- a/arch/powerpc/lib/crtsavres.S +++ b/arch/powerpc/lib/crtsavres.S @@ -231,6 +231,87 @@ _GLOBAL(_rest32gpr_31_x) mr 1,11 blr +#ifdef CONFIG_ALTIVEC +/* Called with r0 pointing just 
beyond the end of the vector save area. */ + +_GLOBAL(_savevr_20) + li r11,-192 + stvx vr20,r11,r0 +_GLOBAL(_savevr_21) + li r11,-176 + stvx vr21,r11,r0 +_GLOBAL(_savevr_22) + li r11,-160 + stvx vr22,r11,r0 +_GLOBAL(_savevr_23) + li r11,-144 + stvx vr23,r11,r0 +_GLOBAL(_savevr_24) + li r11,-128 + stvx vr24,r11,r0 +_GLOBAL(_savevr_25) + li r11,-112 + stvx vr25,r11,r0 +_GLOBAL(_savevr_26) + li r11,-96 + stvx vr26,r11,r0 +_GLOBAL(_savevr_27) + li r11,-80 + stvx vr27,r11,r0 +_GLOBAL(_savevr_28) + li r11,-64 + stvx vr28,r11,r0 +_GLOBAL(_savevr_29) + li r11,-48 + stvx vr29,r11,r0 +_GLOBAL(_savevr_30) + li r11,-32 + stvx vr30,r11,r0 +_GLOBAL(_savevr_31) + li r11,-16 + stvx vr31,r11,r0 + blr + +_GLOBAL(_restvr_20) + li r11,-192 + lvx vr20,r11,r0 +_GLOBAL(_restvr_21) + li r11,-176 + lvx vr21,r11,r0 +_GLOBAL(_restvr_22) + li r11,-160 + lvx vr22,r11,r0 +_GLOBAL(_restvr_23) + li r11,-144 + lvx vr23,r11,r0 +_GLOBAL(_restvr_24) + li r11,-128 + lvx vr24,r11,r0 +_GLOBAL(_restvr_25) + li r11,-112 + lvx vr25,r11,r0 +_GLOBAL(_restvr_26) + li r11,-96 + lvx vr26,r11,r0 +_GLOBAL(_restvr_27) + li r11,-80 + lvx vr27,r11,r0 +_GLOBAL(_restvr_28) + li r11,-64 + lvx vr28,r11,r0 +_GLOBAL(_restvr_29) + li r11,-48 + lvx vr29,r11,r0 +_GLOBAL(_restvr_30) + li r11,-32 + lvx vr30,r11,r0 +_GLOBAL(_restvr_31) + li r11,-16 + lvx vr31,r11,r0 + blr + +#endif /* CONFIG_ALTIVEC */ + #else /* CONFIG_PPC64 */ .section ".text.save.restore","ax",@progbits @@ -356,6 +437,111 @@ _restgpr0_31: mtlr r0 blr +#ifdef CONFIG_ALTIVEC +/* Called with r0 pointing just beyond the end of the vector save area. 
*/ + +.globl _savevr_20 +_savevr_20: + li r12,-192 + stvx vr20,r12,r0 +.globl _savevr_21 +_savevr_21: + li r12,-176 + stvx vr21,r12,r0 +.globl _savevr_22 +_savevr_22: + li r12,-160 + stvx vr22,r12,r0 +.globl _savevr_23 +_savevr_23: + li r12,-144 + stvx vr23,r12,r0 +.globl _savevr_24 +_savevr_24: + li r12,-128 + stvx vr24,r12,r0 +.globl _savevr_25 +_savevr_25: + li r12,-112 + stvx vr25,r12,r0 +.globl _savevr_26 +_savevr_26: + li r12,-96 + stvx vr26,r12,r0 +.globl _savevr_27 +_savevr_27: + li r12,-80 + stvx vr27,r12,r0 +.globl _savevr_28 +_savevr_28: + li r12,-64 + stvx vr28,r12,r0 +.globl _savevr_29 +_savevr_29: + li r12,-48 + stvx vr29,r12,r0 +.globl _savevr_30 +_savevr_30: + li r12,-32 + stvx vr30,r12,r0 +.globl _savevr_31 +_savevr_31: + li r12,-16 + stvx vr31,r12,r0 + blr + +.globl _restvr_20 +_restvr_20: + li r12,-192 + lvx vr20,r12,r0 +.globl _restvr_21 +_restvr_21: + li r12,-176 + lvx vr21,r12,r0 +.globl _restvr_22 +_restvr_22: + li r12,-160 + lvx vr22,r12,r0 +.globl _restvr_23 +_restvr_23: + li r12,-144 + lvx vr23,r12,r0 +.globl _restvr_24 +_restvr_24: + li r12,-128 + lvx vr24,r12,r0 +.globl _restvr_25 +_restvr_25: + li r12,-112 + lvx vr25,r12,r0 +.globl _restvr_26 +_restvr_26: + li r12,-96 + lvx vr26,r12,r0 +.globl _restvr_27 +_restvr_27: + li r12,-80 + lvx vr27,r12,r0 +.globl _restvr_28 +_restvr_28: + li r12,-64 + lvx vr28,r12,r0 +.globl _restvr_29 +_restvr_29: + li r12,-48 + lvx vr29,r12,r0 +.globl _restvr_30 +_restvr_30: + li r12,-32 + lvx vr30,r12,r0 +.globl _restvr_31 +_restvr_31: + li r12,-16 + lvx vr31,r12,r0 + blr + +#endif /* CONFIG_ALTIVEC */ + #endif /* CONFIG_PPC64 */ #endif diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c index 0c9c8d7..170a034 100644 --- a/arch/powerpc/lib/locks.c +++ b/arch/powerpc/lib/locks.c @@ -70,12 +70,16 @@ void __rw_yield(arch_rwlock_t *rw) void arch_spin_unlock_wait(arch_spinlock_t *lock) { + smp_mb(); + while (lock->slock) { HMT_low(); if (SHARED_PROCESSOR) __spin_yield(lock); } HMT_medium(); + + 
smp_mb(); } EXPORT_SYMBOL(arch_spin_unlock_wait); diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index b1faa15..aec4dbf 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1397,7 +1397,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) regs->gpr[rd] = byterev_4(val); goto ldst_done; -#ifdef CONFIG_PPC_CPU +#ifdef CONFIG_PPC_FPU case 535: /* lfsx */ case 567: /* lfsux */ if (!(regs->msr & MSR_FP)) diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index c33d939..9ca9c16 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -413,18 +413,18 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, local_irq_restore(flags); } -static void native_hugepage_invalidate(struct mm_struct *mm, +static void native_hugepage_invalidate(unsigned long vsid, + unsigned long addr, unsigned char *hpte_slot_array, - unsigned long addr, int psize) + int psize, int ssize) { - int ssize = 0, i; - int lock_tlbie; + int i; struct hash_pte *hptep; int actual_psize = MMU_PAGE_16M; unsigned int max_hpte_count, valid; unsigned long flags, s_addr = addr; unsigned long hpte_v, want_v, shift; - unsigned long hidx, vpn = 0, vsid, hash, slot; + unsigned long hidx, vpn = 0, hash, slot; shift = mmu_psize_defs[psize].shift; max_hpte_count = 1U << (PMD_SHIFT - shift); @@ -438,15 +438,6 @@ static void native_hugepage_invalidate(struct mm_struct *mm, /* get the vpn */ addr = s_addr + (i * (1ul << shift)); - if (!is_kernel_addr(addr)) { - ssize = user_segment_size(addr); - vsid = get_vsid(mm->context.id, addr, ssize); - WARN_ON(vsid == 0); - } else { - vsid = get_kernel_vsid(addr, mmu_kernel_ssize); - ssize = mmu_kernel_ssize; - } - vpn = hpt_vpn(addr, vsid, ssize); hash = hpt_hash(vpn, shift, ssize); if (hidx & _PTEIDX_SECONDARY) @@ -466,22 +457,13 @@ static void native_hugepage_invalidate(struct mm_struct *mm, else /* Invalidate the hpte. 
NOTE: this also unlocks it */ hptep->v = 0; + /* + * We need to do tlb invalidate for all the address, tlbie + * instruction compares entry_VA in tlb with the VA specified + * here + */ + tlbie(vpn, psize, actual_psize, ssize, 0); } - /* - * Since this is a hugepage, we just need a single tlbie. - * use the last vpn. - */ - lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); - if (lock_tlbie) - raw_spin_lock(&native_tlbie_lock); - - asm volatile("ptesync":::"memory"); - __tlbie(vpn, psize, actual_psize, ssize); - asm volatile("eieio; tlbsync; ptesync":::"memory"); - - if (lock_tlbie) - raw_spin_unlock(&native_tlbie_lock); - local_irq_restore(flags); } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index bde8b55..503a5d0 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -947,6 +947,22 @@ void hash_failure_debug(unsigned long ea, unsigned long access, trap, vsid, ssize, psize, lpsize, pte); } +static void check_paca_psize(unsigned long ea, struct mm_struct *mm, + int psize, bool user_region) +{ + if (user_region) { + if (psize != get_paca_psize(ea)) { + get_paca()->context = mm->context; + slb_flush_and_rebolt(); + } + } else if (get_paca()->vmalloc_sllp != + mmu_psize_defs[mmu_vmalloc_psize].sllp) { + get_paca()->vmalloc_sllp = + mmu_psize_defs[mmu_vmalloc_psize].sllp; + slb_vmalloc_update(); + } +} + /* Result code is: * 0 - handled * 1 - normal page fault @@ -1068,6 +1084,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) WARN_ON(1); } #endif + check_paca_psize(ea, mm, psize, user_region); + goto bail; } @@ -1108,17 +1126,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) #endif } } - if (user_region) { - if (psize != get_paca_psize(ea)) { - get_paca()->context = mm->context; - slb_flush_and_rebolt(); - } - } else if (get_paca()->vmalloc_sllp != - mmu_psize_defs[mmu_vmalloc_psize].sllp) { - get_paca()->vmalloc_sllp = - 
mmu_psize_defs[mmu_vmalloc_psize].sllp; - slb_vmalloc_update(); - } + + check_paca_psize(ea, mm, psize, user_region); #endif /* CONFIG_PPC_64K_PAGES */ #ifdef CONFIG_PPC_HAS_HASH_64K diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 34de9e0..7d86c86 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -18,6 +18,57 @@ #include <linux/mm.h> #include <asm/machdep.h> +static void invalidate_old_hpte(unsigned long vsid, unsigned long addr, + pmd_t *pmdp, unsigned int psize, int ssize) +{ + int i, max_hpte_count, valid; + unsigned long s_addr; + unsigned char *hpte_slot_array; + unsigned long hidx, shift, vpn, hash, slot; + + s_addr = addr & HPAGE_PMD_MASK; + hpte_slot_array = get_hpte_slot_array(pmdp); + /* + * IF we try to do a HUGE PTE update after a withdraw is done. + * we will find the below NULL. This happens when we do + * split_huge_page_pmd + */ + if (!hpte_slot_array) + return; + + if (ppc_md.hugepage_invalidate) + return ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array, + psize, ssize); + /* + * No bluk hpte removal support, invalidate each entry + */ + shift = mmu_psize_defs[psize].shift; + max_hpte_count = HPAGE_PMD_SIZE >> shift; + for (i = 0; i < max_hpte_count; i++) { + /* + * 8 bits per each hpte entries + * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit] + */ + valid = hpte_valid(hpte_slot_array, i); + if (!valid) + continue; + hidx = hpte_hash_index(hpte_slot_array, i); + + /* get the vpn */ + addr = s_addr + (i * (1ul << shift)); + vpn = hpt_vpn(addr, vsid, ssize); + hash = hpt_hash(vpn, shift, ssize); + if (hidx & _PTEIDX_SECONDARY) + hash = ~hash; + + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += hidx & _PTEIDX_GROUP_IX; + ppc_md.hpte_invalidate(slot, vpn, psize, + MMU_PAGE_16M, ssize, 0); + } +} + + int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, pmd_t *pmdp, unsigned long trap, int local, int ssize, 
unsigned int psize) @@ -33,7 +84,9 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, * atomically mark the linux large page PMD busy and dirty */ do { - old_pmd = pmd_val(*pmdp); + pmd_t pmd = ACCESS_ONCE(*pmdp); + + old_pmd = pmd_val(pmd); /* If PMD busy, retry the access */ if (unlikely(old_pmd & _PAGE_BUSY)) return 0; @@ -85,6 +138,15 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, vpn = hpt_vpn(ea, vsid, ssize); hash = hpt_hash(vpn, shift, ssize); hpte_slot_array = get_hpte_slot_array(pmdp); + if (psize == MMU_PAGE_4K) { + /* + * invalidate the old hpte entry if we have that mapped via 64K + * base page size. This is because demote_segment won't flush + * hash page table entries. + */ + if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) + invalidate_old_hpte(vsid, ea, pmdp, MMU_PAGE_64K, ssize); + } valid = hpte_valid(hpte_slot_array, index); if (valid) { @@ -107,11 +169,8 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, * safely update this here. 
*/ valid = 0; - new_pmd &= ~_PAGE_HPTEFLAGS; hpte_slot_array[index] = 0; - } else - /* clear the busy bits and set the hash pte bits */ - new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; + } } if (!valid) { @@ -119,15 +178,13 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, /* insert new entry */ pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT; -repeat: - hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - - /* clear the busy bits and set the hash pte bits */ - new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; + new_pmd |= _PAGE_HASHPTE; /* Add in WIMG bits */ rflags |= (new_pmd & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | _PAGE_COHERENT | _PAGE_GUARDED)); +repeat: + hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; /* Insert into the hash table, primary slot */ slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, @@ -168,8 +225,17 @@ repeat: mark_hpte_slot_valid(hpte_slot_array, index, slot); } /* - * No need to use ldarx/stdcx here + * Mark the pte with _PAGE_COMBO, if we are trying to hash it with + * base page size 4k. + */ + if (psize == MMU_PAGE_4K) + new_pmd |= _PAGE_COMBO; + /* + * The hpte valid is stored in the pgtable whose address is in the + * second half of the PMD. Order this against clearing of the busy bit in + * huge pmd. 
*/ + smp_wmb(); *pmdp = __pmd(new_pmd & ~_PAGE_BUSY); return 0; } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index d67db4b..834ca8e 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -86,11 +86,6 @@ int pgd_huge(pgd_t pgd) */ return ((pgd_val(pgd) & 0x3) != 0x0); } - -int pmd_huge_support(void) -{ - return 1; -} #else int pmd_huge(pmd_t pmd) { @@ -106,11 +101,6 @@ int pgd_huge(pgd_t pgd) { return 0; } - -int pmd_huge_support(void) -{ - return 0; -} #endif pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 4788ea2..e91079b 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -232,6 +232,7 @@ int __node_distance(int a, int b) return distance; } +EXPORT_SYMBOL(__node_distance); static void initialize_distance_lookup_table(int nid, const __be32 *associativity) @@ -588,8 +589,8 @@ static int cpu_numa_callback(struct notifier_block *nfb, unsigned long action, case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: unmap_cpu_from_node(lcpu); - break; ret = NOTIFY_OK; + break; #endif } return ret; diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 536eec72..c9379a2 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -524,7 +524,7 @@ unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, *pmdp = __pmd(old & ~clr); #endif if (old & _PAGE_HASHPTE) - hpte_do_hugepage_flush(mm, addr, pmdp); + hpte_do_hugepage_flush(mm, addr, pmdp, old); return old; } @@ -631,7 +631,7 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, if (!(old & _PAGE_SPLITTING)) { /* We need to flush the hpte */ if (old & _PAGE_HASHPTE) - hpte_do_hugepage_flush(vma->vm_mm, address, pmdp); + hpte_do_hugepage_flush(vma->vm_mm, address, pmdp, old); } } @@ -704,7 +704,7 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, * neesd to be flushed. 
*/ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp) + pmd_t *pmdp, unsigned long old_pmd) { int ssize, i; unsigned long s_addr; @@ -726,12 +726,29 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, if (!hpte_slot_array) return; - /* get the base page size */ + /* get the base page size,vsid and segment size */ +#ifdef CONFIG_DEBUG_VM psize = get_slice_psize(mm, s_addr); + BUG_ON(psize == MMU_PAGE_16M); +#endif + if (old_pmd & _PAGE_COMBO) + psize = MMU_PAGE_4K; + else + psize = MMU_PAGE_64K; + + if (!is_kernel_addr(s_addr)) { + ssize = user_segment_size(s_addr); + vsid = get_vsid(mm->context.id, s_addr, ssize); + WARN_ON(vsid == 0); + } else { + vsid = get_kernel_vsid(s_addr, mmu_kernel_ssize); + ssize = mmu_kernel_ssize; + } if (ppc_md.hugepage_invalidate) - return ppc_md.hugepage_invalidate(mm, hpte_slot_array, - s_addr, psize); + return ppc_md.hugepage_invalidate(vsid, s_addr, + hpte_slot_array, + psize, ssize); /* * No bluk hpte removal support, invalidate each entry */ @@ -749,15 +766,6 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, /* get the vpn */ addr = s_addr + (i * (1ul << shift)); - if (!is_kernel_addr(addr)) { - ssize = user_segment_size(addr); - vsid = get_vsid(mm->context.id, addr, ssize); - WARN_ON(vsid == 0); - } else { - vsid = get_kernel_vsid(addr, mmu_kernel_ssize); - ssize = mmu_kernel_ssize; - } - vpn = hpt_vpn(addr, vsid, ssize); hash = hpt_hash(vpn, shift, ssize); if (hidx & _PTEIDX_SECONDARY) diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index 36e44b4..c66e445 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c @@ -217,7 +217,7 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, if (!(pte & _PAGE_HASHPTE)) continue; if (unlikely(hugepage_shift && pmd_trans_huge(*(pmd_t *)pte))) - hpte_do_hugepage_flush(mm, start, (pmd_t *)pte); + hpte_do_hugepage_flush(mm, start, (pmd_t *)ptep, pte); 
else hpte_need_flush(mm, start, ptep, pte, 0); } diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 29b89e8..57a8ff9 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -483,7 +483,7 @@ static bool is_ebb_event(struct perf_event *event) * check that the PMU supports EBB, meaning those that don't can still * use bit 63 of the event code for something else if they wish. */ - return (ppmu->flags & PPMU_EBB) && + return (ppmu->flags & PPMU_ARCH_207S) && ((event->attr.config >> PERF_EVENT_CONFIG_EBB_SHIFT) & 1); } @@ -851,7 +851,22 @@ static void power_pmu_read(struct perf_event *event) } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev); local64_add(delta, &event->count); - local64_sub(delta, &event->hw.period_left); + + /* + * A number of places program the PMC with (0x80000000 - period_left). + * We never want period_left to be less than 1 because we will program + * the PMC with a value >= 0x800000000 and an edge detected PMC will + * roll around to 0 before taking an exception. We have seen this + * on POWER8. + * + * To fix this, clamp the minimum value of period_left to 1. 
+ */ + do { + prev = local64_read(&event->hw.period_left); + val = prev - delta; + if (val < 1) + val = 1; + } while (local64_cmpxchg(&event->hw.period_left, prev, val) != prev); } /* @@ -1149,6 +1164,9 @@ static void power_pmu_enable(struct pmu *pmu) mb(); write_mmcr0(cpuhw, mmcr0); + if (ppmu->flags & PPMU_ARCH_207S) + mtspr(SPRN_MMCR2, 0); + /* * Enable instruction sampling if necessary */ @@ -1547,7 +1565,7 @@ static int power_pmu_event_init(struct perf_event *event) if (has_branch_stack(event)) { /* PMU has BHRB enabled */ - if (!(ppmu->flags & PPMU_BHRB)) + if (!(ppmu->flags & PPMU_ARCH_207S)) return -EOPNOTSUPP; } diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index a3f7abd..79b7e20 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -608,7 +608,7 @@ static struct power_pmu power8_pmu = { .get_constraint = power8_get_constraint, .get_alternatives = power8_get_alternatives, .disable_pmc = power8_disable_pmc, - .flags = PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_BHRB | PPMU_EBB, + .flags = PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_ARCH_207S, .n_generic = ARRAY_SIZE(power8_generic_events), .generic_events = power8_generic_events, .attr_groups = power8_pmu_attr_groups, diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 87ba7cf..65d633f 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -164,7 +164,7 @@ static void spufs_prune_dir(struct dentry *dir) struct dentry *dentry, *tmp; mutex_lock(&dir->d_inode->i_mutex); - list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_u.d_child) { + list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_child) { spin_lock(&dentry->d_lock); if (!(d_unhashed(dentry)) && dentry->d_inode) { dget_dlock(dentry); diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index 227c7fe..b910833 100644 --- 
a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -493,7 +493,8 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option) ret = ioda_eeh_phb_reset(hose, option); } else { bus = eeh_pe_bus_get(pe); - if (pci_is_root_bus(bus)) + if (pci_is_root_bus(bus) || + pci_is_root_bus(bus->parent)) ret = ioda_eeh_root_reset(hose, option); else ret = ioda_eeh_bridge_reset(hose, bus->self, option); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 50e67d2..bfb2960 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -792,7 +792,6 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, unsigned int is_64, struct msi_msg *msg) { struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev); - struct pci_dn *pdn = pci_get_pdn(dev); struct irq_data *idata; struct irq_chip *ichip; unsigned int xive_num = hwirq - phb->msi_base; @@ -809,7 +808,7 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, return -ENXIO; /* Force 32-bit MSI on some broken devices */ - if (pdn && pdn->force_32bit_msi) + if (dev->no_64bit_msi) is_64 = 0; /* Assign XIVE to PE */ diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index c005011..5ba6e297 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -1,3 +1,4 @@ + /* * Support PCI/PCIe on PowerNV platforms * @@ -50,9 +51,8 @@ static int pnv_msi_check_device(struct pci_dev* pdev, int nvec, int type) { struct pci_controller *hose = pci_bus_to_host(pdev->bus); struct pnv_phb *phb = hose->private_data; - struct pci_dn *pdn = pci_get_pdn(pdev); - if (pdn && pdn->force_32bit_msi && !phb->msi32_support) + if (pdev->no_64bit_msi && !phb->msi32_support) return -ENODEV; return (phb && phb->msi_bmp.bitmap) ? 
0 : -ENODEV; diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 7cfdaae..679df55 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -380,7 +380,7 @@ static int dlpar_online_cpu(struct device_node *dn) BUG_ON(get_cpu_current_state(cpu) != CPU_STATE_OFFLINE); cpu_maps_update_done(); - rc = cpu_up(cpu); + rc = device_online(get_cpu_device(cpu)); if (rc) goto out; cpu_maps_update_begin(); @@ -471,7 +471,7 @@ static int dlpar_offline_cpu(struct device_node *dn) if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) { set_preferred_offline_state(cpu, CPU_STATE_OFFLINE); cpu_maps_update_done(); - rc = cpu_down(cpu); + rc = device_offline(get_cpu_device(cpu)); if (rc) goto out; cpu_maps_update_begin(); diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 7fbc25b..7444870 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -461,6 +461,7 @@ static int pseries_eeh_get_state(struct eeh_pe *pe, int *state) } else { result = EEH_STATE_NOT_SUPPORT; } + break; default: result = EEH_STATE_NOT_SUPPORT; } diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 0ea99e3..2d6fe89 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -88,13 +88,14 @@ void set_default_offline_state(int cpu) static void rtas_stop_self(void) { - struct rtas_args args = { - .token = cpu_to_be32(rtas_stop_self_token), + static struct rtas_args args = { .nargs = 0, .nret = 1, .rets = &args.args[0], }; + args.token = cpu_to_be32(rtas_stop_self_token); + local_irq_disable(); BUG_ON(rtas_stop_self_token == RTAS_UNKNOWN_SERVICE); diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 9a432de..bebe64e 100644 --- 
a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -158,7 +158,7 @@ static int pseries_remove_memory(struct device_node *np) static inline int pseries_remove_memblock(unsigned long base, unsigned int memblock_size) { - return -EOPNOTSUPP; + return 0; } static inline int pseries_remove_memory(struct device_node *np) { diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index d9d0d7a..7cd3370 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -733,13 +733,13 @@ static inline void __remove_ddw(struct device_node *np, const u32 *ddw_avail, u6 np->full_name, ret, ddw_avail[2], liobn); } -static void remove_ddw(struct device_node *np) +static void remove_ddw(struct device_node *np, bool remove_prop) { struct dynamic_dma_window_prop *dwp; struct property *win64; const u32 *ddw_avail; u64 liobn; - int len, ret; + int len, ret = 0; ddw_avail = of_get_property(np, "ibm,ddw-applicable", &len); win64 = of_find_property(np, DIRECT64_PROPNAME, NULL); @@ -765,7 +765,8 @@ static void remove_ddw(struct device_node *np) __remove_ddw(np, ddw_avail, liobn); delprop: - ret = of_remove_property(np, win64); + if (remove_prop) + ret = of_remove_property(np, win64); if (ret) pr_warning("%s: failed to remove direct window property: %d\n", np->full_name, ret); @@ -837,7 +838,7 @@ static int find_existing_ddw_windows(void) * can clear the table or find the holes. To that end, * first, remove any existing DDW configuration. 
*/ - remove_ddw(pdn); + remove_ddw(pdn, true); /* * Second, if we are running on a new enough level of @@ -1127,7 +1128,7 @@ out_free_window: kfree(window); out_clear_window: - remove_ddw(pdn); + remove_ddw(pdn, true); out_free_prop: kfree(win64->name); @@ -1339,7 +1340,14 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti switch (action) { case OF_RECONFIG_DETACH_NODE: - remove_ddw(np); + /* + * Removing the property will invoke the reconfig + * notifier again, which causes dead-lock on the + * read-write semaphore of the notifier chain. So + * we have to remove the property when releasing + * the device node. + */ + remove_ddw(np, false); if (pci && pci->iommu_table) iommu_free_table(pci->iommu_table, np->full_name); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 356bc75..691a479 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -412,16 +412,17 @@ static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot, spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags); } -static void pSeries_lpar_hugepage_invalidate(struct mm_struct *mm, - unsigned char *hpte_slot_array, - unsigned long addr, int psize) +static void pSeries_lpar_hugepage_invalidate(unsigned long vsid, + unsigned long addr, + unsigned char *hpte_slot_array, + int psize, int ssize) { - int ssize = 0, i, index = 0; + int i, index = 0; unsigned long s_addr = addr; unsigned int max_hpte_count, valid; unsigned long vpn_array[PPC64_HUGE_HPTE_BATCH]; unsigned long slot_array[PPC64_HUGE_HPTE_BATCH]; - unsigned long shift, hidx, vpn = 0, vsid, hash, slot; + unsigned long shift, hidx, vpn = 0, hash, slot; shift = mmu_psize_defs[psize].shift; max_hpte_count = 1U << (PMD_SHIFT - shift); @@ -434,15 +435,6 @@ static void pSeries_lpar_hugepage_invalidate(struct mm_struct *mm, /* get the vpn */ addr = s_addr + (i * (1ul << shift)); - if (!is_kernel_addr(addr)) { - ssize = 
user_segment_size(addr); - vsid = get_vsid(mm->context.id, addr, ssize); - WARN_ON(vsid == 0); - } else { - vsid = get_kernel_vsid(addr, mmu_kernel_ssize); - ssize = mmu_kernel_ssize; - } - vpn = hpt_vpn(addr, vsid, ssize); hash = hpt_hash(vpn, shift, ssize); if (hidx & _PTEIDX_SECONDARY) diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 6d2f0ab..3b350fb 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -426,7 +426,7 @@ static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type) */ again: if (type == PCI_CAP_ID_MSI) { - if (pdn->force_32bit_msi) { + if (pdev->no_64bit_msi) { rc = rtas_change_msi(pdn, RTAS_CHANGE_32MSI_FN, nvec); if (rc < 0) { /* diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index af9d346..4898203 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -288,10 +288,10 @@ static inline void disable_surveillance(void) args.token = rtas_token("set-indicator"); if (args.token == RTAS_UNKNOWN_SERVICE) return; - args.nargs = 3; - args.nret = 1; + args.nargs = cpu_to_be32(3); + args.nret = cpu_to_be32(1); args.rets = &args.args[3]; - args.args[0] = SURVEILLANCE_TOKEN; + args.args[0] = cpu_to_be32(SURVEILLANCE_TOKEN); args.args[1] = 0; args.args[2] = 0; enter_rtas(__pa(&args)); diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 6671e8d..faa97bd49 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -93,6 +93,7 @@ config S390 select ARCH_INLINE_WRITE_UNLOCK_IRQ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE select ARCH_SAVE_PAGE_KEYS if HIBERNATION + select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 2a245b5..f8d9cb1 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -967,7 +967,7 @@ static void __exit aes_s390_fini(void) 
module_init(aes_s390_init); module_exit(aes_s390_fini); -MODULE_ALIAS("aes-all"); +MODULE_ALIAS_CRYPTO("aes-all"); MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm"); MODULE_LICENSE("GPL"); diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index 2d96e68..a3e24d4 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -616,8 +616,8 @@ static void __exit des_s390_exit(void) module_init(des_s390_init); module_exit(des_s390_exit); -MODULE_ALIAS("des"); -MODULE_ALIAS("des3_ede"); +MODULE_ALIAS_CRYPTO("des"); +MODULE_ALIAS_CRYPTO("des3_ede"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms"); diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c index d43485d..7940dc9 100644 --- a/arch/s390/crypto/ghash_s390.c +++ b/arch/s390/crypto/ghash_s390.c @@ -160,7 +160,7 @@ static void __exit ghash_mod_exit(void) module_init(ghash_mod_init); module_exit(ghash_mod_exit); -MODULE_ALIAS("ghash"); +MODULE_ALIAS_CRYPTO("ghash"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("GHASH Message Digest Algorithm, s390 implementation"); diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index a1b3a9d..5b2bee3 100644 --- a/arch/s390/crypto/sha1_s390.c +++ b/arch/s390/crypto/sha1_s390.c @@ -103,6 +103,6 @@ static void __exit sha1_s390_fini(void) module_init(sha1_s390_init); module_exit(sha1_s390_fini); -MODULE_ALIAS("sha1"); +MODULE_ALIAS_CRYPTO("sha1"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm"); diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c index 9b85380..b74ff15 100644 --- a/arch/s390/crypto/sha256_s390.c +++ b/arch/s390/crypto/sha256_s390.c @@ -143,7 +143,7 @@ static void __exit sha256_s390_fini(void) module_init(sha256_s390_init); module_exit(sha256_s390_fini); -MODULE_ALIAS("sha256"); -MODULE_ALIAS("sha224"); +MODULE_ALIAS_CRYPTO("sha256"); +MODULE_ALIAS_CRYPTO("sha224"); MODULE_LICENSE("GPL"); 
MODULE_DESCRIPTION("SHA256 and SHA224 Secure Hash Algorithm"); diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index 32a8138..0c36989 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -86,7 +86,7 @@ static struct shash_alg sha512_alg = { } }; -MODULE_ALIAS("sha512"); +MODULE_ALIAS_CRYPTO("sha512"); static int sha384_init(struct shash_desc *desc) { @@ -126,7 +126,7 @@ static struct shash_alg sha384_alg = { } }; -MODULE_ALIAS("sha384"); +MODULE_ALIAS_CRYPTO("sha384"); static int __init init(void) { diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index f201af8..31b5ca8 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -219,7 +219,7 @@ extern void ccw_device_get_id(struct ccw_device *, struct ccw_dev_id *); #define to_ccwdev(n) container_of(n, struct ccw_device, dev) #define to_ccwdrv(n) container_of(n, struct ccw_driver, driver) -extern struct ccw_device *ccw_device_probe_console(void); +extern struct ccw_device *ccw_device_probe_console(struct ccw_driver *); extern void ccw_device_wait_idle(struct ccw_device *); extern int ccw_device_force_console(struct ccw_device *); diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index bbf8141..2bed4f0 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -142,9 +142,9 @@ struct _lowcore { __u8 pad_0x02fc[0x0300-0x02fc]; /* 0x02fc */ /* Interrupt response block */ - __u8 irb[64]; /* 0x0300 */ + __u8 irb[96]; /* 0x0300 */ - __u8 pad_0x0340[0x0e00-0x0340]; /* 0x0340 */ + __u8 pad_0x0360[0x0e00-0x0360]; /* 0x0360 */ /* * 0xe00 contains the address of the IPL Parameter Information @@ -288,12 +288,13 @@ struct _lowcore { __u8 pad_0x03a0[0x0400-0x03a0]; /* 0x03a0 */ /* Interrupt response block. 
*/ - __u8 irb[64]; /* 0x0400 */ + __u8 irb[96]; /* 0x0400 */ + __u8 pad_0x0460[0x0480-0x0460]; /* 0x0460 */ /* Per cpu primary space access list */ - __u32 paste[16]; /* 0x0440 */ + __u32 paste[16]; /* 0x0480 */ - __u8 pad_0x0480[0x0e00-0x0480]; /* 0x0480 */ + __u8 pad_0x04c0[0x0e00-0x04c0]; /* 0x04c0 */ /* * 0xe00 contains the address of the IPL Parameter Information diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 1f1b8c7..0ebb699 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -249,7 +249,7 @@ asmlinkage long sys32_setgroups16(int gidsetsize, u16 __user *grouplist) struct group_info *group_info; int retval; - if (!capable(CAP_SETGID)) + if (!may_setgroups()) return -EPERM; if ((unsigned)gidsetsize > NGROUPS_MAX) return -EINVAL; diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 9556905..d4c5e6b 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -322,7 +322,9 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) * psw and gprs are stored on the stack */ if (addr == (addr_t) &dummy->regs.psw.mask && - ((data & ~PSW_MASK_USER) != psw_user_bits || + (((data^psw_user_bits) & ~PSW_MASK_USER) || + (((data^psw_user_bits) & PSW_MASK_ASC) && + ((data|psw_user_bits) & PSW_MASK_ASC) == PSW_MASK_ASC) || ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA)))) /* Invalid psw mask. */ return -EINVAL; @@ -655,7 +657,10 @@ static int __poke_user_compat(struct task_struct *child, */ if (addr == (addr_t) &dummy32->regs.psw.mask) { /* Build a 64 bit psw mask from 31 bit mask. */ - if ((tmp & ~PSW32_MASK_USER) != psw32_user_bits) + if (((tmp^psw32_user_bits) & ~PSW32_MASK_USER) || + (((tmp^psw32_user_bits) & PSW32_MASK_ASC) && + ((tmp|psw32_user_bits) & PSW32_MASK_ASC) + == PSW32_MASK_ASC)) /* Invalid psw mask. 
*/ return -EINVAL; regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 7f1f7ac..2df491b 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -71,6 +71,7 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, return 0; if (vcpu->arch.sie_block->gcr[0] & 0x2000ul) return 1; + return 0; case KVM_S390_INT_EMERGENCY: if (psw_extint_disabled(vcpu)) return 0; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 776dafe..48bb1c1 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -788,7 +788,8 @@ rerun_vcpu: if (rc) break; if (kvm_is_ucontrol(vcpu->kvm)) - rc = -EOPNOTSUPP; + /* Don't exit for host interrupts. */ + rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0; else rc = kvm_handle_sie_intercept(vcpu); } while (!signal_pending(current) && !rc); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index d261c62..248445f 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -223,11 +223,6 @@ int pud_huge(pud_t pud) return 0; } -int pmd_huge_support(void) -{ - return 1; -} - struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmdp, int write) { diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index de8cbc3..5664be4 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -788,11 +788,21 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep; down_read(&mm->mmap_sem); +retry: ptep = get_locked_pte(current->mm, addr, &ptl); if (unlikely(!ptep)) { up_read(&mm->mmap_sem); return -EFAULT; } + if (!(pte_val(*ptep) & _PAGE_INVALID) && + (pte_val(*ptep) & _PAGE_PROTECT)) { + pte_unmap_unlock(*ptep, ptl); + if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE)) { + up_read(&mm->mmap_sem); + return -EFAULT; + } + goto retry; + } new = old = pgste_get_lock(ptep); pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | diff 
--git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 96a4b15..906fba6 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -276,7 +276,6 @@ static void bpf_jit_noleaks(struct bpf_jit *jit, struct sock_filter *filter) case BPF_S_LD_W_IND: case BPF_S_LD_H_IND: case BPF_S_LD_B_IND: - case BPF_S_LDX_B_MSH: case BPF_S_LD_IMM: case BPF_S_LD_MEM: case BPF_S_MISC_TXA: diff --git a/arch/sh/kernel/dumpstack.c b/arch/sh/kernel/dumpstack.c index b959f55..8dfe645 100644 --- a/arch/sh/kernel/dumpstack.c +++ b/arch/sh/kernel/dumpstack.c @@ -115,7 +115,7 @@ static int print_trace_stack(void *data, char *name) */ static void print_trace_address(void *data, unsigned long addr, int reliable) { - printk(data); + printk("%s", (char *)data); printk_address(addr, reliable); } diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c index 0d676a4..d776234 100644 --- a/arch/sh/mm/hugetlbpage.c +++ b/arch/sh/mm/hugetlbpage.c @@ -83,11 +83,6 @@ int pud_huge(pud_t pud) return 0; } -int pmd_huge_support(void) -{ - return 0; -} - struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write) { diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index ffc749e..3ff0c25 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -66,6 +66,7 @@ config SPARC64 select HAVE_FTRACE_MCOUNT_RECORD select HAVE_SYSCALL_TRACEPOINTS select HAVE_DEBUG_KMEMLEAK + select SPARSE_IRQ select RTC_DRV_CMOS select RTC_DRV_BQ4802 select RTC_DRV_SUN4V @@ -76,6 +77,7 @@ config SPARC64 select ARCH_HAVE_NMI_SAFE_CMPXCHG select HAVE_C_RECORDMCOUNT select NO_BOOTMEM + select ARCH_SUPPORTS_ATOMIC_RMW config ARCH_DEFCONFIG string diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c index 503e6d9..ded4cee3 100644 --- a/arch/sparc/crypto/aes_glue.c +++ b/arch/sparc/crypto/aes_glue.c @@ -499,6 +499,6 @@ module_exit(aes_sparc64_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("AES Secure Hash Algorithm, sparc64 aes 
opcode accelerated"); -MODULE_ALIAS("aes"); +MODULE_ALIAS_CRYPTO("aes"); #include "crop_devid.c" diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c index 888f6260..641f55c 100644 --- a/arch/sparc/crypto/camellia_glue.c +++ b/arch/sparc/crypto/camellia_glue.c @@ -322,6 +322,6 @@ module_exit(camellia_sparc64_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Camellia Cipher Algorithm, sparc64 camellia opcode accelerated"); -MODULE_ALIAS("aes"); +MODULE_ALIAS_CRYPTO("aes"); #include "crop_devid.c" diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glue.c index 5162fad..d1064e4 100644 --- a/arch/sparc/crypto/crc32c_glue.c +++ b/arch/sparc/crypto/crc32c_glue.c @@ -176,6 +176,6 @@ module_exit(crc32c_sparc64_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("CRC32c (Castagnoli), sparc64 crc32c opcode accelerated"); -MODULE_ALIAS("crc32c"); +MODULE_ALIAS_CRYPTO("crc32c"); #include "crop_devid.c" diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c index 3065bc6..d115009 100644 --- a/arch/sparc/crypto/des_glue.c +++ b/arch/sparc/crypto/des_glue.c @@ -532,6 +532,6 @@ module_exit(des_sparc64_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms, sparc64 des opcode accelerated"); -MODULE_ALIAS("des"); +MODULE_ALIAS_CRYPTO("des"); #include "crop_devid.c" diff --git a/arch/sparc/crypto/md5_glue.c b/arch/sparc/crypto/md5_glue.c index 09a9ea1..64c7ff5 100644 --- a/arch/sparc/crypto/md5_glue.c +++ b/arch/sparc/crypto/md5_glue.c @@ -185,6 +185,6 @@ module_exit(md5_sparc64_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("MD5 Secure Hash Algorithm, sparc64 md5 opcode accelerated"); -MODULE_ALIAS("md5"); +MODULE_ALIAS_CRYPTO("md5"); #include "crop_devid.c" diff --git a/arch/sparc/crypto/sha1_glue.c b/arch/sparc/crypto/sha1_glue.c index 6cd5f29..1b3e47a 100644 --- a/arch/sparc/crypto/sha1_glue.c +++ b/arch/sparc/crypto/sha1_glue.c @@ -180,6 +180,6 @@ 
module_exit(sha1_sparc64_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, sparc64 sha1 opcode accelerated"); -MODULE_ALIAS("sha1"); +MODULE_ALIAS_CRYPTO("sha1"); #include "crop_devid.c" diff --git a/arch/sparc/crypto/sha256_glue.c b/arch/sparc/crypto/sha256_glue.c index 04f555a..41f27cc 100644 --- a/arch/sparc/crypto/sha256_glue.c +++ b/arch/sparc/crypto/sha256_glue.c @@ -237,7 +237,7 @@ module_exit(sha256_sparc64_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm, sparc64 sha256 opcode accelerated"); -MODULE_ALIAS("sha224"); -MODULE_ALIAS("sha256"); +MODULE_ALIAS_CRYPTO("sha224"); +MODULE_ALIAS_CRYPTO("sha256"); #include "crop_devid.c" diff --git a/arch/sparc/crypto/sha512_glue.c b/arch/sparc/crypto/sha512_glue.c index f04d199..9fff885 100644 --- a/arch/sparc/crypto/sha512_glue.c +++ b/arch/sparc/crypto/sha512_glue.c @@ -222,7 +222,7 @@ module_exit(sha512_sparc64_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA-384 and SHA-512 Secure Hash Algorithm, sparc64 sha512 opcode accelerated"); -MODULE_ALIAS("sha384"); -MODULE_ALIAS("sha512"); +MODULE_ALIAS_CRYPTO("sha384"); +MODULE_ALIAS_CRYPTO("sha512"); #include "crop_devid.c" diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h index 905832a..a0ed182 100644 --- a/arch/sparc/include/asm/atomic_32.h +++ b/arch/sparc/include/asm/atomic_32.h @@ -21,7 +21,7 @@ extern int __atomic_add_return(int, atomic_t *); extern int atomic_cmpxchg(atomic_t *, int, int); -#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) +extern int atomic_xchg(atomic_t *, int); extern int __atomic_add_unless(atomic_t *, int, int); extern void atomic_set(atomic_t *, int); diff --git a/arch/sparc/include/asm/cmpxchg_32.h b/arch/sparc/include/asm/cmpxchg_32.h index 1fae1a0..ae0f9a7 100644 --- a/arch/sparc/include/asm/cmpxchg_32.h +++ b/arch/sparc/include/asm/cmpxchg_32.h @@ -11,22 +11,14 @@ #ifndef __ARCH_SPARC_CMPXCHG__ #define 
__ARCH_SPARC_CMPXCHG__ -static inline unsigned long xchg_u32(__volatile__ unsigned long *m, unsigned long val) -{ - __asm__ __volatile__("swap [%2], %0" - : "=&r" (val) - : "0" (val), "r" (m) - : "memory"); - return val; -} - +extern unsigned long __xchg_u32(volatile u32 *m, u32 new); extern void __xchg_called_with_bad_pointer(void); static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr, int size) { switch (size) { case 4: - return xchg_u32(ptr, x); + return __xchg_u32(ptr, x); } __xchg_called_with_bad_pointer(); return x; diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h index ca121f0..17be9d6 100644 --- a/arch/sparc/include/asm/hypervisor.h +++ b/arch/sparc/include/asm/hypervisor.h @@ -2944,6 +2944,16 @@ extern unsigned long sun4v_vt_set_perfreg(unsigned long reg_num, unsigned long reg_val); #endif +#define HV_FAST_T5_GET_PERFREG 0x1a8 +#define HV_FAST_T5_SET_PERFREG 0x1a9 + +#ifndef __ASSEMBLY__ +unsigned long sun4v_t5_get_perfreg(unsigned long reg_num, + unsigned long *reg_val); +unsigned long sun4v_t5_set_perfreg(unsigned long reg_num, + unsigned long reg_val); +#endif + /* Function numbers for HV_CORE_TRAP. */ #define HV_CORE_SET_VER 0x00 #define HV_CORE_PUTCHAR 0x01 @@ -2975,6 +2985,7 @@ extern unsigned long sun4v_vt_set_perfreg(unsigned long reg_num, #define HV_GRP_VF_CPU 0x0205 #define HV_GRP_KT_CPU 0x0209 #define HV_GRP_VT_CPU 0x020c +#define HV_GRP_T5_CPU 0x0211 #define HV_GRP_DIAG 0x0300 #ifndef __ASSEMBLY__ diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h index abf6afe..78c9f2d 100644 --- a/arch/sparc/include/asm/irq_64.h +++ b/arch/sparc/include/asm/irq_64.h @@ -37,7 +37,7 @@ * * ino_bucket->irq allocation is made during {sun4v_,}build_irq(). 
*/ -#define NR_IRQS 255 +#define NR_IRQS (2048) extern void irq_install_pre_handler(int irq, void (*func)(unsigned int, void *, void *), @@ -57,11 +57,8 @@ extern unsigned int sun4u_build_msi(u32 portid, unsigned int *irq_p, unsigned long iclr_base); extern void sun4u_destroy_msi(unsigned int irq); -extern unsigned char irq_alloc(unsigned int dev_handle, - unsigned int dev_ino); -#ifdef CONFIG_PCI_MSI +extern unsigned int irq_alloc(unsigned int dev_handle, unsigned int dev_ino); extern void irq_free(unsigned int irq); -#endif extern void __init init_IRQ(void); extern void fixup_irqs(void); diff --git a/arch/sparc/include/asm/ldc.h b/arch/sparc/include/asm/ldc.h index bdb524a..8732ed3 100644 --- a/arch/sparc/include/asm/ldc.h +++ b/arch/sparc/include/asm/ldc.h @@ -53,13 +53,14 @@ struct ldc_channel; /* Allocate state for a channel. */ extern struct ldc_channel *ldc_alloc(unsigned long id, const struct ldc_channel_config *cfgp, - void *event_arg); + void *event_arg, + const char *name); /* Shut down and free state for a channel. */ extern void ldc_free(struct ldc_channel *lp); /* Register TX and RX queues of the link with the hypervisor. */ -extern int ldc_bind(struct ldc_channel *lp, const char *name); +extern int ldc_bind(struct ldc_channel *lp); /* For non-RAW protocols we need to complete a handshake before * communication can proceed. 
ldc_connect() does that, if the diff --git a/arch/sparc/include/asm/mmu_64.h b/arch/sparc/include/asm/mmu_64.h index e945ddb..0845de0 100644 --- a/arch/sparc/include/asm/mmu_64.h +++ b/arch/sparc/include/asm/mmu_64.h @@ -93,7 +93,6 @@ typedef struct { raw_spinlock_t lock; unsigned long sparc64_ctx_val; unsigned long huge_pte_count; - struct page *pgtable_page; struct tsb_config tsb_block[MM_NUM_TSBS]; struct hv_tsb_descr tsb_descr[MM_NUM_TSBS]; } mm_context_t; diff --git a/arch/sparc/include/asm/oplib_64.h b/arch/sparc/include/asm/oplib_64.h index a12dbe3..e48fdf4 100644 --- a/arch/sparc/include/asm/oplib_64.h +++ b/arch/sparc/include/asm/oplib_64.h @@ -62,7 +62,8 @@ struct linux_mem_p1275 { /* You must call prom_init() before using any of the library services, * preferably as early as possible. Pass it the romvec pointer. */ -extern void prom_init(void *cif_handler, void *cif_stack); +extern void prom_init(void *cif_handler); +extern void prom_init_report(void); /* Boot argument acquisition, returns the boot command line string. */ extern char *prom_getbootargs(void); diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h index e155388..b18e602 100644 --- a/arch/sparc/include/asm/page_64.h +++ b/arch/sparc/include/asm/page_64.h @@ -15,7 +15,10 @@ #define DCACHE_ALIASING_POSSIBLE #endif -#define HPAGE_SHIFT 22 +#define HPAGE_SHIFT 23 +#define REAL_HPAGE_SHIFT 22 + +#define REAL_HPAGE_SIZE (_AC(1,UL) << REAL_HPAGE_SHIFT) #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) #define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) @@ -53,19 +56,22 @@ extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct pag /* These are used to make use of C type-checking.. 
*/ typedef struct { unsigned long pte; } pte_t; typedef struct { unsigned long iopte; } iopte_t; -typedef struct { unsigned int pmd; } pmd_t; -typedef struct { unsigned int pgd; } pgd_t; +typedef struct { unsigned long pmd; } pmd_t; +typedef struct { unsigned long pud; } pud_t; +typedef struct { unsigned long pgd; } pgd_t; typedef struct { unsigned long pgprot; } pgprot_t; #define pte_val(x) ((x).pte) #define iopte_val(x) ((x).iopte) #define pmd_val(x) ((x).pmd) +#define pud_val(x) ((x).pud) #define pgd_val(x) ((x).pgd) #define pgprot_val(x) ((x).pgprot) #define __pte(x) ((pte_t) { (x) } ) #define __iopte(x) ((iopte_t) { (x) } ) #define __pmd(x) ((pmd_t) { (x) } ) +#define __pud(x) ((pud_t) { (x) } ) #define __pgd(x) ((pgd_t) { (x) } ) #define __pgprot(x) ((pgprot_t) { (x) } ) @@ -73,19 +79,22 @@ typedef struct { unsigned long pgprot; } pgprot_t; /* .. while these make it easier on the compiler */ typedef unsigned long pte_t; typedef unsigned long iopte_t; -typedef unsigned int pmd_t; -typedef unsigned int pgd_t; +typedef unsigned long pmd_t; +typedef unsigned long pud_t; +typedef unsigned long pgd_t; typedef unsigned long pgprot_t; #define pte_val(x) (x) #define iopte_val(x) (x) #define pmd_val(x) (x) +#define pud_val(x) (x) #define pgd_val(x) (x) #define pgprot_val(x) (x) #define __pte(x) (x) #define __iopte(x) (x) #define __pmd(x) (x) +#define __pud(x) (x) #define __pgd(x) (x) #define __pgprot(x) (x) @@ -93,18 +102,33 @@ typedef unsigned long pgprot_t; typedef pte_t *pgtable_t; +extern unsigned long sparc64_va_hole_top; +extern unsigned long sparc64_va_hole_bottom; + +/* The next two defines specify the actual exclusion region we + * enforce, wherein we use a 4GB red zone on each side of the VA hole. + */ +#define VA_EXCLUDE_START (sparc64_va_hole_bottom - (1UL << 32UL)) +#define VA_EXCLUDE_END (sparc64_va_hole_top + (1UL << 32UL)) + #define TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? 
\ - (_AC(0x0000000070000000,UL)) : \ - (_AC(0xfffff80000000000,UL) + (1UL << 32UL))) + _AC(0x0000000070000000,UL) : \ + VA_EXCLUDE_END) #include <asm-generic/memory_model.h> +extern unsigned long PAGE_OFFSET; + #endif /* !(__ASSEMBLY__) */ -/* We used to stick this into a hard-coded global register (%g4) - * but that does not make sense anymore. +/* The maximum number of physical memory address bits we support. The + * largest value we can support is whatever "KPGD_SHIFT + KPTE_BITS" + * evaluates to. */ -#define PAGE_OFFSET _AC(0xFFFFF80000000000,UL) +#define MAX_PHYS_ADDRESS_BITS 53 + +#define ILOG2_4MB 22 +#define ILOG2_256MB 28 #ifndef __ASSEMBLY__ diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h index bcfe063..2c8d41f 100644 --- a/arch/sparc/include/asm/pgalloc_64.h +++ b/arch/sparc/include/asm/pgalloc_64.h @@ -15,6 +15,13 @@ extern struct kmem_cache *pgtable_cache; +static inline void __pgd_populate(pgd_t *pgd, pud_t *pud) +{ + pgd_set(pgd, pud); +} + +#define pgd_populate(MM, PGD, PUD) __pgd_populate(PGD, PUD) + static inline pgd_t *pgd_alloc(struct mm_struct *mm) { return kmem_cache_alloc(pgtable_cache, GFP_KERNEL); @@ -25,7 +32,23 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) kmem_cache_free(pgtable_cache, pgd); } -#define pud_populate(MM, PUD, PMD) pud_set(PUD, PMD) +static inline void __pud_populate(pud_t *pud, pmd_t *pmd) +{ + pud_set(pud, pmd); +} + +#define pud_populate(MM, PUD, PMD) __pud_populate(PUD, PMD) + +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + return kmem_cache_alloc(pgtable_cache, + GFP_KERNEL|__GFP_REPEAT); +} + +static inline void pud_free(struct mm_struct *mm, pud_t *pud) +{ + kmem_cache_free(pgtable_cache, pud); +} static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { @@ -91,4 +114,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pte_t *pte, #define __pmd_free_tlb(tlb, pmd, addr) \ pgtable_free_tlb(tlb, 
pmd, false) +#define __pud_free_tlb(tlb, pud, addr) \ + pgtable_free_tlb(tlb, pud, false) + #endif /* _SPARC64_PGALLOC_H */ diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 90f289f..e8dfabf 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -20,11 +20,10 @@ #include <asm/page.h> #include <asm/processor.h> -#include <asm-generic/pgtable-nopud.h> - /* The kernel image occupies 0x4000000 to 0x6000000 (4MB --> 96MB). * The page copy blockops can use 0x6000000 to 0x8000000. - * The TSB is mapped in the 0x8000000 to 0xa000000 range. + * The 8K TSB is mapped in the 0x8000000 to 0x8400000 range. + * The 4M TSB is mapped in the 0x8400000 to 0x8800000 range. * The PROM resides in an area spanning 0xf0000000 to 0x100000000. * The vmalloc area spans 0x100000000 to 0x200000000. * Since modules need to be in the lowest 32-bits of the address space, @@ -33,33 +32,43 @@ * 0x400000000. */ #define TLBTEMP_BASE _AC(0x0000000006000000,UL) -#define TSBMAP_BASE _AC(0x0000000008000000,UL) +#define TSBMAP_8K_BASE _AC(0x0000000008000000,UL) +#define TSBMAP_4M_BASE _AC(0x0000000008400000,UL) #define MODULES_VADDR _AC(0x0000000010000000,UL) #define MODULES_LEN _AC(0x00000000e0000000,UL) #define MODULES_END _AC(0x00000000f0000000,UL) #define LOW_OBP_ADDRESS _AC(0x00000000f0000000,UL) #define HI_OBP_ADDRESS _AC(0x0000000100000000,UL) #define VMALLOC_START _AC(0x0000000100000000,UL) -#define VMALLOC_END _AC(0x0000010000000000,UL) -#define VMEMMAP_BASE _AC(0x0000010000000000,UL) - -#define vmemmap ((struct page *)VMEMMAP_BASE) +#define VMEMMAP_BASE VMALLOC_END /* PMD_SHIFT determines the size of the area a second-level page * table can map */ -#define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-4)) +#define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3)) #define PMD_SIZE (_AC(1,UL) << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) -#define PMD_BITS (PAGE_SHIFT - 2) +#define PMD_BITS (PAGE_SHIFT - 3) -/* PGDIR_SHIFT determines 
what a third-level page table entry can map */ -#define PGDIR_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-4) + PMD_BITS) +/* PUD_SHIFT determines the size of the area a third-level page + * table can map + */ +#define PUD_SHIFT (PMD_SHIFT + PMD_BITS) +#define PUD_SIZE (_AC(1,UL) << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) +#define PUD_BITS (PAGE_SHIFT - 3) + +/* PGDIR_SHIFT determines what a fourth-level page table entry can map */ +#define PGDIR_SHIFT (PUD_SHIFT + PUD_BITS) #define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) -#define PGDIR_BITS (PAGE_SHIFT - 2) +#define PGDIR_BITS (PAGE_SHIFT - 3) -#if (PGDIR_SHIFT + PGDIR_BITS) != 44 +#if (MAX_PHYS_ADDRESS_BITS > PGDIR_SHIFT + PGDIR_BITS) +#error MAX_PHYS_ADDRESS_BITS exceeds what kernel page tables can support +#endif + +#if (PGDIR_SHIFT + PGDIR_BITS) != 53 #error Page table parameters do not cover virtual address space properly. #endif @@ -67,44 +76,34 @@ #error PMD_SHIFT must equal HPAGE_SHIFT for transparent huge pages. #endif -/* PMDs point to PTE tables which are 4K aligned. */ -#define PMD_PADDR _AC(0xfffffffe,UL) -#define PMD_PADDR_SHIFT _AC(11,UL) - -#define PMD_ISHUGE _AC(0x00000001,UL) +#ifndef __ASSEMBLY__ -/* This is the PMD layout when PMD_ISHUGE is set. With 4MB huge - * pages, this frees up a bunch of bits in the layout that we can - * use for the protection settings and software metadata. - */ -#define PMD_HUGE_PADDR _AC(0xfffff800,UL) -#define PMD_HUGE_PROTBITS _AC(0x000007ff,UL) -#define PMD_HUGE_PRESENT _AC(0x00000400,UL) -#define PMD_HUGE_WRITE _AC(0x00000200,UL) -#define PMD_HUGE_DIRTY _AC(0x00000100,UL) -#define PMD_HUGE_ACCESSED _AC(0x00000080,UL) -#define PMD_HUGE_EXEC _AC(0x00000040,UL) -#define PMD_HUGE_SPLITTING _AC(0x00000020,UL) - -/* PGDs point to PMD tables which are 8K aligned. 
*/ -#define PGD_PADDR _AC(0xfffffffc,UL) -#define PGD_PADDR_SHIFT _AC(11,UL) +extern unsigned long VMALLOC_END; -#ifndef __ASSEMBLY__ +#define vmemmap ((struct page *)VMEMMAP_BASE) #include <linux/sched.h> +bool kern_addr_valid(unsigned long addr); + /* Entries per page directory level. */ -#define PTRS_PER_PTE (1UL << (PAGE_SHIFT-4)) +#define PTRS_PER_PTE (1UL << (PAGE_SHIFT-3)) #define PTRS_PER_PMD (1UL << PMD_BITS) +#define PTRS_PER_PUD (1UL << PUD_BITS) #define PTRS_PER_PGD (1UL << PGDIR_BITS) /* Kernel has a separate 44bit address space. */ #define FIRST_USER_ADDRESS 0 -#define pte_ERROR(e) __builtin_trap() -#define pmd_ERROR(e) __builtin_trap() -#define pgd_ERROR(e) __builtin_trap() +#define pmd_ERROR(e) \ + pr_err("%s:%d: bad pmd %p(%016lx) seen at (%pS)\n", \ + __FILE__, __LINE__, &(e), pmd_val(e), __builtin_return_address(0)) +#define pud_ERROR(e) \ + pr_err("%s:%d: bad pud %p(%016lx) seen at (%pS)\n", \ + __FILE__, __LINE__, &(e), pud_val(e), __builtin_return_address(0)) +#define pgd_ERROR(e) \ + pr_err("%s:%d: bad pgd %p(%016lx) seen at (%pS)\n", \ + __FILE__, __LINE__, &(e), pgd_val(e), __builtin_return_address(0)) #endif /* !(__ASSEMBLY__) */ @@ -112,6 +111,8 @@ #define _PAGE_VALID _AC(0x8000000000000000,UL) /* Valid TTE */ #define _PAGE_R _AC(0x8000000000000000,UL) /* Keep ref bit uptodate*/ #define _PAGE_SPECIAL _AC(0x0200000000000000,UL) /* Special page */ +#define _PAGE_PMD_HUGE _AC(0x0100000000000000,UL) /* Huge page */ +#define _PAGE_PUD_HUGE _PAGE_PMD_HUGE /* Advertise support for _PAGE_SPECIAL */ #define __HAVE_ARCH_PTE_SPECIAL @@ -125,6 +126,7 @@ #define _PAGE_IE_4U _AC(0x0800000000000000,UL) /* Invert Endianness */ #define _PAGE_SOFT2_4U _AC(0x07FC000000000000,UL) /* Software bits, set 2 */ #define _PAGE_SPECIAL_4U _AC(0x0200000000000000,UL) /* Special page */ +#define _PAGE_PMD_HUGE_4U _AC(0x0100000000000000,UL) /* Huge page */ #define _PAGE_RES1_4U _AC(0x0002000000000000,UL) /* Reserved */ #define _PAGE_SZ32MB_4U _AC(0x0001000000000000,UL) 
/* (Panther) 32MB page */ #define _PAGE_SZ256MB_4U _AC(0x2001000000000000,UL) /* (Panther) 256MB page */ @@ -155,6 +157,7 @@ #define _PAGE_READ_4V _AC(0x0800000000000000,UL) /* Readable SW Bit */ #define _PAGE_WRITE_4V _AC(0x0400000000000000,UL) /* Writable SW Bit */ #define _PAGE_SPECIAL_4V _AC(0x0200000000000000,UL) /* Special page */ +#define _PAGE_PMD_HUGE_4V _AC(0x0100000000000000,UL) /* Huge page */ #define _PAGE_PADDR_4V _AC(0x00FFFFFFFFFFE000,UL) /* paddr[55:13] */ #define _PAGE_IE_4V _AC(0x0000000000001000,UL) /* Invert Endianness */ #define _PAGE_E_4V _AC(0x0000000000000800,UL) /* side-Effect */ @@ -180,6 +183,10 @@ #define _PAGE_SZBITS_4U _PAGE_SZ8K_4U #define _PAGE_SZBITS_4V _PAGE_SZ8K_4V +#if REAL_HPAGE_SHIFT != 22 +#error REAL_HPAGE_SHIFT and _PAGE_SZHUGE_foo must match up +#endif + #define _PAGE_SZHUGE_4U _PAGE_SZ4MB_4U #define _PAGE_SZHUGE_4V _PAGE_SZ4MB_4V @@ -239,16 +246,13 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) #ifdef CONFIG_TRANSPARENT_HUGEPAGE -extern pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot); -#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) - -extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot); - -static inline pmd_t pmd_mkhuge(pmd_t pmd) +static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) { - /* Do nothing, mk_pmd() does this part. */ - return pmd; + pte_t pte = pfn_pte(page_nr, pgprot); + + return __pmd(pte_val(pte)); } +#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) #endif /* This one can be done with two shifts. 
*/ @@ -277,8 +281,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t prot) { unsigned long mask, tmp; - /* SUN4U: 0x600307ffffffecb8 (negated == 0x9ffcf80000001347) - * SUN4V: 0x30ffffffffffee17 (negated == 0xcf000000000011e8) + /* SUN4U: 0x630107ffffffec38 (negated == 0x9cfef800000013c7) + * SUN4V: 0x33ffffffffffee07 (negated == 0xcc000000000011f8) * * Even if we use negation tricks the result is still a 6 * instruction sequence, so don't try to play fancy and just @@ -308,15 +312,26 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t prot) " .previous\n" : "=r" (mask), "=r" (tmp) : "i" (_PAGE_PADDR_4U | _PAGE_MODIFIED_4U | _PAGE_ACCESSED_4U | - _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_E_4U | _PAGE_PRESENT_4U | - _PAGE_SPECIAL), + _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_E_4U | + _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4U), "i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V | - _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_E_4V | _PAGE_PRESENT_4V | - _PAGE_SPECIAL)); + _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_E_4V | + _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4V)); return __pte((pte_val(pte) & mask) | (pgprot_val(prot) & ~mask)); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + pte_t pte = __pte(pmd_val(pmd)); + + pte = pte_modify(pte, newprot); + + return __pmd(pte_val(pte)); +} +#endif + static inline pte_t pgoff_to_pte(unsigned long off) { off <<= PAGE_SHIFT; @@ -357,7 +372,7 @@ static inline pgprot_t pgprot_noncached(pgprot_t prot) */ #define pgprot_noncached pgprot_noncached -#ifdef CONFIG_HUGETLB_PAGE +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) static inline pte_t pte_mkhuge(pte_t pte) { unsigned long mask; @@ -375,6 +390,17 @@ static inline pte_t pte_mkhuge(pte_t pte) return __pte(pte_val(pte) | mask); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline pmd_t pmd_mkhuge(pmd_t pmd) +{ + pte_t pte = __pte(pmd_val(pmd)); + + pte = pte_mkhuge(pte); + pte_val(pte) |= _PAGE_PMD_HUGE; + + 
return __pmd(pte_val(pte)); +} +#endif #endif static inline pte_t pte_mkdirty(pte_t pte) @@ -626,95 +652,136 @@ static inline unsigned long pte_special(pte_t pte) return pte_val(pte) & _PAGE_SPECIAL; } -static inline int pmd_large(pmd_t pmd) +static inline unsigned long pmd_large(pmd_t pmd) { - return (pmd_val(pmd) & (PMD_ISHUGE | PMD_HUGE_PRESENT)) == - (PMD_ISHUGE | PMD_HUGE_PRESENT); + pte_t pte = __pte(pmd_val(pmd)); + + return pte_val(pte) & _PAGE_PMD_HUGE; } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline int pmd_young(pmd_t pmd) +static inline unsigned long pmd_pfn(pmd_t pmd) { - return pmd_val(pmd) & PMD_HUGE_ACCESSED; + pte_t pte = __pte(pmd_val(pmd)); + + return pte_pfn(pte); } -static inline int pmd_write(pmd_t pmd) +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline unsigned long pmd_young(pmd_t pmd) { - return pmd_val(pmd) & PMD_HUGE_WRITE; + pte_t pte = __pte(pmd_val(pmd)); + + return pte_young(pte); } -static inline unsigned long pmd_pfn(pmd_t pmd) +static inline unsigned long pmd_write(pmd_t pmd) { - unsigned long val = pmd_val(pmd) & PMD_HUGE_PADDR; + pte_t pte = __pte(pmd_val(pmd)); - return val >> (PAGE_SHIFT - PMD_PADDR_SHIFT); + return pte_write(pte); } -static inline int pmd_trans_splitting(pmd_t pmd) +static inline unsigned long pmd_trans_huge(pmd_t pmd) { - return (pmd_val(pmd) & (PMD_ISHUGE|PMD_HUGE_SPLITTING)) == - (PMD_ISHUGE|PMD_HUGE_SPLITTING); + pte_t pte = __pte(pmd_val(pmd)); + + return pte_val(pte) & _PAGE_PMD_HUGE; } -static inline int pmd_trans_huge(pmd_t pmd) +static inline unsigned long pmd_trans_splitting(pmd_t pmd) { - return pmd_val(pmd) & PMD_ISHUGE; + pte_t pte = __pte(pmd_val(pmd)); + + return pmd_trans_huge(pmd) && pte_special(pte); } #define has_transparent_hugepage() 1 static inline pmd_t pmd_mkold(pmd_t pmd) { - pmd_val(pmd) &= ~PMD_HUGE_ACCESSED; - return pmd; + pte_t pte = __pte(pmd_val(pmd)); + + pte = pte_mkold(pte); + + return __pmd(pte_val(pte)); } static inline pmd_t pmd_wrprotect(pmd_t pmd) { - pmd_val(pmd) 
&= ~PMD_HUGE_WRITE; - return pmd; + pte_t pte = __pte(pmd_val(pmd)); + + pte = pte_wrprotect(pte); + + return __pmd(pte_val(pte)); } static inline pmd_t pmd_mkdirty(pmd_t pmd) { - pmd_val(pmd) |= PMD_HUGE_DIRTY; - return pmd; + pte_t pte = __pte(pmd_val(pmd)); + + pte = pte_mkdirty(pte); + + return __pmd(pte_val(pte)); } static inline pmd_t pmd_mkyoung(pmd_t pmd) { - pmd_val(pmd) |= PMD_HUGE_ACCESSED; - return pmd; + pte_t pte = __pte(pmd_val(pmd)); + + pte = pte_mkyoung(pte); + + return __pmd(pte_val(pte)); } static inline pmd_t pmd_mkwrite(pmd_t pmd) { - pmd_val(pmd) |= PMD_HUGE_WRITE; - return pmd; -} + pte_t pte = __pte(pmd_val(pmd)); -static inline pmd_t pmd_mknotpresent(pmd_t pmd) -{ - pmd_val(pmd) &= ~PMD_HUGE_PRESENT; - return pmd; + pte = pte_mkwrite(pte); + + return __pmd(pte_val(pte)); } static inline pmd_t pmd_mksplitting(pmd_t pmd) { - pmd_val(pmd) |= PMD_HUGE_SPLITTING; - return pmd; + pte_t pte = __pte(pmd_val(pmd)); + + pte = pte_mkspecial(pte); + + return __pmd(pte_val(pte)); } -extern pgprot_t pmd_pgprot(pmd_t entry); +static inline pgprot_t pmd_pgprot(pmd_t entry) +{ + unsigned long val = pmd_val(entry); + + return __pgprot(val); +} #endif static inline int pmd_present(pmd_t pmd) { - return pmd_val(pmd) != 0U; + return pmd_val(pmd) != 0UL; } #define pmd_none(pmd) (!pmd_val(pmd)) +/* pmd_bad() is only called on non-trans-huge PMDs. Our encoding is + * very simple, it's just the physical address. PTE tables are of + * size PAGE_SIZE so make sure the sub-PAGE_SIZE bits are clear and + * the top bits outside of the range of any physical address size we + * support are clear as well. We also validate the physical itself. 
+ */ +#define pmd_bad(pmd) (pmd_val(pmd) & ~PAGE_MASK) + +#define pud_none(pud) (!pud_val(pud)) + +#define pud_bad(pud) (pud_val(pud) & ~PAGE_MASK) + +#define pgd_none(pgd) (!pgd_val(pgd)) + +#define pgd_bad(pgd) (pgd_val(pgd) & ~PAGE_MASK) + #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd); @@ -728,37 +795,54 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, static inline void pmd_set(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep) { - unsigned long val = __pa((unsigned long) (ptep)) >> PMD_PADDR_SHIFT; + unsigned long val = __pa((unsigned long) (ptep)); pmd_val(*pmdp) = val; } #define pud_set(pudp, pmdp) \ - (pud_val(*(pudp)) = (__pa((unsigned long) (pmdp)) >> PGD_PADDR_SHIFT)) + (pud_val(*(pudp)) = (__pa((unsigned long) (pmdp)))) static inline unsigned long __pmd_page(pmd_t pmd) { - unsigned long paddr = (unsigned long) pmd_val(pmd); -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - if (pmd_val(pmd) & PMD_ISHUGE) - paddr &= PMD_HUGE_PADDR; -#endif - paddr <<= PMD_PADDR_SHIFT; - return ((unsigned long) __va(paddr)); + pte_t pte = __pte(pmd_val(pmd)); + unsigned long pfn; + + pfn = pte_pfn(pte); + + return ((unsigned long) __va(pfn << PAGE_SHIFT)); } #define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd)) #define pud_page_vaddr(pud) \ - ((unsigned long) __va((((unsigned long)pud_val(pud))<<PGD_PADDR_SHIFT))) + ((unsigned long) __va(pud_val(pud))) #define pud_page(pud) virt_to_page((void *)pud_page_vaddr(pud)) -#define pmd_bad(pmd) (0) -#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0U) -#define pud_none(pud) (!pud_val(pud)) -#define pud_bad(pud) (0) +#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0UL) #define pud_present(pud) (pud_val(pud) != 0U) -#define pud_clear(pudp) (pud_val(*(pudp)) = 0U) +#define pud_clear(pudp) (pud_val(*(pudp)) = 0UL) +#define pgd_page_vaddr(pgd) \ + ((unsigned long) __va(pgd_val(pgd))) +#define pgd_present(pgd) (pgd_val(pgd) != 0U) +#define 
pgd_clear(pgdp) (pgd_val(*(pgd)) = 0UL) + +static inline unsigned long pud_large(pud_t pud) +{ + pte_t pte = __pte(pud_val(pud)); + + return pte_val(pte) & _PAGE_PMD_HUGE; +} + +static inline unsigned long pud_pfn(pud_t pud) +{ + pte_t pte = __pte(pud_val(pud)); + + return pte_pfn(pte); +} /* Same in both SUN4V and SUN4U. */ #define pte_none(pte) (!pte_val(pte)) +#define pgd_set(pgdp, pudp) \ + (pgd_val(*(pgdp)) = (__pa((unsigned long) (pudp)))) + /* to find an entry in a page-table-directory. */ #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) @@ -766,6 +850,11 @@ static inline unsigned long __pmd_page(pmd_t pmd) /* to find an entry in a kernel page-table-directory */ #define pgd_offset_k(address) pgd_offset(&init_mm, address) +/* Find an entry in the third-level page table.. */ +#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) +#define pud_offset(pgdp, address) \ + ((pud_t *) pgd_page_vaddr(*(pgdp)) + pud_index(address)) + /* Find an entry in the second-level page table.. 
*/ #define pmd_offset(pudp, address) \ ((pmd_t *) pud_page_vaddr(*(pudp)) + \ @@ -789,7 +878,7 @@ static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, pmd_t *pmdp) { pmd_t pmd = *pmdp; - set_pmd_at(mm, addr, pmdp, __pmd(0U)); + set_pmd_at(mm, addr, pmdp, __pmd(0UL)); return pmd; } @@ -837,8 +926,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, }) #endif -extern pgd_t swapper_pg_dir[2048]; -extern pmd_t swapper_low_pmd_dir[2048]; +extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern void paging_init(void); extern unsigned long find_ecache_flush_span(unsigned long size); @@ -852,6 +940,10 @@ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *); extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd); +#define __HAVE_ARCH_PMDP_INVALIDATE +extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmdp); + #define __HAVE_ARCH_PGTABLE_DEPOSIT extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pgtable); @@ -878,18 +970,6 @@ extern unsigned long pte_file(pte_t); extern pte_t pgoff_to_pte(unsigned long); #define PTE_FILE_MAX_BITS (64UL - PAGE_SHIFT - 1UL) -extern unsigned long sparc64_valid_addr_bitmap[]; - -/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */ -static inline bool kern_addr_valid(unsigned long addr) -{ - unsigned long paddr = __pa(addr); - - if ((paddr >> 41UL) != 0UL) - return false; - return test_bit(paddr >> 22, sparc64_valid_addr_bitmap); -} - extern int page_in_phys_avail(unsigned long paddr); /* diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h index 5e35e05..acd6146 100644 --- a/arch/sparc/include/asm/setup.h +++ b/arch/sparc/include/asm/setup.h @@ -24,6 +24,10 @@ static inline int con_is_present(void) } #endif +#ifdef CONFIG_SPARC64 +extern void __init start_early_boot(void); +#endif + extern void sun_do_break(void); extern int 
stop_a_enabled; extern int scons_pwroff; diff --git a/arch/sparc/include/asm/sparsemem.h b/arch/sparc/include/asm/sparsemem.h index b99d4e4..e5e1752 100644 --- a/arch/sparc/include/asm/sparsemem.h +++ b/arch/sparc/include/asm/sparsemem.h @@ -3,9 +3,11 @@ #ifdef __KERNEL__ +#include <asm/page.h> + #define SECTION_SIZE_BITS 30 -#define MAX_PHYSADDR_BITS 42 -#define MAX_PHYSMEM_BITS 42 +#define MAX_PHYSADDR_BITS MAX_PHYS_ADDRESS_BITS +#define MAX_PHYSMEM_BITS MAX_PHYS_ADDRESS_BITS #endif /* !(__KERNEL__) */ diff --git a/arch/sparc/include/asm/spitfire.h b/arch/sparc/include/asm/spitfire.h index 6b67e50..69424d4 100644 --- a/arch/sparc/include/asm/spitfire.h +++ b/arch/sparc/include/asm/spitfire.h @@ -45,6 +45,8 @@ #define SUN4V_CHIP_NIAGARA3 0x03 #define SUN4V_CHIP_NIAGARA4 0x04 #define SUN4V_CHIP_NIAGARA5 0x05 +#define SUN4V_CHIP_SPARC_M6 0x06 +#define SUN4V_CHIP_SPARC_M7 0x07 #define SUN4V_CHIP_SPARC64X 0x8a #define SUN4V_CHIP_UNKNOWN 0xff diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h index d5e5042..6cda09d 100644 --- a/arch/sparc/include/asm/thread_info_64.h +++ b/arch/sparc/include/asm/thread_info_64.h @@ -63,7 +63,8 @@ struct thread_info { struct pt_regs *kern_una_regs; unsigned int kern_una_insn; - unsigned long fpregs[0] __attribute__ ((aligned(64))); + unsigned long fpregs[(7 * 256) / sizeof(unsigned long)] + __attribute__ ((aligned(64))); }; #endif /* !(__ASSEMBLY__) */ @@ -102,6 +103,7 @@ struct thread_info { #define FAULT_CODE_ITLB 0x04 /* Miss happened in I-TLB */ #define FAULT_CODE_WINFIXUP 0x08 /* Miss happened during spill/fill */ #define FAULT_CODE_BLKCOMMIT 0x10 /* Use blk-commit ASI in copy_page */ +#define FAULT_CODE_BAD_RA 0x20 /* Bad RA for sun4v */ #if PAGE_SHIFT == 13 #define THREAD_SIZE (2*PAGE_SIZE) diff --git a/arch/sparc/include/asm/tlbflush_64.h b/arch/sparc/include/asm/tlbflush_64.h index f0d6a97..1a4bb97 100644 --- a/arch/sparc/include/asm/tlbflush_64.h +++ 
b/arch/sparc/include/asm/tlbflush_64.h @@ -35,6 +35,8 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, { } +void flush_tlb_kernel_range(unsigned long start, unsigned long end); + #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE extern void flush_tlb_pending(void); @@ -49,11 +51,6 @@ extern void __flush_tlb_kernel_range(unsigned long start, unsigned long end); #ifndef CONFIG_SMP -#define flush_tlb_kernel_range(start,end) \ -do { flush_tsb_kernel_range(start,end); \ - __flush_tlb_kernel_range(start,end); \ -} while (0) - static inline void global_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr) { __flush_tlb_page(CTX_HWBITS(mm->context), vaddr); @@ -64,11 +61,6 @@ static inline void global_flush_tlb_page(struct mm_struct *mm, unsigned long vad extern void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end); extern void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr); -#define flush_tlb_kernel_range(start, end) \ -do { flush_tsb_kernel_range(start,end); \ - smp_flush_tlb_kernel_range(start, end); \ -} while (0) - #define global_flush_tlb_page(mm, vaddr) \ smp_flush_tlb_page(mm, vaddr) diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h index e696432..ecb49cf 100644 --- a/arch/sparc/include/asm/tsb.h +++ b/arch/sparc/include/asm/tsb.h @@ -133,107 +133,89 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; sub TSB, 0x8, TSB; \ TSB_STORE(TSB, TAG); - /* Do a kernel page table walk. Leaves physical PTE pointer in - * REG1. Jumps to FAIL_LABEL on early page table walk termination. - * VADDR will not be clobbered, but REG2 will. + /* Do a kernel page table walk. Leaves valid PTE value in + * REG1. Jumps to FAIL_LABEL on early page table walk + * termination. VADDR will not be clobbered, but REG2 will. + * + * There are two masks we must apply to propagate bits from + * the virtual address into the PTE physical address field + * when dealing with huge pages. 
This is because the page + * table boundaries do not match the huge page size(s) the + * hardware supports. + * + * In these cases we propagate the bits that are below the + * page table level where we saw the huge page mapping, but + * are still within the relevant physical bits for the huge + * page size in question. So for PMD mappings (which fall on + * bit 23, for 8MB per PMD) we must propagate bit 22 for a + * 4MB huge page. For huge PUDs (which fall on bit 33, for + * 8GB per PUD), we have to accomodate 256MB and 2GB huge + * pages. So for those we propagate bits 32 to 28. */ #define KERN_PGTABLE_WALK(VADDR, REG1, REG2, FAIL_LABEL) \ sethi %hi(swapper_pg_dir), REG1; \ or REG1, %lo(swapper_pg_dir), REG1; \ sllx VADDR, 64 - (PGDIR_SHIFT + PGDIR_BITS), REG2; \ srlx REG2, 64 - PAGE_SHIFT, REG2; \ - andn REG2, 0x3, REG2; \ - lduw [REG1 + REG2], REG1; \ + andn REG2, 0x7, REG2; \ + ldx [REG1 + REG2], REG1; \ brz,pn REG1, FAIL_LABEL; \ - sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \ + sllx VADDR, 64 - (PUD_SHIFT + PUD_BITS), REG2; \ srlx REG2, 64 - PAGE_SHIFT, REG2; \ - sllx REG1, PGD_PADDR_SHIFT, REG1; \ - andn REG2, 0x3, REG2; \ - lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ + andn REG2, 0x7, REG2; \ + ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ brz,pn REG1, FAIL_LABEL; \ - sllx VADDR, 64 - PMD_SHIFT, REG2; \ - srlx REG2, 64 - (PAGE_SHIFT - 1), REG2; \ - sllx REG1, PMD_PADDR_SHIFT, REG1; \ + sethi %uhi(_PAGE_PUD_HUGE), REG2; \ + brz,pn REG1, FAIL_LABEL; \ + sllx REG2, 32, REG2; \ + andcc REG1, REG2, %g0; \ + sethi %hi(0xf8000000), REG2; \ + bne,pt %xcc, 697f; \ + sllx REG2, 1, REG2; \ + sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ andn REG2, 0x7, REG2; \ - add REG1, REG2, REG1; - - /* These macros exists only to make the PMD translator below - * easier to read. It hides the ELF section switch for the - * sun4v code patching. 
- */ -#define OR_PTE_BIT_1INSN(REG, NAME) \ -661: or REG, _PAGE_##NAME##_4U, REG; \ - .section .sun4v_1insn_patch, "ax"; \ - .word 661b; \ - or REG, _PAGE_##NAME##_4V, REG; \ - .previous; - -#define OR_PTE_BIT_2INSN(REG, TMP, NAME) \ -661: sethi %hi(_PAGE_##NAME##_4U), TMP; \ - or REG, TMP, REG; \ - .section .sun4v_2insn_patch, "ax"; \ - .word 661b; \ - mov -1, TMP; \ - or REG, _PAGE_##NAME##_4V, REG; \ - .previous; - - /* Load into REG the PTE value for VALID, CACHE, and SZHUGE. */ -#define BUILD_PTE_VALID_SZHUGE_CACHE(REG) \ -661: sethi %uhi(_PAGE_VALID|_PAGE_SZHUGE_4U), REG; \ - .section .sun4v_1insn_patch, "ax"; \ - .word 661b; \ - sethi %uhi(_PAGE_VALID), REG; \ - .previous; \ - sllx REG, 32, REG; \ -661: or REG, _PAGE_CP_4U|_PAGE_CV_4U, REG; \ - .section .sun4v_1insn_patch, "ax"; \ - .word 661b; \ - or REG, _PAGE_CP_4V|_PAGE_CV_4V|_PAGE_SZHUGE_4V, REG; \ - .previous; + ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ + sethi %uhi(_PAGE_PMD_HUGE), REG2; \ + brz,pn REG1, FAIL_LABEL; \ + sllx REG2, 32, REG2; \ + andcc REG1, REG2, %g0; \ + be,pn %xcc, 698f; \ + sethi %hi(0x400000), REG2; \ +697: brgez,pn REG1, FAIL_LABEL; \ + andn REG1, REG2, REG1; \ + and VADDR, REG2, REG2; \ + ba,pt %xcc, 699f; \ + or REG1, REG2, REG1; \ +698: sllx VADDR, 64 - PMD_SHIFT, REG2; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ + andn REG2, 0x7, REG2; \ + ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ + brgez,pn REG1, FAIL_LABEL; \ + nop; \ +699: /* PMD has been loaded into REG1, interpret the value, seeing * if it is a HUGE PMD or a normal one. If it is not valid * then jump to FAIL_LABEL. If it is a HUGE PMD, and it * translates to a valid PTE, branch to PTE_LABEL. * - * We translate the PMD by hand, one bit at a time, - * constructing the huge PTE. - * - * So we construct the PTE in REG2 as follows: - * - * 1) Extract the PMD PFN from REG1 and place it into REG2. - * - * 2) Translate PMD protection bits in REG1 into REG2, one bit - * at a time using andcc tests on REG1 and OR's into REG2. 
- * - * Only two bits to be concerned with here, EXEC and WRITE. - * Now REG1 is freed up and we can use it as a temporary. - * - * 3) Construct the VALID, CACHE, and page size PTE bits in - * REG1, OR with REG2 to form final PTE. + * We have to propagate the 4MB bit of the virtual address + * because we are fabricating 8MB pages using 4MB hw pages. */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \ - brz,pn REG1, FAIL_LABEL; \ - andcc REG1, PMD_ISHUGE, %g0; \ - be,pt %xcc, 700f; \ - and REG1, PMD_HUGE_PRESENT|PMD_HUGE_ACCESSED, REG2; \ - cmp REG2, PMD_HUGE_PRESENT|PMD_HUGE_ACCESSED; \ - bne,pn %xcc, FAIL_LABEL; \ - andn REG1, PMD_HUGE_PROTBITS, REG2; \ - sllx REG2, PMD_PADDR_SHIFT, REG2; \ - /* REG2 now holds PFN << PAGE_SHIFT */ \ - andcc REG1, PMD_HUGE_WRITE, %g0; \ - bne,a,pt %xcc, 1f; \ - OR_PTE_BIT_1INSN(REG2, W); \ -1: andcc REG1, PMD_HUGE_EXEC, %g0; \ - be,pt %xcc, 1f; \ - nop; \ - OR_PTE_BIT_2INSN(REG2, REG1, EXEC); \ - /* REG1 can now be clobbered, build final PTE */ \ -1: BUILD_PTE_VALID_SZHUGE_CACHE(REG1); \ - ba,pt %xcc, PTE_LABEL; \ - or REG1, REG2, REG1; \ + brz,pn REG1, FAIL_LABEL; \ + sethi %uhi(_PAGE_PMD_HUGE), REG2; \ + sllx REG2, 32, REG2; \ + andcc REG1, REG2, %g0; \ + be,pt %xcc, 700f; \ + sethi %hi(4 * 1024 * 1024), REG2; \ + brgez,pn REG1, FAIL_LABEL; \ + andn REG1, REG2, REG1; \ + and VADDR, REG2, REG2; \ + brlz,pt REG1, PTE_LABEL; \ + or REG1, REG2, REG1; \ 700: #else #define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \ @@ -253,18 +235,21 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; #define USER_PGTABLE_WALK_TL1(VADDR, PHYS_PGD, REG1, REG2, FAIL_LABEL) \ sllx VADDR, 64 - (PGDIR_SHIFT + PGDIR_BITS), REG2; \ srlx REG2, 64 - PAGE_SHIFT, REG2; \ - andn REG2, 0x3, REG2; \ - lduwa [PHYS_PGD + REG2] ASI_PHYS_USE_EC, REG1; \ + andn REG2, 0x7, REG2; \ + ldxa [PHYS_PGD + REG2] ASI_PHYS_USE_EC, REG1; \ + brz,pn REG1, 
FAIL_LABEL; \ + sllx VADDR, 64 - (PUD_SHIFT + PUD_BITS), REG2; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ + andn REG2, 0x7, REG2; \ + ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ brz,pn REG1, FAIL_LABEL; \ sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \ srlx REG2, 64 - PAGE_SHIFT, REG2; \ - sllx REG1, PGD_PADDR_SHIFT, REG1; \ - andn REG2, 0x3, REG2; \ - lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ + andn REG2, 0x7, REG2; \ + ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, 800f) \ sllx VADDR, 64 - PMD_SHIFT, REG2; \ - srlx REG2, 64 - (PAGE_SHIFT - 1), REG2; \ - sllx REG1, PMD_PADDR_SHIFT, REG1; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ andn REG2, 0x7, REG2; \ add REG1, REG2, REG1; \ ldxa [REG1] ASI_PHYS_USE_EC, REG1; \ @@ -306,8 +291,6 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; (KERNEL_TSB_SIZE_BYTES / 16) #define KERNEL_TSB4M_NENTRIES 4096 -#define KTSB_PHYS_SHIFT 15 - /* Do a kernel TSB lookup at tl>0 on VADDR+TAG, branch to OK_LABEL * on TSB hit. REG1, REG2, REG3, and REG4 are used as temporaries * and the found TTE will be left in REG1. REG3 and REG4 must @@ -316,17 +299,15 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; * VADDR and TAG will be preserved and not clobbered by this macro. 
*/ #define KERN_TSB_LOOKUP_TL1(VADDR, TAG, REG1, REG2, REG3, REG4, OK_LABEL) \ -661: sethi %hi(swapper_tsb), REG1; \ - or REG1, %lo(swapper_tsb), REG1; \ +661: sethi %uhi(swapper_tsb), REG1; \ + sethi %hi(swapper_tsb), REG2; \ + or REG1, %ulo(swapper_tsb), REG1; \ + or REG2, %lo(swapper_tsb), REG2; \ .section .swapper_tsb_phys_patch, "ax"; \ .word 661b; \ .previous; \ -661: nop; \ - .section .tsb_ldquad_phys_patch, "ax"; \ - .word 661b; \ - sllx REG1, KTSB_PHYS_SHIFT, REG1; \ - sllx REG1, KTSB_PHYS_SHIFT, REG1; \ - .previous; \ + sllx REG1, 32, REG1; \ + or REG1, REG2, REG1; \ srlx VADDR, PAGE_SHIFT, REG2; \ and REG2, (KERNEL_TSB_NENTRIES - 1), REG2; \ sllx REG2, 4, REG2; \ @@ -341,17 +322,15 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; * we can make use of that for the index computation. */ #define KERN_TSB4M_LOOKUP_TL1(TAG, REG1, REG2, REG3, REG4, OK_LABEL) \ -661: sethi %hi(swapper_4m_tsb), REG1; \ - or REG1, %lo(swapper_4m_tsb), REG1; \ +661: sethi %uhi(swapper_4m_tsb), REG1; \ + sethi %hi(swapper_4m_tsb), REG2; \ + or REG1, %ulo(swapper_4m_tsb), REG1; \ + or REG2, %lo(swapper_4m_tsb), REG2; \ .section .swapper_4m_tsb_phys_patch, "ax"; \ .word 661b; \ .previous; \ -661: nop; \ - .section .tsb_ldquad_phys_patch, "ax"; \ - .word 661b; \ - sllx REG1, KTSB_PHYS_SHIFT, REG1; \ - sllx REG1, KTSB_PHYS_SHIFT, REG1; \ - .previous; \ + sllx REG1, 32, REG1; \ + or REG1, REG2, REG1; \ and TAG, (KERNEL_TSB4M_NENTRIES - 1), REG2; \ sllx REG2, 4, REG2; \ add REG1, REG2, REG2; \ diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h index e562d3c..ad7e178 100644 --- a/arch/sparc/include/asm/uaccess_64.h +++ b/arch/sparc/include/asm/uaccess_64.h @@ -262,8 +262,8 @@ extern unsigned long __must_check __clear_user(void __user *, unsigned long); extern __must_check long strlen_user(const char __user *str); extern __must_check long strnlen_user(const char __user *str, long n); -#define __copy_to_user_inatomic 
___copy_to_user -#define __copy_from_user_inatomic ___copy_from_user +#define __copy_to_user_inatomic __copy_to_user +#define __copy_from_user_inatomic __copy_from_user struct pt_regs; extern unsigned long compute_effective_address(struct pt_regs *, diff --git a/arch/sparc/include/asm/vio.h b/arch/sparc/include/asm/vio.h index 432afa8..55841c1 100644 --- a/arch/sparc/include/asm/vio.h +++ b/arch/sparc/include/asm/vio.h @@ -118,12 +118,18 @@ struct vio_disk_attr_info { u8 vdisk_type; #define VD_DISK_TYPE_SLICE 0x01 /* Slice in block device */ #define VD_DISK_TYPE_DISK 0x02 /* Entire block device */ - u16 resv1; + u8 vdisk_mtype; /* v1.1 */ +#define VD_MEDIA_TYPE_FIXED 0x01 /* Fixed device */ +#define VD_MEDIA_TYPE_CD 0x02 /* CD Device */ +#define VD_MEDIA_TYPE_DVD 0x03 /* DVD Device */ + u8 resv1; u32 vdisk_block_size; u64 operations; - u64 vdisk_size; + u64 vdisk_size; /* v1.1 */ u64 max_xfer_size; - u64 resv2[2]; + u32 phys_block_size; /* v1.2 */ + u32 resv2; + u64 resv3[1]; }; struct vio_disk_desc { @@ -259,7 +265,7 @@ static inline u32 vio_dring_avail(struct vio_dring_state *dr, unsigned int ring_size) { return (dr->pending - - ((dr->prod - dr->cons) & (ring_size - 1))); + ((dr->prod - dr->cons) & (ring_size - 1)) - 1); } #define VIO_MAX_TYPE_LEN 32 diff --git a/arch/sparc/include/asm/visasm.h b/arch/sparc/include/asm/visasm.h index 39ca301..11fdf0e 100644 --- a/arch/sparc/include/asm/visasm.h +++ b/arch/sparc/include/asm/visasm.h @@ -39,6 +39,14 @@ 297: wr %o5, FPRS_FEF, %fprs; \ 298: +#define VISEntryHalfFast(fail_label) \ + rd %fprs, %o5; \ + andcc %o5, FPRS_FEF, %g0; \ + be,pt %icc, 297f; \ + nop; \ + ba,a,pt %xcc, fail_label; \ +297: wr %o5, FPRS_FEF, %fprs; + #define VISExitHalf \ wr %o5, 0, %fprs; diff --git a/arch/sparc/include/uapi/asm/swab.h b/arch/sparc/include/uapi/asm/swab.h index a34ad07..4c7c12d 100644 --- a/arch/sparc/include/uapi/asm/swab.h +++ b/arch/sparc/include/uapi/asm/swab.h @@ -9,9 +9,9 @@ static inline __u16 __arch_swab16p(const __u16 
*addr) { __u16 ret; - __asm__ __volatile__ ("lduha [%1] %2, %0" + __asm__ __volatile__ ("lduha [%2] %3, %0" : "=r" (ret) - : "r" (addr), "i" (ASI_PL)); + : "m" (*addr), "r" (addr), "i" (ASI_PL)); return ret; } #define __arch_swab16p __arch_swab16p @@ -20,9 +20,9 @@ static inline __u32 __arch_swab32p(const __u32 *addr) { __u32 ret; - __asm__ __volatile__ ("lduwa [%1] %2, %0" + __asm__ __volatile__ ("lduwa [%2] %3, %0" : "=r" (ret) - : "r" (addr), "i" (ASI_PL)); + : "m" (*addr), "r" (addr), "i" (ASI_PL)); return ret; } #define __arch_swab32p __arch_swab32p @@ -31,9 +31,9 @@ static inline __u64 __arch_swab64p(const __u64 *addr) { __u64 ret; - __asm__ __volatile__ ("ldxa [%1] %2, %0" + __asm__ __volatile__ ("ldxa [%2] %3, %0" : "=r" (ret) - : "r" (addr), "i" (ASI_PL)); + : "m" (*addr), "r" (addr), "i" (ASI_PL)); return ret; } #define __arch_swab64p __arch_swab64p diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c index 5c51258..52e10de 100644 --- a/arch/sparc/kernel/cpu.c +++ b/arch/sparc/kernel/cpu.c @@ -493,6 +493,18 @@ static void __init sun4v_cpu_probe(void) sparc_pmu_type = "niagara5"; break; + case SUN4V_CHIP_SPARC_M6: + sparc_cpu_type = "SPARC-M6"; + sparc_fpu_type = "SPARC-M6 integrated FPU"; + sparc_pmu_type = "sparc-m6"; + break; + + case SUN4V_CHIP_SPARC_M7: + sparc_cpu_type = "SPARC-M7"; + sparc_fpu_type = "SPARC-M7 integrated FPU"; + sparc_pmu_type = "sparc-m7"; + break; + case SUN4V_CHIP_SPARC64X: sparc_cpu_type = "SPARC64-X"; sparc_fpu_type = "SPARC64-X integrated FPU"; diff --git a/arch/sparc/kernel/cpumap.c b/arch/sparc/kernel/cpumap.c index cb5d272..b031c9c 100644 --- a/arch/sparc/kernel/cpumap.c +++ b/arch/sparc/kernel/cpumap.c @@ -327,6 +327,8 @@ static int iterate_cpu(struct cpuinfo_tree *t, unsigned int root_index) case SUN4V_CHIP_NIAGARA3: case SUN4V_CHIP_NIAGARA4: case SUN4V_CHIP_NIAGARA5: + case SUN4V_CHIP_SPARC_M6: + case SUN4V_CHIP_SPARC_M7: case SUN4V_CHIP_SPARC64X: rover_inc_table = niagara_iterate_method; break; diff --git 
a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c index dff60ab..f87a55d 100644 --- a/arch/sparc/kernel/ds.c +++ b/arch/sparc/kernel/ds.c @@ -1200,14 +1200,14 @@ static int ds_probe(struct vio_dev *vdev, const struct vio_device_id *id) ds_cfg.tx_irq = vdev->tx_irq; ds_cfg.rx_irq = vdev->rx_irq; - lp = ldc_alloc(vdev->channel_id, &ds_cfg, dp); + lp = ldc_alloc(vdev->channel_id, &ds_cfg, dp, "DS"); if (IS_ERR(lp)) { err = PTR_ERR(lp); goto out_free_ds_states; } dp->lp = lp; - err = ldc_bind(lp, "DS"); + err = ldc_bind(lp); if (err) goto out_free_ldc; diff --git a/arch/sparc/kernel/dtlb_prot.S b/arch/sparc/kernel/dtlb_prot.S index b2c2c5b..d668ca1 100644 --- a/arch/sparc/kernel/dtlb_prot.S +++ b/arch/sparc/kernel/dtlb_prot.S @@ -24,11 +24,11 @@ mov TLB_TAG_ACCESS, %g4 ! For reload of vaddr /* PROT ** ICACHE line 2: More real fault processing */ + ldxa [%g4] ASI_DMMU, %g5 ! Put tagaccess in %g5 bgu,pn %xcc, winfix_trampoline ! Yes, perform winfixup - ldxa [%g4] ASI_DMMU, %g5 ! Put tagaccess in %g5 - ba,pt %xcc, sparc64_realfault_common ! Nope, normal fault mov FAULT_CODE_DTLB | FAULT_CODE_WRITE, %g4 - nop + ba,pt %xcc, sparc64_realfault_common ! 
Nope, normal fault + nop nop nop nop diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h index 9c179fb..3ad726c 100644 --- a/arch/sparc/kernel/entry.h +++ b/arch/sparc/kernel/entry.h @@ -66,13 +66,10 @@ struct pause_patch_entry { extern struct pause_patch_entry __pause_3insn_patch, __pause_3insn_patch_end; -extern void __init per_cpu_patch(void); extern void sun4v_patch_1insn_range(struct sun4v_1insn_patch_entry *, struct sun4v_1insn_patch_entry *); extern void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *, struct sun4v_2insn_patch_entry *); -extern void __init sun4v_patch(void); -extern void __init boot_cpu_id_too_large(int cpu); extern unsigned int dcache_parity_tl1_occurred; extern unsigned int icache_parity_tl1_occurred; diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 26b706a..3d61fca 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -282,8 +282,8 @@ sun4v_chip_type: stx %l2, [%l4 + 0x0] ldx [%sp + 2047 + 128 + 0x50], %l3 ! 
physaddr low /* 4MB align */ - srlx %l3, 22, %l3 - sllx %l3, 22, %l3 + srlx %l3, ILOG2_4MB, %l3 + sllx %l3, ILOG2_4MB, %l3 stx %l3, [%l4 + 0x8] /* Leave service as-is, "call-method" */ @@ -427,6 +427,12 @@ sun4v_chip_type: cmp %g2, '5' be,pt %xcc, 5f mov SUN4V_CHIP_NIAGARA5, %g4 + cmp %g2, '6' + be,pt %xcc, 5f + mov SUN4V_CHIP_SPARC_M6, %g4 + cmp %g2, '7' + be,pt %xcc, 5f + mov SUN4V_CHIP_SPARC_M7, %g4 ba,pt %xcc, 49f nop @@ -585,6 +591,12 @@ niagara_tlb_fixup: cmp %g1, SUN4V_CHIP_NIAGARA5 be,pt %xcc, niagara4_patch nop + cmp %g1, SUN4V_CHIP_SPARC_M6 + be,pt %xcc, niagara4_patch + nop + cmp %g1, SUN4V_CHIP_SPARC_M7 + be,pt %xcc, niagara4_patch + nop call generic_patch_copyops nop @@ -660,14 +672,12 @@ tlb_fixup_done: sethi %hi(init_thread_union), %g6 or %g6, %lo(init_thread_union), %g6 ldx [%g6 + TI_TASK], %g4 - mov %sp, %l6 wr %g0, ASI_P, %asi mov 1, %g1 sllx %g1, THREAD_SHIFT, %g1 sub %g1, (STACKFRAME_SZ + STACK_BIAS), %g1 add %g6, %g1, %sp - mov 0, %fp /* Set per-cpu pointer initially to zero, this makes * the boot-cpu use the in-kernel-image per-cpu areas @@ -694,44 +704,14 @@ tlb_fixup_done: nop #endif - mov %l6, %o1 ! OpenPROM stack call prom_init mov %l7, %o0 ! OpenPROM cif handler - /* Initialize current_thread_info()->cpu as early as possible. - * In order to do that accurately we have to patch up the get_cpuid() - * assembler sequences. And that, in turn, requires that we know - * if we are on a Starfire box or not. While we're here, patch up - * the sun4v sequences as well. + /* To create a one-register-window buffer between the kernel's + * initial stack and the last stack frame we use from the firmware, + * do the rest of the boot from a C helper function. */ - call check_if_starfire - nop - call per_cpu_patch - nop - call sun4v_patch - nop - -#ifdef CONFIG_SMP - call hard_smp_processor_id - nop - cmp %o0, NR_CPUS - blu,pt %xcc, 1f - nop - call boot_cpu_id_too_large - nop - /* Not reached... 
*/ - -1: -#else - mov 0, %o0 -#endif - sth %o0, [%g6 + TI_CPU] - - call prom_init_report - nop - - /* Off we go.... */ - call start_kernel + call start_early_boot nop /* Not reached... */ diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c index c0a2de0..5c55145 100644 --- a/arch/sparc/kernel/hvapi.c +++ b/arch/sparc/kernel/hvapi.c @@ -46,6 +46,7 @@ static struct api_info api_table[] = { { .group = HV_GRP_VF_CPU, }, { .group = HV_GRP_KT_CPU, }, { .group = HV_GRP_VT_CPU, }, + { .group = HV_GRP_T5_CPU, }, { .group = HV_GRP_DIAG, .flags = FLAG_PRE_API }, }; diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S index f3ab509..caedf83 100644 --- a/arch/sparc/kernel/hvcalls.S +++ b/arch/sparc/kernel/hvcalls.S @@ -821,3 +821,19 @@ ENTRY(sun4v_vt_set_perfreg) retl nop ENDPROC(sun4v_vt_set_perfreg) + +ENTRY(sun4v_t5_get_perfreg) + mov %o1, %o4 + mov HV_FAST_T5_GET_PERFREG, %o5 + ta HV_FAST_TRAP + stx %o1, [%o4] + retl + nop +ENDPROC(sun4v_t5_get_perfreg) + +ENTRY(sun4v_t5_set_perfreg) + mov HV_FAST_T5_SET_PERFREG, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_t5_set_perfreg) diff --git a/arch/sparc/kernel/hvtramp.S b/arch/sparc/kernel/hvtramp.S index 4eb1a5a..4ad8138 100644 --- a/arch/sparc/kernel/hvtramp.S +++ b/arch/sparc/kernel/hvtramp.S @@ -110,7 +110,6 @@ hv_cpu_startup: sllx %g5, THREAD_SHIFT, %g5 sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5 add %g6, %g5, %sp - mov 0, %fp call init_irqwork_curcpu nop diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index 2096468..6cacf2d 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -278,7 +278,8 @@ static void *sbus_alloc_coherent(struct device *dev, size_t len, } order = get_order(len_total); - if ((va = __get_free_pages(GFP_KERNEL|__GFP_COMP, order)) == 0) + va = __get_free_pages(gfp, order); + if (va == 0) goto err_nopages; if ((res = kzalloc(sizeof(struct resource), GFP_KERNEL)) == NULL) @@ -443,7 +444,7 @@ static void *pci32_alloc_coherent(struct 
device *dev, size_t len, } order = get_order(len_total); - va = (void *) __get_free_pages(GFP_KERNEL, order); + va = (void *) __get_free_pages(gfp, order); if (va == NULL) { printk("pci_alloc_consistent: no %ld pages\n", len_total>>PAGE_SHIFT); goto err_nopages; diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index d74fa7f..6598b71 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -47,8 +47,6 @@ #include "cpumap.h" #include "kstack.h" -#define NUM_IVECS (IMAP_INR + 1) - struct ino_bucket *ivector_table; unsigned long ivector_table_pa; @@ -107,55 +105,196 @@ static void bucket_set_irq(unsigned long bucket_pa, unsigned int irq) #define irq_work_pa(__cpu) &(trap_block[(__cpu)].irq_worklist_pa) -static struct { - unsigned int dev_handle; - unsigned int dev_ino; - unsigned int in_use; -} irq_table[NR_IRQS]; -static DEFINE_SPINLOCK(irq_alloc_lock); +static unsigned long hvirq_major __initdata; +static int __init early_hvirq_major(char *p) +{ + int rc = kstrtoul(p, 10, &hvirq_major); + + return rc; +} +early_param("hvirq", early_hvirq_major); -unsigned char irq_alloc(unsigned int dev_handle, unsigned int dev_ino) +static int hv_irq_version; + +/* Major version 2.0 of HV_GRP_INTR added support for the VIRQ cookie + * based interfaces, but: + * + * 1) Several OSs, Solaris and Linux included, use them even when only + * negotiating version 1.0 (or failing to negotiate at all). So the + * hypervisor has a workaround that provides the VIRQ interfaces even + * when only verion 1.0 of the API is in use. + * + * 2) Second, and more importantly, with major version 2.0 these VIRQ + * interfaces only were actually hooked up for LDC interrupts, even + * though the Hypervisor specification clearly stated: + * + * The new interrupt API functions will be available to a guest + * when it negotiates version 2.0 in the interrupt API group 0x2. 
When + * a guest negotiates version 2.0, all interrupt sources will only + * support using the cookie interface, and any attempt to use the + * version 1.0 interrupt APIs numbered 0xa0 to 0xa6 will result in the + * ENOTSUPPORTED error being returned. + * + * with an emphasis on "all interrupt sources". + * + * To correct this, major version 3.0 was created which does actually + * support VIRQs for all interrupt sources (not just LDC devices). So + * if we want to move completely over the cookie based VIRQs we must + * negotiate major version 3.0 or later of HV_GRP_INTR. + */ +static bool sun4v_cookie_only_virqs(void) { - unsigned long flags; - unsigned char ent; + if (hv_irq_version >= 3) + return true; + return false; +} - BUILD_BUG_ON(NR_IRQS >= 256); +static void __init irq_init_hv(void) +{ + unsigned long hv_error, major, minor = 0; + + if (tlb_type != hypervisor) + return; + + if (hvirq_major) + major = hvirq_major; + else + major = 3; - spin_lock_irqsave(&irq_alloc_lock, flags); + hv_error = sun4v_hvapi_register(HV_GRP_INTR, major, &minor); + if (!hv_error) + hv_irq_version = major; + else + hv_irq_version = 1; - for (ent = 1; ent < NR_IRQS; ent++) { - if (!irq_table[ent].in_use) + pr_info("SUN4V: Using IRQ API major %d, cookie only virqs %s\n", + hv_irq_version, + sun4v_cookie_only_virqs() ? 
"enabled" : "disabled"); +} + +/* This function is for the timer interrupt.*/ +int __init arch_probe_nr_irqs(void) +{ + return 1; +} + +#define DEFAULT_NUM_IVECS (0xfffU) +static unsigned int nr_ivec = DEFAULT_NUM_IVECS; +#define NUM_IVECS (nr_ivec) + +static unsigned int __init size_nr_ivec(void) +{ + if (tlb_type == hypervisor) { + switch (sun4v_chip_type) { + /* Athena's devhandle|devino is large.*/ + case SUN4V_CHIP_SPARC64X: + nr_ivec = 0xffff; break; + } } - if (ent >= NR_IRQS) { - printk(KERN_ERR "IRQ: Out of virtual IRQs.\n"); - ent = 0; - } else { - irq_table[ent].dev_handle = dev_handle; - irq_table[ent].dev_ino = dev_ino; - irq_table[ent].in_use = 1; - } + return nr_ivec; +} + +struct irq_handler_data { + union { + struct { + unsigned int dev_handle; + unsigned int dev_ino; + }; + unsigned long sysino; + }; + struct ino_bucket bucket; + unsigned long iclr; + unsigned long imap; +}; + +static inline unsigned int irq_data_to_handle(struct irq_data *data) +{ + struct irq_handler_data *ihd = data->handler_data; + + return ihd->dev_handle; +} + +static inline unsigned int irq_data_to_ino(struct irq_data *data) +{ + struct irq_handler_data *ihd = data->handler_data; + + return ihd->dev_ino; +} - spin_unlock_irqrestore(&irq_alloc_lock, flags); +static inline unsigned long irq_data_to_sysino(struct irq_data *data) +{ + struct irq_handler_data *ihd = data->handler_data; - return ent; + return ihd->sysino; } -#ifdef CONFIG_PCI_MSI void irq_free(unsigned int irq) { - unsigned long flags; + void *data = irq_get_handler_data(irq); - if (irq >= NR_IRQS) - return; + kfree(data); + irq_set_handler_data(irq, NULL); + irq_free_descs(irq, 1); +} - spin_lock_irqsave(&irq_alloc_lock, flags); +unsigned int irq_alloc(unsigned int dev_handle, unsigned int dev_ino) +{ + int irq; - irq_table[irq].in_use = 0; + irq = __irq_alloc_descs(-1, 1, 1, numa_node_id(), NULL); + if (irq <= 0) + goto out; - spin_unlock_irqrestore(&irq_alloc_lock, flags); + return irq; +out: + return 0; +} + 
+static unsigned int cookie_exists(u32 devhandle, unsigned int devino) +{ + unsigned long hv_err, cookie; + struct ino_bucket *bucket; + unsigned int irq = 0U; + + hv_err = sun4v_vintr_get_cookie(devhandle, devino, &cookie); + if (hv_err) { + pr_err("HV get cookie failed hv_err = %ld\n", hv_err); + goto out; + } + + if (cookie & ((1UL << 63UL))) { + cookie = ~cookie; + bucket = (struct ino_bucket *) __va(cookie); + irq = bucket->__irq; + } +out: + return irq; +} + +static unsigned int sysino_exists(u32 devhandle, unsigned int devino) +{ + unsigned long sysino = sun4v_devino_to_sysino(devhandle, devino); + struct ino_bucket *bucket; + unsigned int irq; + + bucket = &ivector_table[sysino]; + irq = bucket_get_irq(__pa(bucket)); + + return irq; +} + +void ack_bad_irq(unsigned int irq) +{ + pr_crit("BAD IRQ ack %d\n", irq); +} + +void irq_install_pre_handler(int irq, + void (*func)(unsigned int, void *, void *), + void *arg1, void *arg2) +{ + pr_warn("IRQ pre handler NOT supported.\n"); } -#endif /* * /proc/interrupts printing: @@ -206,15 +345,6 @@ static unsigned int sun4u_compute_tid(unsigned long imap, unsigned long cpuid) return tid; } -struct irq_handler_data { - unsigned long iclr; - unsigned long imap; - - void (*pre_handler)(unsigned int, void *, void *); - void *arg1; - void *arg2; -}; - #ifdef CONFIG_SMP static int irq_choose_cpu(unsigned int irq, const struct cpumask *affinity) { @@ -316,8 +446,8 @@ static void sun4u_irq_eoi(struct irq_data *data) static void sun4v_irq_enable(struct irq_data *data) { - unsigned int ino = irq_table[data->irq].dev_ino; unsigned long cpuid = irq_choose_cpu(data->irq, data->affinity); + unsigned int ino = irq_data_to_sysino(data); int err; err = sun4v_intr_settarget(ino, cpuid); @@ -337,8 +467,8 @@ static void sun4v_irq_enable(struct irq_data *data) static int sun4v_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { - unsigned int ino = irq_table[data->irq].dev_ino; unsigned long cpuid = 
irq_choose_cpu(data->irq, mask); + unsigned int ino = irq_data_to_sysino(data); int err; err = sun4v_intr_settarget(ino, cpuid); @@ -351,7 +481,7 @@ static int sun4v_set_affinity(struct irq_data *data, static void sun4v_irq_disable(struct irq_data *data) { - unsigned int ino = irq_table[data->irq].dev_ino; + unsigned int ino = irq_data_to_sysino(data); int err; err = sun4v_intr_setenabled(ino, HV_INTR_DISABLED); @@ -362,7 +492,7 @@ static void sun4v_irq_disable(struct irq_data *data) static void sun4v_irq_eoi(struct irq_data *data) { - unsigned int ino = irq_table[data->irq].dev_ino; + unsigned int ino = irq_data_to_sysino(data); int err; err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE); @@ -373,14 +503,13 @@ static void sun4v_irq_eoi(struct irq_data *data) static void sun4v_virq_enable(struct irq_data *data) { - unsigned long cpuid, dev_handle, dev_ino; + unsigned long dev_handle = irq_data_to_handle(data); + unsigned long dev_ino = irq_data_to_ino(data); + unsigned long cpuid; int err; cpuid = irq_choose_cpu(data->irq, data->affinity); - dev_handle = irq_table[data->irq].dev_handle; - dev_ino = irq_table[data->irq].dev_ino; - err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid); if (err != HV_EOK) printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): " @@ -403,14 +532,13 @@ static void sun4v_virq_enable(struct irq_data *data) static int sun4v_virt_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { - unsigned long cpuid, dev_handle, dev_ino; + unsigned long dev_handle = irq_data_to_handle(data); + unsigned long dev_ino = irq_data_to_ino(data); + unsigned long cpuid; int err; cpuid = irq_choose_cpu(data->irq, mask); - dev_handle = irq_table[data->irq].dev_handle; - dev_ino = irq_table[data->irq].dev_ino; - err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid); if (err != HV_EOK) printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): " @@ -422,11 +550,10 @@ static int sun4v_virt_set_affinity(struct irq_data *data, static void 
sun4v_virq_disable(struct irq_data *data) { - unsigned long dev_handle, dev_ino; + unsigned long dev_handle = irq_data_to_handle(data); + unsigned long dev_ino = irq_data_to_ino(data); int err; - dev_handle = irq_table[data->irq].dev_handle; - dev_ino = irq_table[data->irq].dev_ino; err = sun4v_vintr_set_valid(dev_handle, dev_ino, HV_INTR_DISABLED); @@ -438,12 +565,10 @@ static void sun4v_virq_disable(struct irq_data *data) static void sun4v_virq_eoi(struct irq_data *data) { - unsigned long dev_handle, dev_ino; + unsigned long dev_handle = irq_data_to_handle(data); + unsigned long dev_ino = irq_data_to_ino(data); int err; - dev_handle = irq_table[data->irq].dev_handle; - dev_ino = irq_table[data->irq].dev_ino; - err = sun4v_vintr_set_state(dev_handle, dev_ino, HV_INTR_STATE_IDLE); if (err != HV_EOK) @@ -479,31 +604,10 @@ static struct irq_chip sun4v_virq = { .flags = IRQCHIP_EOI_IF_HANDLED, }; -static void pre_flow_handler(struct irq_data *d) -{ - struct irq_handler_data *handler_data = irq_data_get_irq_handler_data(d); - unsigned int ino = irq_table[d->irq].dev_ino; - - handler_data->pre_handler(ino, handler_data->arg1, handler_data->arg2); -} - -void irq_install_pre_handler(int irq, - void (*func)(unsigned int, void *, void *), - void *arg1, void *arg2) -{ - struct irq_handler_data *handler_data = irq_get_handler_data(irq); - - handler_data->pre_handler = func; - handler_data->arg1 = arg1; - handler_data->arg2 = arg2; - - __irq_set_preflow_handler(irq, pre_flow_handler); -} - unsigned int build_irq(int inofixup, unsigned long iclr, unsigned long imap) { - struct ino_bucket *bucket; struct irq_handler_data *handler_data; + struct ino_bucket *bucket; unsigned int irq; int ino; @@ -537,119 +641,166 @@ out: return irq; } -static unsigned int sun4v_build_common(unsigned long sysino, - struct irq_chip *chip) +static unsigned int sun4v_build_common(u32 devhandle, unsigned int devino, + void (*handler_data_init)(struct irq_handler_data *data, + u32 devhandle, unsigned 
int devino), + struct irq_chip *chip) { - struct ino_bucket *bucket; - struct irq_handler_data *handler_data; + struct irq_handler_data *data; unsigned int irq; - BUG_ON(tlb_type != hypervisor); + irq = irq_alloc(devhandle, devino); + if (!irq) + goto out; - bucket = &ivector_table[sysino]; - irq = bucket_get_irq(__pa(bucket)); - if (!irq) { - irq = irq_alloc(0, sysino); - bucket_set_irq(__pa(bucket), irq); - irq_set_chip_and_handler_name(irq, chip, handle_fasteoi_irq, - "IVEC"); + data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC); + if (unlikely(!data)) { + pr_err("IRQ handler data allocation failed.\n"); + irq_free(irq); + irq = 0; + goto out; } - handler_data = irq_get_handler_data(irq); - if (unlikely(handler_data)) - goto out; + irq_set_handler_data(irq, data); + handler_data_init(data, devhandle, devino); + irq_set_chip_and_handler_name(irq, chip, handle_fasteoi_irq, "IVEC"); + data->imap = ~0UL; + data->iclr = ~0UL; +out: + return irq; +} - handler_data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC); - if (unlikely(!handler_data)) { - prom_printf("IRQ: kzalloc(irq_handler_data) failed.\n"); - prom_halt(); - } - irq_set_handler_data(irq, handler_data); +static unsigned long cookie_assign(unsigned int irq, u32 devhandle, + unsigned int devino) +{ + struct irq_handler_data *ihd = irq_get_handler_data(irq); + unsigned long hv_error, cookie; - /* Catch accidental accesses to these things. IMAP/ICLR handling - * is done by hypervisor calls on sun4v platforms, not by direct - * register accesses. + /* handler_irq needs to find the irq. cookie is seen signed in + * sun4v_dev_mondo and treated as a non ivector_table delivery. 
*/ - handler_data->imap = ~0UL; - handler_data->iclr = ~0UL; + ihd->bucket.__irq = irq; + cookie = ~__pa(&ihd->bucket); -out: - return irq; + hv_error = sun4v_vintr_set_cookie(devhandle, devino, cookie); + if (hv_error) + pr_err("HV vintr set cookie failed = %ld\n", hv_error); + + return hv_error; } -unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino) +static void cookie_handler_data(struct irq_handler_data *data, + u32 devhandle, unsigned int devino) { - unsigned long sysino = sun4v_devino_to_sysino(devhandle, devino); + data->dev_handle = devhandle; + data->dev_ino = devino; +} + +static unsigned int cookie_build_irq(u32 devhandle, unsigned int devino, + struct irq_chip *chip) +{ + unsigned long hv_error; + unsigned int irq; + + irq = sun4v_build_common(devhandle, devino, cookie_handler_data, chip); + + hv_error = cookie_assign(irq, devhandle, devino); + if (hv_error) { + irq_free(irq); + irq = 0; + } - return sun4v_build_common(sysino, &sun4v_irq); + return irq; } -unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino) +static unsigned int sun4v_build_cookie(u32 devhandle, unsigned int devino) { - struct irq_handler_data *handler_data; - unsigned long hv_err, cookie; - struct ino_bucket *bucket; unsigned int irq; - bucket = kzalloc(sizeof(struct ino_bucket), GFP_ATOMIC); - if (unlikely(!bucket)) - return 0; + irq = cookie_exists(devhandle, devino); + if (irq) + goto out; - /* The only reference we store to the IRQ bucket is - * by physical address which kmemleak can't see, tell - * it that this object explicitly is not a leak and - * should be scanned. 
- */ - kmemleak_not_leak(bucket); + irq = cookie_build_irq(devhandle, devino, &sun4v_virq); - __flush_dcache_range((unsigned long) bucket, - ((unsigned long) bucket + - sizeof(struct ino_bucket))); +out: + return irq; +} - irq = irq_alloc(devhandle, devino); +static void sysino_set_bucket(unsigned int irq) +{ + struct irq_handler_data *ihd = irq_get_handler_data(irq); + struct ino_bucket *bucket; + unsigned long sysino; + + sysino = sun4v_devino_to_sysino(ihd->dev_handle, ihd->dev_ino); + BUG_ON(sysino >= nr_ivec); + bucket = &ivector_table[sysino]; bucket_set_irq(__pa(bucket), irq); +} - irq_set_chip_and_handler_name(irq, &sun4v_virq, handle_fasteoi_irq, - "IVEC"); +static void sysino_handler_data(struct irq_handler_data *data, + u32 devhandle, unsigned int devino) +{ + unsigned long sysino; - handler_data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC); - if (unlikely(!handler_data)) - return 0; + sysino = sun4v_devino_to_sysino(devhandle, devino); + data->sysino = sysino; +} - /* In order to make the LDC channel startup sequence easier, - * especially wrt. locking, we do not let request_irq() enable - * the interrupt. - */ - irq_set_status_flags(irq, IRQ_NOAUTOEN); - irq_set_handler_data(irq, handler_data); +static unsigned int sysino_build_irq(u32 devhandle, unsigned int devino, + struct irq_chip *chip) +{ + unsigned int irq; - /* Catch accidental accesses to these things. IMAP/ICLR handling - * is done by hypervisor calls on sun4v platforms, not by direct - * register accesses. 
- */ - handler_data->imap = ~0UL; - handler_data->iclr = ~0UL; + irq = sun4v_build_common(devhandle, devino, sysino_handler_data, chip); + if (!irq) + goto out; - cookie = ~__pa(bucket); - hv_err = sun4v_vintr_set_cookie(devhandle, devino, cookie); - if (hv_err) { - prom_printf("IRQ: Fatal, cannot set cookie for [%x:%x] " - "err=%lu\n", devhandle, devino, hv_err); - prom_halt(); - } + sysino_set_bucket(irq); +out: + return irq; +} +static int sun4v_build_sysino(u32 devhandle, unsigned int devino) +{ + int irq; + + irq = sysino_exists(devhandle, devino); + if (irq) + goto out; + + irq = sysino_build_irq(devhandle, devino, &sun4v_irq); +out: return irq; } -void ack_bad_irq(unsigned int irq) +unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino) { - unsigned int ino = irq_table[irq].dev_ino; + unsigned int irq; - if (!ino) - ino = 0xdeadbeef; + if (sun4v_cookie_only_virqs()) + irq = sun4v_build_cookie(devhandle, devino); + else + irq = sun4v_build_sysino(devhandle, devino); - printk(KERN_CRIT "Unexpected IRQ from ino[%x] irq[%u]\n", - ino, irq); + return irq; +} + +unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino) +{ + int irq; + + irq = cookie_build_irq(devhandle, devino, &sun4v_virq); + if (!irq) + goto out; + + /* This is borrowed from the original function. + */ + irq_set_status_flags(irq, IRQ_NOAUTOEN); + +out: + return irq; } void *hardirq_stack[NR_CPUS]; @@ -733,9 +884,12 @@ void fixup_irqs(void) for (irq = 0; irq < NR_IRQS; irq++) { struct irq_desc *desc = irq_to_desc(irq); - struct irq_data *data = irq_desc_get_irq_data(desc); + struct irq_data *data; unsigned long flags; + if (!desc) + continue; + data = irq_desc_get_irq_data(desc); raw_spin_lock_irqsave(&desc->lock, flags); if (desc->action && !irqd_is_per_cpu(data)) { if (data->chip->irq_set_affinity) @@ -935,16 +1089,22 @@ static struct irqaction timer_irq_action = { .name = "timer", }; -/* Only invoked on boot processor. 
*/ -void __init init_IRQ(void) +static void __init irq_ivector_init(void) { - unsigned long size; + unsigned long size, order; + unsigned int ivecs; - map_prom_timers(); - kill_prom_timer(); + /* If we are doing cookie only VIRQs then we do not need the ivector + * table to process interrupts. + */ + if (sun4v_cookie_only_virqs()) + return; - size = sizeof(struct ino_bucket) * NUM_IVECS; - ivector_table = kzalloc(size, GFP_KERNEL); + ivecs = size_nr_ivec(); + size = sizeof(struct ino_bucket) * ivecs; + order = get_order(size); + ivector_table = (struct ino_bucket *) + __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); if (!ivector_table) { prom_printf("Fatal error, cannot allocate ivector_table\n"); prom_halt(); @@ -953,6 +1113,15 @@ void __init init_IRQ(void) ((unsigned long) ivector_table) + size); ivector_table_pa = __pa(ivector_table); +} + +/* Only invoked on boot processor.*/ +void __init init_IRQ(void) +{ + irq_init_hv(); + irq_ivector_init(); + map_prom_timers(); + kill_prom_timer(); if (tlb_type == hypervisor) sun4v_init_mondo_queues(); diff --git a/arch/sparc/kernel/ktlb.S b/arch/sparc/kernel/ktlb.S index fde5a41..ef0d8e9 100644 --- a/arch/sparc/kernel/ktlb.S +++ b/arch/sparc/kernel/ktlb.S @@ -47,14 +47,6 @@ kvmap_itlb_vmalloc_addr: KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_itlb_longpath) TSB_LOCK_TAG(%g1, %g2, %g7) - - /* Load and check PTE. */ - ldxa [%g5] ASI_PHYS_USE_EC, %g5 - mov 1, %g7 - sllx %g7, TSB_TAG_INVALID_BIT, %g7 - brgez,a,pn %g5, kvmap_itlb_longpath - TSB_STORE(%g1, %g7) - TSB_WRITE(%g1, %g5, %g6) /* fallthrough to TLB load */ @@ -118,6 +110,12 @@ kvmap_dtlb_obp: ba,pt %xcc, kvmap_dtlb_load nop +kvmap_linear_early: + sethi %hi(kern_linear_pte_xor), %g7 + ldx [%g7 + %lo(kern_linear_pte_xor)], %g2 + ba,pt %xcc, kvmap_dtlb_tsb4m_load + xor %g2, %g4, %g5 + .align 32 kvmap_dtlb_tsb4m_load: TSB_LOCK_TAG(%g1, %g2, %g7) @@ -146,85 +144,17 @@ kvmap_dtlb_4v: /* Correct TAG_TARGET is already in %g6, check 4mb TSB. 
*/ KERN_TSB4M_LOOKUP_TL1(%g6, %g5, %g1, %g2, %g3, kvmap_dtlb_load) #endif - /* TSB entry address left in %g1, lookup linear PTE. - * Must preserve %g1 and %g6 (TAG). - */ -kvmap_dtlb_tsb4m_miss: - /* Clear the PAGE_OFFSET top virtual bits, shift - * down to get PFN, and make sure PFN is in range. - */ - sllx %g4, 21, %g5 - - /* Check to see if we know about valid memory at the 4MB - * chunk this physical address will reside within. - */ - srlx %g5, 21 + 41, %g2 - brnz,pn %g2, kvmap_dtlb_longpath - nop - - /* This unconditional branch and delay-slot nop gets patched - * by the sethi sequence once the bitmap is properly setup. + /* Linear mapping TSB lookup failed. Fallthrough to kernel + * page table based lookup. */ - .globl valid_addr_bitmap_insn -valid_addr_bitmap_insn: - ba,pt %xcc, 2f - nop - .subsection 2 - .globl valid_addr_bitmap_patch -valid_addr_bitmap_patch: - sethi %hi(sparc64_valid_addr_bitmap), %g7 - or %g7, %lo(sparc64_valid_addr_bitmap), %g7 - .previous - - srlx %g5, 21 + 22, %g2 - srlx %g2, 6, %g5 - and %g2, 63, %g2 - sllx %g5, 3, %g5 - ldx [%g7 + %g5], %g5 - mov 1, %g7 - sllx %g7, %g2, %g7 - andcc %g5, %g7, %g0 - be,pn %xcc, kvmap_dtlb_longpath - -2: sethi %hi(kpte_linear_bitmap), %g2 - - /* Get the 256MB physical address index. */ - sllx %g4, 21, %g5 - or %g2, %lo(kpte_linear_bitmap), %g2 - srlx %g5, 21 + 28, %g5 - and %g5, (32 - 1), %g7 - - /* Divide by 32 to get the offset into the bitmask. 
*/ - srlx %g5, 5, %g5 - add %g7, %g7, %g7 - sllx %g5, 3, %g5 - - /* kern_linear_pte_xor[(mask >> shift) & 3)] */ - ldx [%g2 + %g5], %g2 - srlx %g2, %g7, %g7 - sethi %hi(kern_linear_pte_xor), %g5 - and %g7, 3, %g7 - or %g5, %lo(kern_linear_pte_xor), %g5 - sllx %g7, 3, %g7 - ldx [%g5 + %g7], %g2 - .globl kvmap_linear_patch kvmap_linear_patch: - ba,pt %xcc, kvmap_dtlb_tsb4m_load - xor %g2, %g4, %g5 + ba,a,pt %xcc, kvmap_linear_early kvmap_dtlb_vmalloc_addr: KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath) TSB_LOCK_TAG(%g1, %g2, %g7) - - /* Load and check PTE. */ - ldxa [%g5] ASI_PHYS_USE_EC, %g5 - mov 1, %g7 - sllx %g7, TSB_TAG_INVALID_BIT, %g7 - brgez,a,pn %g5, kvmap_dtlb_longpath - TSB_STORE(%g1, %g7) - TSB_WRITE(%g1, %g5, %g6) /* fallthrough to TLB load */ @@ -256,13 +186,8 @@ kvmap_dtlb_load: #ifdef CONFIG_SPARSEMEM_VMEMMAP kvmap_vmemmap: - sub %g4, %g5, %g5 - srlx %g5, 22, %g5 - sethi %hi(vmemmap_table), %g1 - sllx %g5, 3, %g5 - or %g1, %lo(vmemmap_table), %g1 - ba,pt %xcc, kvmap_dtlb_load - ldx [%g1 + %g5], %g5 + KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath) + ba,a,pt %xcc, kvmap_dtlb_load #endif kvmap_dtlb_nonlinear: @@ -274,8 +199,8 @@ kvmap_dtlb_nonlinear: #ifdef CONFIG_SPARSEMEM_VMEMMAP /* Do not use the TSB for vmemmap. 
*/ - mov (VMEMMAP_BASE >> 40), %g5 - sllx %g5, 40, %g5 + sethi %hi(VMEMMAP_BASE), %g5 + ldx [%g5 + %lo(VMEMMAP_BASE)], %g5 cmp %g4,%g5 bgeu,pn %xcc, kvmap_vmemmap nop @@ -287,8 +212,8 @@ kvmap_dtlb_tsbmiss: sethi %hi(MODULES_VADDR), %g5 cmp %g4, %g5 blu,pn %xcc, kvmap_dtlb_longpath - mov (VMALLOC_END >> 40), %g5 - sllx %g5, 40, %g5 + sethi %hi(VMALLOC_END), %g5 + ldx [%g5 + %lo(VMALLOC_END)], %g5 cmp %g4, %g5 bgeu,pn %xcc, kvmap_dtlb_longpath nop diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c index e01d75d..27bb554 100644 --- a/arch/sparc/kernel/ldc.c +++ b/arch/sparc/kernel/ldc.c @@ -1078,7 +1078,8 @@ static void ldc_iommu_release(struct ldc_channel *lp) struct ldc_channel *ldc_alloc(unsigned long id, const struct ldc_channel_config *cfgp, - void *event_arg) + void *event_arg, + const char *name) { struct ldc_channel *lp; const struct ldc_mode_ops *mops; @@ -1093,6 +1094,8 @@ struct ldc_channel *ldc_alloc(unsigned long id, err = -EINVAL; if (!cfgp) goto out_err; + if (!name) + goto out_err; switch (cfgp->mode) { case LDC_MODE_RAW: @@ -1185,6 +1188,21 @@ struct ldc_channel *ldc_alloc(unsigned long id, INIT_HLIST_HEAD(&lp->mh_list); + snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name); + snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name); + + err = request_irq(lp->cfg.rx_irq, ldc_rx, 0, + lp->rx_irq_name, lp); + if (err) + goto out_free_txq; + + err = request_irq(lp->cfg.tx_irq, ldc_tx, 0, + lp->tx_irq_name, lp); + if (err) { + free_irq(lp->cfg.rx_irq, lp); + goto out_free_txq; + } + return lp; out_free_txq: @@ -1237,31 +1255,14 @@ EXPORT_SYMBOL(ldc_free); * state. This does not initiate a handshake, ldc_connect() does * that. 
*/ -int ldc_bind(struct ldc_channel *lp, const char *name) +int ldc_bind(struct ldc_channel *lp) { unsigned long hv_err, flags; int err = -EINVAL; - if (!name || - (lp->state != LDC_STATE_INIT)) + if (lp->state != LDC_STATE_INIT) return -EINVAL; - snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name); - snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name); - - err = request_irq(lp->cfg.rx_irq, ldc_rx, 0, - lp->rx_irq_name, lp); - if (err) - return err; - - err = request_irq(lp->cfg.tx_irq, ldc_tx, 0, - lp->tx_irq_name, lp); - if (err) { - free_irq(lp->cfg.rx_irq, lp); - return err; - } - - spin_lock_irqsave(&lp->lock, flags); enable_irq(lp->cfg.rx_irq); @@ -1336,7 +1337,7 @@ int ldc_connect(struct ldc_channel *lp) if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) || !(lp->flags & LDC_FLAG_REGISTERED_QUEUES) || lp->hs_state != LDC_HS_OPEN) - err = -EINVAL; + err = ((lp->hs_state > LDC_HS_OPEN) ? 0 : -EINVAL); else err = start_handshake(lp); diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index 6479256..fce8ab1 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -141,7 +141,6 @@ static inline unsigned int get_nmi_count(int cpu) static __init void nmi_cpu_busy(void *data) { - local_irq_enable_in_hardirq(); while (endflag == 0) mb(); } diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index bc4d3f5..cb02145 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -398,8 +398,8 @@ static void apb_fake_ranges(struct pci_dev *dev, apb_calc_first_last(map, &first, &last); res = bus->resource[1]; res->flags = IORESOURCE_MEM; - region.start = (first << 21); - region.end = (last << 21) + ((1 << 21) - 1); + region.start = (first << 29); + region.end = (last << 29) + ((1 << 29) - 1); pcibios_bus_to_resource(dev, res, &region); } diff --git a/arch/sparc/kernel/pci_schizo.c b/arch/sparc/kernel/pci_schizo.c index 8f76f23..f9c6813 100644 --- a/arch/sparc/kernel/pci_schizo.c +++ b/arch/sparc/kernel/pci_schizo.c @@ -581,7
+581,7 @@ static irqreturn_t schizo_pcierr_intr_other(struct pci_pbm_info *pbm) { unsigned long csr_reg, csr, csr_error_bits; irqreturn_t ret = IRQ_NONE; - u16 stat; + u32 stat; csr_reg = pbm->pbm_regs + SCHIZO_PCI_CTRL; csr = upa_readq(csr_reg); @@ -617,7 +617,7 @@ static irqreturn_t schizo_pcierr_intr_other(struct pci_pbm_info *pbm) pbm->name); ret = IRQ_HANDLED; } - pci_read_config_word(pbm->pci_bus->self, PCI_STATUS, &stat); + pbm->pci_ops->read(pbm->pci_bus, 0, PCI_STATUS, 2, &stat); if (stat & (PCI_STATUS_PARITY | PCI_STATUS_SIG_TARGET_ABORT | PCI_STATUS_REC_TARGET_ABORT | @@ -625,7 +625,7 @@ static irqreturn_t schizo_pcierr_intr_other(struct pci_pbm_info *pbm) PCI_STATUS_SIG_SYSTEM_ERROR)) { printk("%s: PCI bus error, PCI_STATUS[%04x]\n", pbm->name, stat); - pci_write_config_word(pbm->pci_bus->self, PCI_STATUS, 0xffff); + pbm->pci_ops->write(pbm->pci_bus, 0, PCI_STATUS, 2, 0xffff); ret = IRQ_HANDLED; } return ret; diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c index dbb51a6..927d9c5 100644 --- a/arch/sparc/kernel/pcr.c +++ b/arch/sparc/kernel/pcr.c @@ -193,12 +193,41 @@ static const struct pcr_ops n4_pcr_ops = { .pcr_nmi_disable = PCR_N4_PICNPT, }; +static u64 n5_pcr_read(unsigned long reg_num) +{ + unsigned long val; + + (void) sun4v_t5_get_perfreg(reg_num, &val); + + return val; +} + +static void n5_pcr_write(unsigned long reg_num, u64 val) +{ + (void) sun4v_t5_set_perfreg(reg_num, val); +} + +static const struct pcr_ops n5_pcr_ops = { + .read_pcr = n5_pcr_read, + .write_pcr = n5_pcr_write, + .read_pic = n4_pic_read, + .write_pic = n4_pic_write, + .nmi_picl_value = n4_picl_value, + .pcr_nmi_enable = (PCR_N4_PICNPT | PCR_N4_STRACE | + PCR_N4_UTRACE | PCR_N4_TOE | + (26 << PCR_N4_SL_SHIFT)), + .pcr_nmi_disable = PCR_N4_PICNPT, +}; + + static unsigned long perf_hsvc_group; static unsigned long perf_hsvc_major; static unsigned long perf_hsvc_minor; static int __init register_perf_hsvc(void) { + unsigned long hverror; + if (tlb_type == 
hypervisor) { switch (sun4v_chip_type) { case SUN4V_CHIP_NIAGARA1: @@ -217,6 +246,10 @@ static int __init register_perf_hsvc(void) perf_hsvc_group = HV_GRP_VT_CPU; break; + case SUN4V_CHIP_NIAGARA5: + perf_hsvc_group = HV_GRP_T5_CPU; + break; + default: return -ENODEV; } @@ -224,10 +257,12 @@ static int __init register_perf_hsvc(void) perf_hsvc_major = 1; perf_hsvc_minor = 0; - if (sun4v_hvapi_register(perf_hsvc_group, - perf_hsvc_major, - &perf_hsvc_minor)) { - printk("perfmon: Could not register hvapi.\n"); + hverror = sun4v_hvapi_register(perf_hsvc_group, + perf_hsvc_major, + &perf_hsvc_minor); + if (hverror) { + pr_err("perfmon: Could not register hvapi(0x%lx).\n", + hverror); return -ENODEV; } } @@ -256,6 +291,10 @@ static int __init setup_sun4v_pcr_ops(void) pcr_ops = &n4_pcr_ops; break; + case SUN4V_CHIP_NIAGARA5: + pcr_ops = &n5_pcr_ops; + break; + default: ret = -ENODEV; break; diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index b5c38fa..617b9fe 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1662,7 +1662,8 @@ static bool __init supported_pmu(void) sparc_pmu = &niagara2_pmu; return true; } - if (!strcmp(sparc_pmu_type, "niagara4")) { + if (!strcmp(sparc_pmu_type, "niagara4") || + !strcmp(sparc_pmu_type, "niagara5")) { sparc_pmu = &niagara4_pmu; return true; } @@ -1671,9 +1672,12 @@ static bool __init supported_pmu(void) int __init init_hw_perf_events(void) { + int err; + pr_info("Performance events: "); - if (!supported_pmu()) { + err = pcr_arch_init(); + if (err || !supported_pmu()) { pr_cont("No support for PMU type '%s'\n", sparc_pmu_type); return 0; } @@ -1685,7 +1689,7 @@ int __init init_hw_perf_events(void) return 0; } -early_initcall(init_hw_perf_events); +pure_initcall(init_hw_perf_events); void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index b9cc976..fa49b80 100644 
--- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -305,6 +305,9 @@ static void __global_pmu_self(int this_cpu) struct global_pmu_snapshot *pp; int i, num; + if (!pcr_ops) + return; + pp = &global_cpu_snapshot[this_cpu].pmu; num = 1; diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 4306d44..8ec28cc 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -30,6 +30,7 @@ #include <linux/cpu.h> #include <linux/initrd.h> #include <linux/module.h> +#include <linux/start_kernel.h> #include <asm/io.h> #include <asm/processor.h> @@ -174,7 +175,7 @@ char reboot_command[COMMAND_LINE_SIZE]; static struct pt_regs fake_swapper_regs = { { 0, }, 0, 0, 0, 0 }; -void __init per_cpu_patch(void) +static void __init per_cpu_patch(void) { struct cpuid_patch_entry *p; unsigned long ver; @@ -266,7 +267,7 @@ void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *start, } } -void __init sun4v_patch(void) +static void __init sun4v_patch(void) { extern void sun4v_hvapi_init(void); @@ -335,14 +336,25 @@ static void __init pause_patch(void) } } -#ifdef CONFIG_SMP -void __init boot_cpu_id_too_large(int cpu) +void __init start_early_boot(void) { - prom_printf("Serious problem, boot cpu id (%d) >= NR_CPUS (%d)\n", - cpu, NR_CPUS); - prom_halt(); + int cpu; + + check_if_starfire(); + per_cpu_patch(); + sun4v_patch(); + + cpu = hard_smp_processor_id(); + if (cpu >= NR_CPUS) { + prom_printf("Serious problem, boot cpu id (%d) >= NR_CPUS (%d)\n", + cpu, NR_CPUS); + prom_halt(); + } + current_thread_info()->cpu = cpu; + + prom_init_report(); + start_kernel(); } -#endif /* On Ultra, we support all of the v8 capabilities. 
*/ unsigned long sparc64_elf_hwcap = (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR | @@ -500,12 +512,16 @@ static void __init init_sparc64_elf_hwcap(void) sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= HWCAP_SPARC_BLKINIT; if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= HWCAP_SPARC_N2; } @@ -533,6 +549,8 @@ static void __init init_sparc64_elf_hwcap(void) sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= (AV_SPARC_VIS | AV_SPARC_VIS2 | AV_SPARC_ASI_BLK_INIT | @@ -540,6 +558,8 @@ static void __init init_sparc64_elf_hwcap(void) if (sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= (AV_SPARC_VIS3 | AV_SPARC_HPC | AV_SPARC_FMAF); diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 8c68424..3f5c129 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -150,7 +150,7 @@ void cpu_panic(void) #define NUM_ROUNDS 64 /* magic value */ #define NUM_ITERS 5 /* likewise */ -static DEFINE_SPINLOCK(itc_sync_lock); +static DEFINE_RAW_SPINLOCK(itc_sync_lock); static unsigned long go[SLAVE + 1]; #define DEBUG_TICK_SYNC 0 @@ -258,7 +258,7 @@ static void 
smp_synchronize_one_tick(int cpu) go[MASTER] = 0; membar_safe("#StoreLoad"); - spin_lock_irqsave(&itc_sync_lock, flags); + raw_spin_lock_irqsave(&itc_sync_lock, flags); { for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) { while (!go[MASTER]) @@ -269,7 +269,7 @@ static void smp_synchronize_one_tick(int cpu) membar_safe("#StoreLoad"); } } - spin_unlock_irqrestore(&itc_sync_lock, flags); + raw_spin_unlock_irqrestore(&itc_sync_lock, flags); } #if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU) @@ -822,13 +822,17 @@ void arch_send_call_function_single_ipi(int cpu) void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs) { clear_softint(1 << irq); + irq_enter(); generic_smp_call_function_interrupt(); + irq_exit(); } void __irq_entry smp_call_function_single_client(int irq, struct pt_regs *regs) { clear_softint(1 << irq); + irq_enter(); generic_smp_call_function_single_interrupt(); + irq_exit(); } static void tsb_sync(void *info) @@ -1394,7 +1398,6 @@ void __cpu_die(unsigned int cpu) void __init smp_cpus_done(unsigned int max_cpus) { - pcr_arch_init(); } void smp_send_reschedule(int cpu) @@ -1474,6 +1477,13 @@ static void __init pcpu_populate_pte(unsigned long addr) pud_t *pud; pmd_t *pmd; + if (pgd_none(*pgd)) { + pud_t *new; + + new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); + pgd_populate(&init_mm, pgd, new); + } + pud = pud_offset(pgd, addr); if (pud_none(*pud)) { pmd_t *new; diff --git a/arch/sparc/kernel/sun4v_tlb_miss.S b/arch/sparc/kernel/sun4v_tlb_miss.S index bde867f..6179e19 100644 --- a/arch/sparc/kernel/sun4v_tlb_miss.S +++ b/arch/sparc/kernel/sun4v_tlb_miss.S @@ -182,7 +182,7 @@ sun4v_tsb_miss_common: cmp %g5, -1 be,pt %xcc, 80f nop - COMPUTE_TSB_PTR(%g5, %g4, HPAGE_SHIFT, %g2, %g7) + COMPUTE_TSB_PTR(%g5, %g4, REAL_HPAGE_SHIFT, %g2, %g7) /* That clobbered %g2, reload it. 
*/ ldxa [%g0] ASI_SCRATCHPAD, %g2 @@ -195,6 +195,11 @@ sun4v_tsb_miss_common: ldx [%g2 + TRAP_PER_CPU_PGD_PADDR], %g7 sun4v_itlb_error: + rdpr %tl, %g1 + cmp %g1, 1 + ble,pt %icc, sun4v_bad_ra + or %g0, FAULT_CODE_BAD_RA | FAULT_CODE_ITLB, %g1 + sethi %hi(sun4v_err_itlb_vaddr), %g1 stx %g4, [%g1 + %lo(sun4v_err_itlb_vaddr)] sethi %hi(sun4v_err_itlb_ctx), %g1 @@ -206,15 +211,10 @@ sun4v_itlb_error: sethi %hi(sun4v_err_itlb_error), %g1 stx %o0, [%g1 + %lo(sun4v_err_itlb_error)] + sethi %hi(1f), %g7 rdpr %tl, %g4 - cmp %g4, 1 - ble,pt %icc, 1f - sethi %hi(2f), %g7 ba,pt %xcc, etraptl1 - or %g7, %lo(2f), %g7 - -1: ba,pt %xcc, etrap -2: or %g7, %lo(2b), %g7 +1: or %g7, %lo(1f), %g7 mov %l4, %o1 call sun4v_itlb_error_report add %sp, PTREGS_OFF, %o0 @@ -222,6 +222,11 @@ sun4v_itlb_error: /* NOTREACHED */ sun4v_dtlb_error: + rdpr %tl, %g1 + cmp %g1, 1 + ble,pt %icc, sun4v_bad_ra + or %g0, FAULT_CODE_BAD_RA | FAULT_CODE_DTLB, %g1 + sethi %hi(sun4v_err_dtlb_vaddr), %g1 stx %g4, [%g1 + %lo(sun4v_err_dtlb_vaddr)] sethi %hi(sun4v_err_dtlb_ctx), %g1 @@ -233,21 +238,23 @@ sun4v_dtlb_error: sethi %hi(sun4v_err_dtlb_error), %g1 stx %o0, [%g1 + %lo(sun4v_err_dtlb_error)] + sethi %hi(1f), %g7 rdpr %tl, %g4 - cmp %g4, 1 - ble,pt %icc, 1f - sethi %hi(2f), %g7 ba,pt %xcc, etraptl1 - or %g7, %lo(2f), %g7 - -1: ba,pt %xcc, etrap -2: or %g7, %lo(2b), %g7 +1: or %g7, %lo(1f), %g7 mov %l4, %o1 call sun4v_dtlb_error_report add %sp, PTREGS_OFF, %o0 /* NOTREACHED */ +sun4v_bad_ra: + or %g0, %g4, %g5 + ba,pt %xcc, sparc64_realfault_common + or %g1, %g0, %g4 + + /* NOTREACHED */ + /* Instruction Access Exception, tl0. 
*/ sun4v_iacc: ldxa [%g0] ASI_SCRATCHPAD, %g2 diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S index f7c72b6..d066eb1 100644 --- a/arch/sparc/kernel/sys32.S +++ b/arch/sparc/kernel/sys32.S @@ -44,7 +44,7 @@ SIGN1(sys32_timer_settime, compat_sys_timer_settime, %o1) SIGN1(sys32_io_submit, compat_sys_io_submit, %o1) SIGN1(sys32_mq_open, compat_sys_mq_open, %o1) SIGN1(sys32_select, compat_sys_select, %o0) -SIGN3(sys32_futex, compat_sys_futex, %o1, %o2, %o5) +SIGN1(sys32_futex, compat_sys_futex, %o1) SIGN1(sys32_recvfrom, compat_sys_recvfrom, %o0) SIGN1(sys32_recvmsg, compat_sys_recvmsg, %o0) SIGN1(sys32_sendmsg, compat_sys_sendmsg, %o0) diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 51561b8..d05eb9c 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -39,9 +39,6 @@ asmlinkage unsigned long sys_getpagesize(void) return PAGE_SIZE; } -#define VA_EXCLUDE_START (0x0000080000000000UL - (1UL << 32UL)) -#define VA_EXCLUDE_END (0xfffff80000000000UL + (1UL << 32UL)) - /* Does addr --> addr+len fall within 4GB of the VA-space hole or * overflow past the end of the 64-bit address space? */ diff --git a/arch/sparc/kernel/trampoline_64.S b/arch/sparc/kernel/trampoline_64.S index ad4bde3..092a39d 100644 --- a/arch/sparc/kernel/trampoline_64.S +++ b/arch/sparc/kernel/trampoline_64.S @@ -110,10 +110,13 @@ startup_continue: brnz,pn %g1, 1b nop - sethi %hi(p1275buf), %g2 - or %g2, %lo(p1275buf), %g2 - ldx [%g2 + 0x10], %l2 - add %l2, -(192 + 128), %sp + /* Get onto temporary stack which will be in the locked + * kernel image. 
+ */ + sethi %hi(tramp_stack), %g1 + or %g1, %lo(tramp_stack), %g1 + add %g1, TRAMP_STACK_SIZE, %g1 + sub %g1, STACKFRAME_SZ + STACK_BIAS + 256, %sp flushw /* Setup the loop variables: @@ -395,7 +398,6 @@ after_lock_tlb: sllx %g5, THREAD_SHIFT, %g5 sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5 add %g6, %g5, %sp - mov 0, %fp rdpr %pstate, %o1 or %o1, PSTATE_IE, %o1 diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index b3f833a..1a33850 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -2092,6 +2092,11 @@ void sun4v_nonresum_overflow(struct pt_regs *regs) atomic_inc(&sun4v_nonresum_oflow_cnt); } +static void sun4v_tlb_error(struct pt_regs *regs) +{ + die_if_kernel("TLB/TSB error", regs); +} + unsigned long sun4v_err_itlb_vaddr; unsigned long sun4v_err_itlb_ctx; unsigned long sun4v_err_itlb_pte; @@ -2099,8 +2104,7 @@ unsigned long sun4v_err_itlb_error; void sun4v_itlb_error_report(struct pt_regs *regs, int tl) { - if (tl > 1) - dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); printk(KERN_EMERG "SUN4V-ITLB: Error at TPC[%lx], tl %d\n", regs->tpc, tl); @@ -2113,7 +2117,7 @@ void sun4v_itlb_error_report(struct pt_regs *regs, int tl) sun4v_err_itlb_vaddr, sun4v_err_itlb_ctx, sun4v_err_itlb_pte, sun4v_err_itlb_error); - prom_halt(); + sun4v_tlb_error(regs); } unsigned long sun4v_err_dtlb_vaddr; @@ -2123,8 +2127,7 @@ unsigned long sun4v_err_dtlb_error; void sun4v_dtlb_error_report(struct pt_regs *regs, int tl) { - if (tl > 1) - dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); printk(KERN_EMERG "SUN4V-DTLB: Error at TPC[%lx], tl %d\n", regs->tpc, tl); @@ -2137,7 +2140,7 @@ void sun4v_dtlb_error_report(struct pt_regs *regs, int tl) sun4v_err_dtlb_vaddr, sun4v_err_dtlb_ctx, sun4v_err_dtlb_pte, sun4v_err_dtlb_error); - prom_halt(); + sun4v_tlb_error(regs); } void hypervisor_tlbop_error(unsigned long err, unsigned 
long op) diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S index a313e4a..be98685 100644 --- a/arch/sparc/kernel/tsb.S +++ b/arch/sparc/kernel/tsb.S @@ -75,7 +75,7 @@ tsb_miss_page_table_walk: mov 512, %g7 andn %g5, 0x7, %g5 sllx %g7, %g6, %g7 - srlx %g4, HPAGE_SHIFT, %g6 + srlx %g4, REAL_HPAGE_SHIFT, %g6 sub %g7, 1, %g7 and %g6, %g7, %g6 sllx %g6, 4, %g6 @@ -162,10 +162,10 @@ tsb_miss_page_table_walk_sun4v_fastpath: nop .previous - rdpr %tl, %g3 - cmp %g3, 1 + rdpr %tl, %g7 + cmp %g7, 1 bne,pn %xcc, winfix_trampoline - nop + mov %g3, %g4 ba,pt %xcc, etrap rd %pc, %g7 call hugetlb_setup diff --git a/arch/sparc/kernel/unaligned_64.c b/arch/sparc/kernel/unaligned_64.c index 8201c25e..4db8898 100644 --- a/arch/sparc/kernel/unaligned_64.c +++ b/arch/sparc/kernel/unaligned_64.c @@ -163,17 +163,23 @@ static unsigned long *fetch_reg_addr(unsigned int reg, struct pt_regs *regs) unsigned long compute_effective_address(struct pt_regs *regs, unsigned int insn, unsigned int rd) { + int from_kernel = (regs->tstate & TSTATE_PRIV) != 0; unsigned int rs1 = (insn >> 14) & 0x1f; unsigned int rs2 = insn & 0x1f; - int from_kernel = (regs->tstate & TSTATE_PRIV) != 0; + unsigned long addr; if (insn & 0x2000) { maybe_flush_windows(rs1, 0, rd, from_kernel); - return (fetch_reg(rs1, regs) + sign_extend_imm13(insn)); + addr = (fetch_reg(rs1, regs) + sign_extend_imm13(insn)); } else { maybe_flush_windows(rs1, rs2, rd, from_kernel); - return (fetch_reg(rs1, regs) + fetch_reg(rs2, regs)); + addr = (fetch_reg(rs1, regs) + fetch_reg(rs2, regs)); } + + if (!from_kernel && test_thread_flag(TIF_32BIT)) + addr &= 0xffffffff; + + return addr; } /* This is just to make gcc think die_if_kernel does return... 
*/ diff --git a/arch/sparc/kernel/viohs.c b/arch/sparc/kernel/viohs.c index f8e7dd5..9c5fbd0 100644 --- a/arch/sparc/kernel/viohs.c +++ b/arch/sparc/kernel/viohs.c @@ -714,7 +714,7 @@ int vio_ldc_alloc(struct vio_driver_state *vio, cfg.tx_irq = vio->vdev->tx_irq; cfg.rx_irq = vio->vdev->rx_irq; - lp = ldc_alloc(vio->vdev->channel_id, &cfg, event_arg); + lp = ldc_alloc(vio->vdev->channel_id, &cfg, event_arg, vio->name); if (IS_ERR(lp)) return PTR_ERR(lp); @@ -746,7 +746,7 @@ void vio_port_up(struct vio_driver_state *vio) err = 0; if (state == LDC_STATE_INIT) { - err = ldc_bind(vio->lp, vio->name); + err = ldc_bind(vio->lp); if (err) printk(KERN_WARNING "%s: Port %lu bind failed, " "err=%d\n", diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 0bacceb..0924305 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -35,8 +35,9 @@ jiffies = jiffies_64; SECTIONS { - /* swapper_low_pmd_dir is sparc64 only */ - swapper_low_pmd_dir = 0x0000000000402000; +#ifdef CONFIG_SPARC64 + swapper_pg_dir = 0x0000000000402000; +#endif . 
= INITIAL_ADDRESS; .text TEXTSTART : { diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S index 2c20ad6..30eee6e 100644 --- a/arch/sparc/lib/NG2memcpy.S +++ b/arch/sparc/lib/NG2memcpy.S @@ -236,6 +236,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ */ VISEntryHalf + membar #Sync alignaddr %o1, %g0, %g0 add %o1, (64 - 1), %o4 diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S index 9cf2ee0..140527a 100644 --- a/arch/sparc/lib/NG4memcpy.S +++ b/arch/sparc/lib/NG4memcpy.S @@ -41,6 +41,10 @@ #endif #endif +#if !defined(EX_LD) && !defined(EX_ST) +#define NON_USER_COPY +#endif + #ifndef EX_LD #define EX_LD(x) x #endif @@ -197,9 +201,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ mov EX_RETVAL(%o3), %o0 .Llarge_src_unaligned: +#ifdef NON_USER_COPY + VISEntryHalfFast(.Lmedium_vis_entry_fail) +#else + VISEntryHalf +#endif andn %o2, 0x3f, %o4 sub %o2, %o4, %o2 - VISEntryHalf alignaddr %o1, %g0, %g1 add %o1, %o4, %o1 EX_LD(LOAD(ldd, %g1 + 0x00, %f0)) @@ -240,6 +248,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ nop ba,a,pt %icc, .Lmedium_unaligned +#ifdef NON_USER_COPY +.Lmedium_vis_entry_fail: + or %o0, %o1, %g2 +#endif .Lmedium: LOAD(prefetch, %o1 + 0x40, #n_reads_strong) andcc %g2, 0x7, %g0 diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c index 1d32b54..8f2f94d 100644 --- a/arch/sparc/lib/atomic32.c +++ b/arch/sparc/lib/atomic32.c @@ -40,6 +40,19 @@ int __atomic_add_return(int i, atomic_t *v) } EXPORT_SYMBOL(__atomic_add_return); +int atomic_xchg(atomic_t *v, int new) +{ + int ret; + unsigned long flags; + + spin_lock_irqsave(ATOMIC_HASH(v), flags); + ret = v->counter; + v->counter = new; + spin_unlock_irqrestore(ATOMIC_HASH(v), flags); + return ret; +} +EXPORT_SYMBOL(atomic_xchg); + int atomic_cmpxchg(atomic_t *v, int old, int new) { int ret; @@ -132,3 +145,17 @@ unsigned long __cmpxchg_u32(volatile u32 *ptr, u32 old, u32 new) return (unsigned long)prev; } EXPORT_SYMBOL(__cmpxchg_u32); + +unsigned long 
__xchg_u32(volatile u32 *ptr, u32 new) +{ + unsigned long flags; + u32 prev; + + spin_lock_irqsave(ATOMIC_HASH(ptr), flags); + prev = *ptr; + *ptr = new; + spin_unlock_irqrestore(ATOMIC_HASH(ptr), flags); + + return (unsigned long)prev; +} +EXPORT_SYMBOL(__xchg_u32); diff --git a/arch/sparc/lib/clear_page.S b/arch/sparc/lib/clear_page.S index 77e531f..46272df 100644 --- a/arch/sparc/lib/clear_page.S +++ b/arch/sparc/lib/clear_page.S @@ -37,10 +37,10 @@ _clear_page: /* %o0=dest */ .globl clear_user_page clear_user_page: /* %o0=dest, %o1=vaddr */ lduw [%g6 + TI_PRE_COUNT], %o2 - sethi %uhi(PAGE_OFFSET), %g2 + sethi %hi(PAGE_OFFSET), %g2 sethi %hi(PAGE_SIZE), %o4 - sllx %g2, 32, %g2 + ldx [%g2 + %lo(PAGE_OFFSET)], %g2 sethi %hi(PAGE_KERNEL_LOCKED), %g3 ldx [%g3 + %lo(PAGE_KERNEL_LOCKED)], %g3 diff --git a/arch/sparc/lib/copy_page.S b/arch/sparc/lib/copy_page.S index 4d2df32..dd16c61 100644 --- a/arch/sparc/lib/copy_page.S +++ b/arch/sparc/lib/copy_page.S @@ -46,10 +46,10 @@ .type copy_user_page,#function copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ lduw [%g6 + TI_PRE_COUNT], %o4 - sethi %uhi(PAGE_OFFSET), %g2 + sethi %hi(PAGE_OFFSET), %g2 sethi %hi(PAGE_SIZE), %o3 - sllx %g2, 32, %g2 + ldx [%g2 + %lo(PAGE_OFFSET)], %g2 sethi %hi(PAGE_KERNEL_LOCKED), %g3 ldx [%g3 + %lo(PAGE_KERNEL_LOCKED)], %g3 diff --git a/arch/sparc/lib/memset.S b/arch/sparc/lib/memset.S index 99c017b..f75e690 100644 --- a/arch/sparc/lib/memset.S +++ b/arch/sparc/lib/memset.S @@ -3,8 +3,9 @@ * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) * - * Returns 0, if ok, and number of bytes not yet set if exception - * occurs and we were called as clear_user. + * Calls to memset returns initial %o0. Calls to bzero returns 0, if ok, and + * number of bytes not yet set if exception occurs and we were called as + * clear_user. 
*/ #include <asm/ptrace.h> @@ -65,6 +66,8 @@ __bzero_begin: .globl __memset_start, __memset_end __memset_start: memset: + mov %o0, %g1 + mov 1, %g4 and %o1, 0xff, %g3 sll %g3, 8, %g2 or %g3, %g2, %g3 @@ -89,6 +92,7 @@ memset: sub %o0, %o2, %o0 __bzero: + clr %g4 mov %g0, %g3 1: cmp %o1, 7 @@ -151,8 +155,8 @@ __bzero: bne,a 8f EX(stb %g3, [%o0], and %o1, 1) 8: - retl - clr %o0 + b 0f + nop 7: be 13b orcc %o1, 0, %g0 @@ -164,6 +168,12 @@ __bzero: bne 8b EX(stb %g3, [%o0 - 1], add %o1, 1) 0: + andcc %g4, 1, %g0 + be 5f + nop + retl + mov %g1, %o0 +5: retl clr %o0 __memset_end: diff --git a/arch/sparc/math-emu/math_32.c b/arch/sparc/math-emu/math_32.c index aa4d55b..5ce8f2f 100644 --- a/arch/sparc/math-emu/math_32.c +++ b/arch/sparc/math-emu/math_32.c @@ -499,7 +499,7 @@ static int do_one_mathemu(u32 insn, unsigned long *pfsr, unsigned long *fregs) case 0: fsr = *pfsr; if (IR == -1) IR = 2; /* fcc is always fcc0 */ - fsr &= ~0xc00; fsr |= (IR << 10); break; + fsr &= ~0xc00; fsr |= (IR << 10); *pfsr = fsr; break; case 1: rd->s = IR; break; diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index a1d35e2..1de7683 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -95,38 +95,51 @@ static unsigned int get_user_insn(unsigned long tpc) pte_t *ptep, pte; unsigned long pa; u32 insn = 0; - unsigned long pstate; - if (pgd_none(*pgdp)) - goto outret; + if (pgd_none(*pgdp) || unlikely(pgd_bad(*pgdp))) + goto out; pudp = pud_offset(pgdp, tpc); - if (pud_none(*pudp)) - goto outret; - pmdp = pmd_offset(pudp, tpc); - if (pmd_none(*pmdp)) - goto outret; + if (pud_none(*pudp) || unlikely(pud_bad(*pudp))) + goto out; /* This disables preemption for us as well. 
*/ - __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate)); - __asm__ __volatile__("wrpr %0, %1, %%pstate" - : : "r" (pstate), "i" (PSTATE_IE)); - ptep = pte_offset_map(pmdp, tpc); - pte = *ptep; - if (!pte_present(pte)) - goto out; + local_irq_disable(); + + pmdp = pmd_offset(pudp, tpc); + if (pmd_none(*pmdp) || unlikely(pmd_bad(*pmdp))) + goto out_irq_enable; - pa = (pte_pfn(pte) << PAGE_SHIFT); - pa += (tpc & ~PAGE_MASK); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + if (pmd_trans_huge(*pmdp)) { + if (pmd_trans_splitting(*pmdp)) + goto out_irq_enable; - /* Use phys bypass so we don't pollute dtlb/dcache. */ - __asm__ __volatile__("lduwa [%1] %2, %0" - : "=r" (insn) - : "r" (pa), "i" (ASI_PHYS_USE_EC)); + pa = pmd_pfn(*pmdp) << PAGE_SHIFT; + pa += tpc & ~HPAGE_MASK; + /* Use phys bypass so we don't pollute dtlb/dcache. */ + __asm__ __volatile__("lduwa [%1] %2, %0" + : "=r" (insn) + : "r" (pa), "i" (ASI_PHYS_USE_EC)); + } else +#endif + { + ptep = pte_offset_map(pmdp, tpc); + pte = *ptep; + if (pte_present(pte)) { + pa = (pte_pfn(pte) << PAGE_SHIFT); + pa += (tpc & ~PAGE_MASK); + + /* Use phys bypass so we don't pollute dtlb/dcache. 
*/ + __asm__ __volatile__("lduwa [%1] %2, %0" + : "=r" (insn) + : "r" (pa), "i" (ASI_PHYS_USE_EC)); + } + pte_unmap(ptep); + } +out_irq_enable: + local_irq_enable(); out: - pte_unmap(ptep); - __asm__ __volatile__("wrpr %0, 0x0, %%pstate" : : "r" (pstate)); -outret: return insn; } @@ -152,7 +165,8 @@ show_signal_msg(struct pt_regs *regs, int sig, int code, } static void do_fault_siginfo(int code, int sig, struct pt_regs *regs, - unsigned int insn, int fault_code) + unsigned long fault_addr, unsigned int insn, + int fault_code) { unsigned long addr; siginfo_t info; @@ -160,10 +174,18 @@ static void do_fault_siginfo(int code, int sig, struct pt_regs *regs, info.si_code = code; info.si_signo = sig; info.si_errno = 0; - if (fault_code & FAULT_CODE_ITLB) + if (fault_code & FAULT_CODE_ITLB) { addr = regs->tpc; - else - addr = compute_effective_address(regs, insn, 0); + } else { + /* If we were able to probe the faulting instruction, use it + * to compute a precise fault address. Otherwise use the fault + * time provided address which may only have page granularity. + */ + if (insn) + addr = compute_effective_address(regs, insn, 0); + else + addr = fault_addr; + } info.si_addr = (void __user *) addr; info.si_trapno = 0; @@ -238,7 +260,7 @@ static void __kprobes do_kernel_fault(struct pt_regs *regs, int si_code, /* The si_code was set to make clear whether * this was a SEGV_MAPERR or SEGV_ACCERR fault. 
*/ - do_fault_siginfo(si_code, SIGSEGV, regs, insn, fault_code); + do_fault_siginfo(si_code, SIGSEGV, regs, address, insn, fault_code); return; } @@ -258,18 +280,6 @@ static void noinline __kprobes bogus_32bit_fault_tpc(struct pt_regs *regs) show_regs(regs); } -static void noinline __kprobes bogus_32bit_fault_address(struct pt_regs *regs, - unsigned long addr) -{ - static int times; - - if (times++ < 10) - printk(KERN_ERR "FAULT[%s:%d]: 32-bit process " - "reports 64-bit fault address [%lx]\n", - current->comm, current->pid, addr); - show_regs(regs); -} - asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) { struct mm_struct *mm = current->mm; @@ -298,10 +308,8 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) goto intr_or_no_mm; } } - if (unlikely((address >> 32) != 0)) { - bogus_32bit_fault_address(regs, address); + if (unlikely((address >> 32) != 0)) goto intr_or_no_mm; - } } if (regs->tstate & TSTATE_PRIV) { @@ -338,6 +346,9 @@ retry: down_read(&mm->mmap_sem); } + if (fault_code & FAULT_CODE_BAD_RA) + goto do_sigbus; + vma = find_vma(mm, address); if (!vma) goto bad_area; @@ -521,7 +532,7 @@ do_sigbus: * Send a sigbus, regardless of whether we were in kernel * or user mode. */ - do_fault_siginfo(BUS_ADRERR, SIGBUS, regs, insn, fault_code); + do_fault_siginfo(BUS_ADRERR, SIGBUS, regs, address, insn, fault_code); /* Kernel mode? 
Handle exceptions or die */ if (regs->tstate & TSTATE_PRIV) diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c index 01ee23d..ae6ce38 100644 --- a/arch/sparc/mm/gup.c +++ b/arch/sparc/mm/gup.c @@ -71,13 +71,12 @@ static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, int *nr) { struct page *head, *page, *tail; - u32 mask; int refs; - mask = PMD_HUGE_PRESENT; - if (write) - mask |= PMD_HUGE_WRITE; - if ((pmd_val(pmd) & mask) != mask) + if (!(pmd_val(pmd) & _PAGE_VALID)) + return 0; + + if (write && !pmd_write(pmd)) return 0; refs = 0; @@ -161,6 +160,36 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, return 1; } +int __get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) +{ + struct mm_struct *mm = current->mm; + unsigned long addr, len, end; + unsigned long next, flags; + pgd_t *pgdp; + int nr = 0; + + start &= PAGE_MASK; + addr = start; + len = (unsigned long) nr_pages << PAGE_SHIFT; + end = start + len; + + local_irq_save(flags); + pgdp = pgd_offset(mm, addr); + do { + pgd_t pgd = *pgdp; + + next = pgd_addr_end(addr, end); + if (pgd_none(pgd)) + break; + if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) + break; + } while (pgdp++, addr = next, addr != end); + local_irq_restore(flags); + + return nr; +} + int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages) { diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index 9639964..8545f62 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -21,8 +21,6 @@ /* Slightly simplified from the non-hugepage variant because by * definition we don't have to worry about any page coloring stuff */ -#define VA_EXCLUDE_START (0x0000080000000000UL - (1UL << 32UL)) -#define VA_EXCLUDE_END (0xfffff80000000000UL + (1UL << 32UL)) static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp, unsigned long addr, @@ -234,11 +232,6 @@ int pud_huge(pud_t pud) 
return 0; } -int pmd_huge_support(void) -{ - return 0; -} - struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write) { diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index ec995b0..1bfeb5c 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -73,7 +73,6 @@ unsigned long kern_linear_pte_xor[4] __read_mostly; * 'cpu' properties, but we need to have this table setup before the * MDESC is initialized. */ -unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; #ifndef CONFIG_DEBUG_PAGEALLOC /* A special kernel TSB for 4MB, 256MB, 2GB and 16GB linear mappings. @@ -82,10 +81,11 @@ unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; */ extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES]; #endif +extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES]; static unsigned long cpu_pgsz_mask; -#define MAX_BANKS 32 +#define MAX_BANKS 1024 static struct linux_prom64_registers pavail[MAX_BANKS]; static int pavail_ents; @@ -163,10 +163,6 @@ static void __init read_obp_memory(const char *property, cmp_p64, NULL); } -unsigned long sparc64_valid_addr_bitmap[VALID_ADDR_BITMAP_BYTES / - sizeof(unsigned long)]; -EXPORT_SYMBOL(sparc64_valid_addr_bitmap); - /* Kernel physical address base and size in bytes. */ unsigned long kern_base __read_mostly; unsigned long kern_size __read_mostly; @@ -350,11 +346,15 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t * mm = vma->vm_mm; + /* Don't insert a non-valid PTE into the TSB, we'll deadlock. 
*/ + if (!pte_accessible(mm, pte)) + return; + raw_spin_lock_irqsave(&mm->context.lock, flags); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) if (mm->context.huge_pte_count && is_hugetlb_pte(pte)) - __update_mmu_tsb_insert(mm, MM_TSB_HUGE, HPAGE_SHIFT, + __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT, address, pte_val(pte)); else #endif @@ -588,7 +588,7 @@ static void __init remap_kernel(void) int i, tlb_ent = sparc64_highest_locked_tlbent(); tte_vaddr = (unsigned long) KERNBASE; - phys_page = (prom_boot_mapping_phys_low >> 22UL) << 22UL; + phys_page = (prom_boot_mapping_phys_low >> ILOG2_4MB) << ILOG2_4MB; tte_data = kern_large_tte(phys_page); kern_locked_tte_data = tte_data; @@ -834,7 +834,10 @@ static int find_node(unsigned long addr) if ((addr & p->mask) == p->val) return i; } - return -1; + /* The following condition has been observed on LDOM guests.*/ + WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node" + " rule. Some physical memory will be owned by node 0."); + return 0; } static u64 memblock_nid_range(u64 start, u64 end, int *nid) @@ -1355,9 +1358,144 @@ static unsigned long __init bootmem_init(unsigned long phys_base) static struct linux_prom64_registers pall[MAX_BANKS] __initdata; static int pall_ents __initdata; -#ifdef CONFIG_DEBUG_PAGEALLOC +static unsigned long max_phys_bits = 40; + +bool kern_addr_valid(unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + if ((long)addr < 0L) { + unsigned long pa = __pa(addr); + + if ((addr >> max_phys_bits) != 0UL) + return false; + + return pfn_valid(pa >> PAGE_SHIFT); + } + + if (addr >= (unsigned long) KERNBASE && + addr < (unsigned long)&_end) + return true; + + pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) + return 0; + + pud = pud_offset(pgd, addr); + if (pud_none(*pud)) + return 0; + + if (pud_large(*pud)) + return pfn_valid(pud_pfn(*pud)); + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + return 0; + + if 
(pmd_large(*pmd)) + return pfn_valid(pmd_pfn(*pmd)); + + pte = pte_offset_kernel(pmd, addr); + if (pte_none(*pte)) + return 0; + + return pfn_valid(pte_pfn(*pte)); +} +EXPORT_SYMBOL(kern_addr_valid); + +static unsigned long __ref kernel_map_hugepud(unsigned long vstart, + unsigned long vend, + pud_t *pud) +{ + const unsigned long mask16gb = (1UL << 34) - 1UL; + u64 pte_val = vstart; + + /* Each PUD is 8GB */ + if ((vstart & mask16gb) || + (vend - vstart <= mask16gb)) { + pte_val ^= kern_linear_pte_xor[2]; + pud_val(*pud) = pte_val | _PAGE_PUD_HUGE; + + return vstart + PUD_SIZE; + } + + pte_val ^= kern_linear_pte_xor[3]; + pte_val |= _PAGE_PUD_HUGE; + + vend = vstart + mask16gb + 1UL; + while (vstart < vend) { + pud_val(*pud) = pte_val; + + pte_val += PUD_SIZE; + vstart += PUD_SIZE; + pud++; + } + return vstart; +} + +static bool kernel_can_map_hugepud(unsigned long vstart, unsigned long vend, + bool guard) +{ + if (guard && !(vstart & ~PUD_MASK) && (vend - vstart) >= PUD_SIZE) + return true; + + return false; +} + +static unsigned long __ref kernel_map_hugepmd(unsigned long vstart, + unsigned long vend, + pmd_t *pmd) +{ + const unsigned long mask256mb = (1UL << 28) - 1UL; + const unsigned long mask2gb = (1UL << 31) - 1UL; + u64 pte_val = vstart; + + /* Each PMD is 8MB */ + if ((vstart & mask256mb) || + (vend - vstart <= mask256mb)) { + pte_val ^= kern_linear_pte_xor[0]; + pmd_val(*pmd) = pte_val | _PAGE_PMD_HUGE; + + return vstart + PMD_SIZE; + } + + if ((vstart & mask2gb) || + (vend - vstart <= mask2gb)) { + pte_val ^= kern_linear_pte_xor[1]; + pte_val |= _PAGE_PMD_HUGE; + vend = vstart + mask256mb + 1UL; + } else { + pte_val ^= kern_linear_pte_xor[2]; + pte_val |= _PAGE_PMD_HUGE; + vend = vstart + mask2gb + 1UL; + } + + while (vstart < vend) { + pmd_val(*pmd) = pte_val; + + pte_val += PMD_SIZE; + vstart += PMD_SIZE; + pmd++; + } + + return vstart; +} + +static bool kernel_can_map_hugepmd(unsigned long vstart, unsigned long vend, + bool guard) +{ + if (guard && 
!(vstart & ~PMD_MASK) && (vend - vstart) >= PMD_SIZE) + return true; + + return false; +} + static unsigned long __ref kernel_map_range(unsigned long pstart, - unsigned long pend, pgprot_t prot) + unsigned long pend, pgprot_t prot, + bool use_huge) { unsigned long vstart = PAGE_OFFSET + pstart; unsigned long vend = PAGE_OFFSET + pend; @@ -1376,19 +1514,34 @@ static unsigned long __ref kernel_map_range(unsigned long pstart, pmd_t *pmd; pte_t *pte; + if (pgd_none(*pgd)) { + pud_t *new; + + new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); + alloc_bytes += PAGE_SIZE; + pgd_populate(&init_mm, pgd, new); + } pud = pud_offset(pgd, vstart); if (pud_none(*pud)) { pmd_t *new; + if (kernel_can_map_hugepud(vstart, vend, use_huge)) { + vstart = kernel_map_hugepud(vstart, vend, pud); + continue; + } new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); alloc_bytes += PAGE_SIZE; pud_populate(&init_mm, pud, new); } pmd = pmd_offset(pud, vstart); - if (!pmd_present(*pmd)) { + if (pmd_none(*pmd)) { pte_t *new; + if (kernel_can_map_hugepmd(vstart, vend, use_huge)) { + vstart = kernel_map_hugepmd(vstart, vend, pmd); + continue; + } new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); alloc_bytes += PAGE_SIZE; pmd_populate_kernel(&init_mm, pmd, new); @@ -1411,100 +1564,34 @@ static unsigned long __ref kernel_map_range(unsigned long pstart, return alloc_bytes; } -extern unsigned int kvmap_linear_patch[1]; -#endif /* CONFIG_DEBUG_PAGEALLOC */ - -static void __init kpte_set_val(unsigned long index, unsigned long val) -{ - unsigned long *ptr = kpte_linear_bitmap; - - val <<= ((index % (BITS_PER_LONG / 2)) * 2); - ptr += (index / (BITS_PER_LONG / 2)); - - *ptr |= val; -} - -static const unsigned long kpte_shift_min = 28; /* 256MB */ -static const unsigned long kpte_shift_max = 34; /* 16GB */ -static const unsigned long kpte_shift_incr = 3; - -static unsigned long kpte_mark_using_shift(unsigned long start, unsigned long end, - unsigned long shift) +static void __init 
flush_all_kernel_tsbs(void) { - unsigned long size = (1UL << shift); - unsigned long mask = (size - 1UL); - unsigned long remains = end - start; - unsigned long val; - - if (remains < size || (start & mask)) - return start; - - /* VAL maps: - * - * shift 28 --> kern_linear_pte_xor index 1 - * shift 31 --> kern_linear_pte_xor index 2 - * shift 34 --> kern_linear_pte_xor index 3 - */ - val = ((shift - kpte_shift_min) / kpte_shift_incr) + 1; - - remains &= ~mask; - if (shift != kpte_shift_max) - remains = size; - - while (remains) { - unsigned long index = start >> kpte_shift_min; + int i; - kpte_set_val(index, val); + for (i = 0; i < KERNEL_TSB_NENTRIES; i++) { + struct tsb *ent = &swapper_tsb[i]; - start += 1UL << kpte_shift_min; - remains -= 1UL << kpte_shift_min; + ent->tag = (1UL << TSB_TAG_INVALID_BIT); } +#ifndef CONFIG_DEBUG_PAGEALLOC + for (i = 0; i < KERNEL_TSB4M_NENTRIES; i++) { + struct tsb *ent = &swapper_4m_tsb[i]; - return start; -} - -static void __init mark_kpte_bitmap(unsigned long start, unsigned long end) -{ - unsigned long smallest_size, smallest_mask; - unsigned long s; - - smallest_size = (1UL << kpte_shift_min); - smallest_mask = (smallest_size - 1UL); - - while (start < end) { - unsigned long orig_start = start; - - for (s = kpte_shift_max; s >= kpte_shift_min; s -= kpte_shift_incr) { - start = kpte_mark_using_shift(start, end, s); - - if (start != orig_start) - break; - } - - if (start == orig_start) - start = (start + smallest_size) & ~smallest_mask; + ent->tag = (1UL << TSB_TAG_INVALID_BIT); } +#endif } -static void __init init_kpte_bitmap(void) -{ - unsigned long i; - - for (i = 0; i < pall_ents; i++) { - unsigned long phys_start, phys_end; - - phys_start = pall[i].phys_addr; - phys_end = phys_start + pall[i].reg_size; - - mark_kpte_bitmap(phys_start, phys_end); - } -} +extern unsigned int kvmap_linear_patch[1]; static void __init kernel_physical_mapping_init(void) { -#ifdef CONFIG_DEBUG_PAGEALLOC unsigned long i, mem_alloced = 0UL; + bool 
use_huge = true; +#ifdef CONFIG_DEBUG_PAGEALLOC + use_huge = false; +#endif for (i = 0; i < pall_ents; i++) { unsigned long phys_start, phys_end; @@ -1512,7 +1599,7 @@ static void __init kernel_physical_mapping_init(void) phys_end = phys_start + pall[i].reg_size; mem_alloced += kernel_map_range(phys_start, phys_end, - PAGE_KERNEL); + PAGE_KERNEL, use_huge); } printk("Allocated %ld bytes for kernel page tables.\n", @@ -1521,8 +1608,9 @@ static void __init kernel_physical_mapping_init(void) kvmap_linear_patch[0] = 0x01000000; /* nop */ flushi(&kvmap_linear_patch[0]); + flush_all_kernel_tsbs(); + __flush_tlb_all(); -#endif } #ifdef CONFIG_DEBUG_PAGEALLOC @@ -1532,7 +1620,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable) unsigned long phys_end = phys_start + (numpages * PAGE_SIZE); kernel_map_range(phys_start, phys_end, - (enable ? PAGE_KERNEL : __pgprot(0))); + (enable ? PAGE_KERNEL : __pgprot(0)), false); flush_tsb_kernel_range(PAGE_OFFSET + phys_start, PAGE_OFFSET + phys_end); @@ -1557,6 +1645,80 @@ unsigned long __init find_ecache_flush_span(unsigned long size) return ~0UL; } +unsigned long PAGE_OFFSET; +EXPORT_SYMBOL(PAGE_OFFSET); + +unsigned long VMALLOC_END = 0x0000010000000000UL; +EXPORT_SYMBOL(VMALLOC_END); + +unsigned long sparc64_va_hole_top = 0xfffff80000000000UL; +unsigned long sparc64_va_hole_bottom = 0x0000080000000000UL; + +static void __init setup_page_offset(void) +{ + if (tlb_type == cheetah || tlb_type == cheetah_plus) { + /* Cheetah/Panther support a full 64-bit virtual + * address, so we can use all that our page tables + * support. + */ + sparc64_va_hole_top = 0xfff0000000000000UL; + sparc64_va_hole_bottom = 0x0010000000000000UL; + + max_phys_bits = 42; + } else if (tlb_type == hypervisor) { + switch (sun4v_chip_type) { + case SUN4V_CHIP_NIAGARA1: + case SUN4V_CHIP_NIAGARA2: + /* T1 and T2 support 48-bit virtual addresses. 
*/ + sparc64_va_hole_top = 0xffff800000000000UL; + sparc64_va_hole_bottom = 0x0000800000000000UL; + + max_phys_bits = 39; + break; + case SUN4V_CHIP_NIAGARA3: + /* T3 supports 48-bit virtual addresses. */ + sparc64_va_hole_top = 0xffff800000000000UL; + sparc64_va_hole_bottom = 0x0000800000000000UL; + + max_phys_bits = 43; + break; + case SUN4V_CHIP_NIAGARA4: + case SUN4V_CHIP_NIAGARA5: + case SUN4V_CHIP_SPARC64X: + case SUN4V_CHIP_SPARC_M6: + /* T4 and later support 52-bit virtual addresses. */ + sparc64_va_hole_top = 0xfff8000000000000UL; + sparc64_va_hole_bottom = 0x0008000000000000UL; + max_phys_bits = 47; + break; + case SUN4V_CHIP_SPARC_M7: + default: + /* M7 and later support 52-bit virtual addresses. */ + sparc64_va_hole_top = 0xfff8000000000000UL; + sparc64_va_hole_bottom = 0x0008000000000000UL; + max_phys_bits = 49; + break; + } + } + + if (max_phys_bits > MAX_PHYS_ADDRESS_BITS) { + prom_printf("MAX_PHYS_ADDRESS_BITS is too small, need %lu\n", + max_phys_bits); + prom_halt(); + } + + PAGE_OFFSET = sparc64_va_hole_top; + VMALLOC_END = ((sparc64_va_hole_bottom >> 1) + + (sparc64_va_hole_bottom >> 2)); + + pr_info("MM: PAGE_OFFSET is 0x%016lx (max_phys_bits == %lu)\n", + PAGE_OFFSET, max_phys_bits); + pr_info("MM: VMALLOC [0x%016lx --> 0x%016lx]\n", + VMALLOC_START, VMALLOC_END); + pr_info("MM: VMEMMAP [0x%016lx --> 0x%016lx]\n", + VMEMMAP_BASE, VMEMMAP_BASE << 1); +} + static void __init tsb_phys_patch(void) { struct tsb_ldquad_phys_patch_entry *pquad; @@ -1599,21 +1761,42 @@ static void __init tsb_phys_patch(void) #define NUM_KTSB_DESCR 1 #endif static struct hv_tsb_descr ktsb_descr[NUM_KTSB_DESCR]; -extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES]; + +/* The swapper TSBs are loaded with a base sequence of: + * + * sethi %uhi(SYMBOL), REG1 + * sethi %hi(SYMBOL), REG2 + * or REG1, %ulo(SYMBOL), REG1 + * or REG2, %lo(SYMBOL), REG2 + * sllx REG1, 32, REG1 + * or REG1, REG2, REG1 + * + * When we use physical addressing for the TSB accesses, we patch the + * 
first four instructions in the above sequence. + */ static void patch_one_ktsb_phys(unsigned int *start, unsigned int *end, unsigned long pa) { - pa >>= KTSB_PHYS_SHIFT; + unsigned long high_bits, low_bits; + + high_bits = (pa >> 32) & 0xffffffff; + low_bits = (pa >> 0) & 0xffffffff; while (start < end) { unsigned int *ia = (unsigned int *)(unsigned long)*start; - ia[0] = (ia[0] & ~0x3fffff) | (pa >> 10); + ia[0] = (ia[0] & ~0x3fffff) | (high_bits >> 10); __asm__ __volatile__("flush %0" : : "r" (ia)); - ia[1] = (ia[1] & ~0x3ff) | (pa & 0x3ff); + ia[1] = (ia[1] & ~0x3fffff) | (low_bits >> 10); __asm__ __volatile__("flush %0" : : "r" (ia + 1)); + ia[2] = (ia[2] & ~0x1fff) | (high_bits & 0x3ff); + __asm__ __volatile__("flush %0" : : "r" (ia + 2)); + + ia[3] = (ia[3] & ~0x1fff) | (low_bits & 0x3ff); + __asm__ __volatile__("flush %0" : : "r" (ia + 3)); + start++; } } @@ -1722,7 +1905,7 @@ static void __init sun4v_linear_pte_xor_finalize(void) #ifndef CONFIG_DEBUG_PAGEALLOC if (cpu_pgsz_mask & HV_PGSZ_MASK_256MB) { kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^ - 0xfffff80000000000UL; + PAGE_OFFSET; kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V | _PAGE_W_4V); } else { @@ -1731,7 +1914,7 @@ static void __init sun4v_linear_pte_xor_finalize(void) if (cpu_pgsz_mask & HV_PGSZ_MASK_2GB) { kern_linear_pte_xor[2] = (_PAGE_VALID | _PAGE_SZ2GB_4V) ^ - 0xfffff80000000000UL; + PAGE_OFFSET; kern_linear_pte_xor[2] |= (_PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V | _PAGE_W_4V); } else { @@ -1740,7 +1923,7 @@ static void __init sun4v_linear_pte_xor_finalize(void) if (cpu_pgsz_mask & HV_PGSZ_MASK_16GB) { kern_linear_pte_xor[3] = (_PAGE_VALID | _PAGE_SZ16GB_4V) ^ - 0xfffff80000000000UL; + PAGE_OFFSET; kern_linear_pte_xor[3] |= (_PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V | _PAGE_W_4V); } else { @@ -1752,7 +1935,6 @@ static void __init sun4v_linear_pte_xor_finalize(void) /* paging_init() sets up the page tables */ static unsigned long last_valid_pfn; -pgd_t 
swapper_pg_dir[2048]; static void sun4u_pgprot_init(void); static void sun4v_pgprot_init(void); @@ -1763,6 +1945,8 @@ void __init paging_init(void) unsigned long real_end, i; int node; + setup_page_offset(); + /* These build time checkes make sure that the dcache_dirty_cpu() * page->flags usage will work. * @@ -1788,7 +1972,7 @@ void __init paging_init(void) BUILD_BUG_ON(NR_CPUS > 4096); - kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL; + kern_base = (prom_boot_mapping_phys_low >> ILOG2_4MB) << ILOG2_4MB; kern_size = (unsigned long)&_end - (unsigned long)KERNBASE; /* Invalidate both kernel TSBs. */ @@ -1844,7 +2028,7 @@ void __init paging_init(void) shift = kern_base + PAGE_OFFSET - ((unsigned long)KERNBASE); real_end = (unsigned long)_end; - num_kernel_image_mappings = DIV_ROUND_UP(real_end - KERNBASE, 1 << 22); + num_kernel_image_mappings = DIV_ROUND_UP(real_end - KERNBASE, 1 << ILOG2_4MB); printk("Kernel: Using %d locked TLB entries for main kernel image.\n", num_kernel_image_mappings); @@ -1853,16 +2037,10 @@ void __init paging_init(void) */ init_mm.pgd += ((shift) / (sizeof(pgd_t))); - memset(swapper_low_pmd_dir, 0, sizeof(swapper_low_pmd_dir)); + memset(swapper_pg_dir, 0, sizeof(swapper_pg_dir)); - /* Now can init the kernel/bad page tables. */ - pud_set(pud_offset(&swapper_pg_dir[0], 0), - swapper_low_pmd_dir + (shift / sizeof(pgd_t))); - inherit_prom_mappings(); - init_kpte_bitmap(); - /* Ok, we can use our TLB miss and window trap handlers safely. */ setup_tba(); @@ -1969,70 +2147,6 @@ int page_in_phys_avail(unsigned long paddr) return 0; } -static struct linux_prom64_registers pavail_rescan[MAX_BANKS] __initdata; -static int pavail_rescan_ents __initdata; - -/* Certain OBP calls, such as fetching "available" properties, can - * claim physical memory. 
So, along with initializing the valid - * address bitmap, what we do here is refetch the physical available - * memory list again, and make sure it provides at least as much - * memory as 'pavail' does. - */ -static void __init setup_valid_addr_bitmap_from_pavail(unsigned long *bitmap) -{ - int i; - - read_obp_memory("available", &pavail_rescan[0], &pavail_rescan_ents); - - for (i = 0; i < pavail_ents; i++) { - unsigned long old_start, old_end; - - old_start = pavail[i].phys_addr; - old_end = old_start + pavail[i].reg_size; - while (old_start < old_end) { - int n; - - for (n = 0; n < pavail_rescan_ents; n++) { - unsigned long new_start, new_end; - - new_start = pavail_rescan[n].phys_addr; - new_end = new_start + - pavail_rescan[n].reg_size; - - if (new_start <= old_start && - new_end >= (old_start + PAGE_SIZE)) { - set_bit(old_start >> 22, bitmap); - goto do_next_page; - } - } - - prom_printf("mem_init: Lost memory in pavail\n"); - prom_printf("mem_init: OLD start[%lx] size[%lx]\n", - pavail[i].phys_addr, - pavail[i].reg_size); - prom_printf("mem_init: NEW start[%lx] size[%lx]\n", - pavail_rescan[i].phys_addr, - pavail_rescan[i].reg_size); - prom_printf("mem_init: Cannot continue, aborting.\n"); - prom_halt(); - - do_next_page: - old_start += PAGE_SIZE; - } - } -} - -static void __init patch_tlb_miss_handler_bitmap(void) -{ - extern unsigned int valid_addr_bitmap_insn[]; - extern unsigned int valid_addr_bitmap_patch[]; - - valid_addr_bitmap_insn[1] = valid_addr_bitmap_patch[1]; - mb(); - valid_addr_bitmap_insn[0] = valid_addr_bitmap_patch[0]; - flushi(&valid_addr_bitmap_insn[0]); -} - static void __init register_page_bootmem_info(void) { #ifdef CONFIG_NEED_MULTIPLE_NODES @@ -2045,18 +2159,6 @@ static void __init register_page_bootmem_info(void) } void __init mem_init(void) { - unsigned long addr, last; - - addr = PAGE_OFFSET + kern_base; - last = PAGE_ALIGN(kern_size) + addr; - while (addr < last) { - set_bit(__pa(addr) >> 22, sparc64_valid_addr_bitmap); - addr += 
PAGE_SIZE; - } - - setup_valid_addr_bitmap_from_pavail(sparc64_valid_addr_bitmap); - patch_tlb_miss_handler_bitmap(); - high_memory = __va(last_valid_pfn << PAGE_SHIFT); register_page_bootmem_info(); @@ -2146,18 +2248,9 @@ unsigned long _PAGE_CACHE __read_mostly; EXPORT_SYMBOL(_PAGE_CACHE); #ifdef CONFIG_SPARSEMEM_VMEMMAP -unsigned long vmemmap_table[VMEMMAP_SIZE]; - -static long __meminitdata addr_start, addr_end; -static int __meminitdata node_start; - int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, int node) { - unsigned long phys_start = (vstart - VMEMMAP_BASE); - unsigned long phys_end = (vend - VMEMMAP_BASE); - unsigned long addr = phys_start & VMEMMAP_CHUNK_MASK; - unsigned long end = VMEMMAP_ALIGN(phys_end); unsigned long pte_base; pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4U | @@ -2168,47 +2261,52 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V | _PAGE_W_4V); - for (; addr < end; addr += VMEMMAP_CHUNK) { - unsigned long *vmem_pp = - vmemmap_table + (addr >> VMEMMAP_CHUNK_SHIFT); - void *block; + pte_base |= _PAGE_PMD_HUGE; - if (!(*vmem_pp & _PAGE_VALID)) { - block = vmemmap_alloc_block(1UL << 22, node); - if (!block) + vstart = vstart & PMD_MASK; + vend = ALIGN(vend, PMD_SIZE); + for (; vstart < vend; vstart += PMD_SIZE) { + pgd_t *pgd = pgd_offset_k(vstart); + unsigned long pte; + pud_t *pud; + pmd_t *pmd; + + if (pgd_none(*pgd)) { + pud_t *new = vmemmap_alloc_block(PAGE_SIZE, node); + + if (!new) return -ENOMEM; + pgd_populate(&init_mm, pgd, new); + } - *vmem_pp = pte_base | __pa(block); + pud = pud_offset(pgd, vstart); + if (pud_none(*pud)) { + pmd_t *new = vmemmap_alloc_block(PAGE_SIZE, node); - /* check to see if we have contiguous blocks */ - if (addr_end != addr || node_start != node) { - if (addr_start) - printk(KERN_DEBUG " [%lx-%lx] on node %d\n", - addr_start, addr_end-1, node_start); - addr_start = addr; - node_start = node; - } - addr_end = addr + 
VMEMMAP_CHUNK; + if (!new) + return -ENOMEM; + pud_populate(&init_mm, pud, new); } - } - return 0; -} -void __meminit vmemmap_populate_print_last(void) -{ - if (addr_start) { - printk(KERN_DEBUG " [%lx-%lx] on node %d\n", - addr_start, addr_end-1, node_start); - addr_start = 0; - addr_end = 0; - node_start = 0; + pmd = pmd_offset(pud, vstart); + + pte = pmd_val(*pmd); + if (!(pte & _PAGE_VALID)) { + void *block = vmemmap_alloc_block(PMD_SIZE, node); + + if (!block) + return -ENOMEM; + + pmd_val(*pmd) = pte_base | __pa(block); + } } + + return 0; } void vmemmap_free(unsigned long start, unsigned long end) { } - #endif /* CONFIG_SPARSEMEM_VMEMMAP */ static void prot_init_common(unsigned long page_none, @@ -2261,10 +2359,10 @@ static void __init sun4u_pgprot_init(void) __ACCESS_BITS_4U | _PAGE_E_4U); #ifdef CONFIG_DEBUG_PAGEALLOC - kern_linear_pte_xor[0] = _PAGE_VALID ^ 0xfffff80000000000UL; + kern_linear_pte_xor[0] = _PAGE_VALID ^ PAGE_OFFSET; #else kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4U) ^ - 0xfffff80000000000UL; + PAGE_OFFSET; #endif kern_linear_pte_xor[0] |= (_PAGE_CP_4U | _PAGE_CV_4U | _PAGE_P_4U | _PAGE_W_4U); @@ -2308,10 +2406,10 @@ static void __init sun4v_pgprot_init(void) _PAGE_CACHE = _PAGE_CACHE_4V; #ifdef CONFIG_DEBUG_PAGEALLOC - kern_linear_pte_xor[0] = _PAGE_VALID ^ 0xfffff80000000000UL; + kern_linear_pte_xor[0] = _PAGE_VALID ^ PAGE_OFFSET; #else kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^ - 0xfffff80000000000UL; + PAGE_OFFSET; #endif kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V | _PAGE_W_4V); @@ -2455,53 +2553,13 @@ void __flush_tlb_all(void) : : "r" (pstate)); } -static pte_t *get_from_cache(struct mm_struct *mm) -{ - struct page *page; - pte_t *ret; - - spin_lock(&mm->page_table_lock); - page = mm->context.pgtable_page; - ret = NULL; - if (page) { - void *p = page_address(page); - - mm->context.pgtable_page = NULL; - - ret = (pte_t *) (p + (PAGE_SIZE / 2)); - } - spin_unlock(&mm->page_table_lock); - 
- return ret; -} - -static struct page *__alloc_for_cache(struct mm_struct *mm) -{ - struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | - __GFP_REPEAT | __GFP_ZERO); - - if (page) { - spin_lock(&mm->page_table_lock); - if (!mm->context.pgtable_page) { - atomic_set(&page->_count, 2); - mm->context.pgtable_page = page; - } - spin_unlock(&mm->page_table_lock); - } - return page; -} - pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - struct page *page; - pte_t *pte; - - pte = get_from_cache(mm); - if (pte) - return pte; + struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | + __GFP_REPEAT | __GFP_ZERO); + pte_t *pte = NULL; - page = __alloc_for_cache(mm); if (page) pte = (pte_t *) page_address(page); @@ -2511,14 +2569,10 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *page; - pte_t *pte; - - pte = get_from_cache(mm); - if (pte) - return pte; + struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | + __GFP_REPEAT | __GFP_ZERO); + pte_t *pte = NULL; - page = __alloc_for_cache(mm); if (page) { pgtable_page_ctor(page); pte = (pte_t *) page_address(page); @@ -2529,18 +2583,15 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { - struct page *page = virt_to_page(pte); - if (put_page_testzero(page)) - free_hot_cold_page(page, 0); + free_page((unsigned long)pte); } static void __pte_free(pgtable_t pte) { struct page *page = virt_to_page(pte); - if (put_page_testzero(page)) { - pgtable_page_dtor(page); - free_hot_cold_page(page, 0); - } + + pgtable_page_dtor(page); + __free_page(page); } void pte_free(struct mm_struct *mm, pgtable_t pte) @@ -2557,124 +2608,27 @@ void pgtable_free(void *table, bool is_page) } #ifdef CONFIG_TRANSPARENT_HUGEPAGE -static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot, bool for_modify) -{ - if (pgprot_val(pgprot) & _PAGE_VALID) - pmd_val(pmd) |= 
PMD_HUGE_PRESENT; - if (tlb_type == hypervisor) { - if (pgprot_val(pgprot) & _PAGE_WRITE_4V) - pmd_val(pmd) |= PMD_HUGE_WRITE; - if (pgprot_val(pgprot) & _PAGE_EXEC_4V) - pmd_val(pmd) |= PMD_HUGE_EXEC; - - if (!for_modify) { - if (pgprot_val(pgprot) & _PAGE_ACCESSED_4V) - pmd_val(pmd) |= PMD_HUGE_ACCESSED; - if (pgprot_val(pgprot) & _PAGE_MODIFIED_4V) - pmd_val(pmd) |= PMD_HUGE_DIRTY; - } - } else { - if (pgprot_val(pgprot) & _PAGE_WRITE_4U) - pmd_val(pmd) |= PMD_HUGE_WRITE; - if (pgprot_val(pgprot) & _PAGE_EXEC_4U) - pmd_val(pmd) |= PMD_HUGE_EXEC; - - if (!for_modify) { - if (pgprot_val(pgprot) & _PAGE_ACCESSED_4U) - pmd_val(pmd) |= PMD_HUGE_ACCESSED; - if (pgprot_val(pgprot) & _PAGE_MODIFIED_4U) - pmd_val(pmd) |= PMD_HUGE_DIRTY; - } - } - - return pmd; -} - -pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) -{ - pmd_t pmd; - - pmd_val(pmd) = (page_nr << ((PAGE_SHIFT - PMD_PADDR_SHIFT))); - pmd_val(pmd) |= PMD_ISHUGE; - pmd = pmd_set_protbits(pmd, pgprot, false); - return pmd; -} - -pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) -{ - pmd_val(pmd) &= ~(PMD_HUGE_PRESENT | - PMD_HUGE_WRITE | - PMD_HUGE_EXEC); - pmd = pmd_set_protbits(pmd, newprot, true); - return pmd; -} - -pgprot_t pmd_pgprot(pmd_t entry) -{ - unsigned long pte = 0; - - if (pmd_val(entry) & PMD_HUGE_PRESENT) - pte |= _PAGE_VALID; - - if (tlb_type == hypervisor) { - if (pmd_val(entry) & PMD_HUGE_PRESENT) - pte |= _PAGE_PRESENT_4V; - if (pmd_val(entry) & PMD_HUGE_EXEC) - pte |= _PAGE_EXEC_4V; - if (pmd_val(entry) & PMD_HUGE_WRITE) - pte |= _PAGE_W_4V; - if (pmd_val(entry) & PMD_HUGE_ACCESSED) - pte |= _PAGE_ACCESSED_4V; - if (pmd_val(entry) & PMD_HUGE_DIRTY) - pte |= _PAGE_MODIFIED_4V; - pte |= _PAGE_CP_4V|_PAGE_CV_4V; - } else { - if (pmd_val(entry) & PMD_HUGE_PRESENT) - pte |= _PAGE_PRESENT_4U; - if (pmd_val(entry) & PMD_HUGE_EXEC) - pte |= _PAGE_EXEC_4U; - if (pmd_val(entry) & PMD_HUGE_WRITE) - pte |= _PAGE_W_4U; - if (pmd_val(entry) & PMD_HUGE_ACCESSED) - pte |= _PAGE_ACCESSED_4U; - if 
(pmd_val(entry) & PMD_HUGE_DIRTY) - pte |= _PAGE_MODIFIED_4U; - pte |= _PAGE_CP_4U|_PAGE_CV_4U; - } - - return __pgprot(pte); -} - void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd) { unsigned long pte, flags; struct mm_struct *mm; pmd_t entry = *pmd; - pgprot_t prot; if (!pmd_large(entry) || !pmd_young(entry)) return; - pte = (pmd_val(entry) & ~PMD_HUGE_PROTBITS); - pte <<= PMD_PADDR_SHIFT; - pte |= _PAGE_VALID; - - prot = pmd_pgprot(entry); + pte = pmd_val(entry); - if (tlb_type == hypervisor) - pgprot_val(prot) |= _PAGE_SZHUGE_4V; - else - pgprot_val(prot) |= _PAGE_SZHUGE_4U; - - pte |= pgprot_val(prot); + /* We are fabricating 8MB pages using 4MB real hw pages. */ + pte |= (addr & (1UL << REAL_HPAGE_SHIFT)); mm = vma->vm_mm; spin_lock_irqsave(&mm->context.lock, flags); if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL) - __update_mmu_tsb_insert(mm, MM_TSB_HUGE, HPAGE_SHIFT, + __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT, addr, pte); spin_unlock_irqrestore(&mm->context.lock, flags); @@ -2746,3 +2700,26 @@ void hugetlb_setup(struct pt_regs *regs) } } #endif + +#ifdef CONFIG_SMP +#define do_flush_tlb_kernel_range smp_flush_tlb_kernel_range +#else +#define do_flush_tlb_kernel_range __flush_tlb_kernel_range +#endif + +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + if (start < HI_OBP_ADDRESS && end > LOW_OBP_ADDRESS) { + if (start < LOW_OBP_ADDRESS) { + flush_tsb_kernel_range(start, LOW_OBP_ADDRESS); + do_flush_tlb_kernel_range(start, LOW_OBP_ADDRESS); + } + if (end > HI_OBP_ADDRESS) { + flush_tsb_kernel_range(HI_OBP_ADDRESS, end); + do_flush_tlb_kernel_range(HI_OBP_ADDRESS, end); + } + } else { + flush_tsb_kernel_range(start, end); + do_flush_tlb_kernel_range(start, end); + } +} diff --git a/arch/sparc/mm/init_64.h b/arch/sparc/mm/init_64.h index 0661aa6..ac49119 100644 --- a/arch/sparc/mm/init_64.h +++ b/arch/sparc/mm/init_64.h @@ -1,20 +1,15 @@ #ifndef _SPARC64_MM_INIT_H #define 
_SPARC64_MM_INIT_H +#include <asm/page.h> + /* Most of the symbols in this file are defined in init.c and * marked non-static so that assembler code can get at them. */ -#define MAX_PHYS_ADDRESS (1UL << 41UL) -#define KPTE_BITMAP_CHUNK_SZ (256UL * 1024UL * 1024UL) -#define KPTE_BITMAP_BYTES \ - ((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 4) -#define VALID_ADDR_BITMAP_CHUNK_SZ (4UL * 1024UL * 1024UL) -#define VALID_ADDR_BITMAP_BYTES \ - ((MAX_PHYS_ADDRESS / VALID_ADDR_BITMAP_CHUNK_SZ) / 8) +#define MAX_PHYS_ADDRESS (1UL << MAX_PHYS_ADDRESS_BITS) extern unsigned long kern_linear_pte_xor[4]; -extern unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; extern unsigned int sparc64_highest_unlocked_tlb_ent; extern unsigned long sparc64_kern_pri_context; extern unsigned long sparc64_kern_pri_nuc_bits; @@ -36,15 +31,4 @@ extern unsigned long kern_locked_tte_data; extern void prom_world(int enter); -#ifdef CONFIG_SPARSEMEM_VMEMMAP -#define VMEMMAP_CHUNK_SHIFT 22 -#define VMEMMAP_CHUNK (1UL << VMEMMAP_CHUNK_SHIFT) -#define VMEMMAP_CHUNK_MASK ~(VMEMMAP_CHUNK - 1UL) -#define VMEMMAP_ALIGN(x) (((x)+VMEMMAP_CHUNK-1UL)&VMEMMAP_CHUNK_MASK) - -#define VMEMMAP_SIZE ((((1UL << MAX_PHYSADDR_BITS) >> PAGE_SHIFT) * \ - sizeof(struct page)) >> VMEMMAP_CHUNK_SHIFT) -extern unsigned long vmemmap_table[VMEMMAP_SIZE]; -#endif - #endif /* _SPARC64_MM_INIT_H */ diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index 7a91f28..c24d0aa 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -135,7 +135,7 @@ no_cache_flush: #ifdef CONFIG_TRANSPARENT_HUGEPAGE static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr, - pmd_t pmd, bool exec) + pmd_t pmd) { unsigned long end; pte_t *pte; @@ -143,8 +143,11 @@ static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr, pte = pte_offset_map(&pmd, vaddr); end = vaddr + HPAGE_SIZE; while (vaddr < end) { - if (pte_val(*pte) & _PAGE_VALID) + if (pte_val(*pte) & _PAGE_VALID) { + bool exec = 
pte_exec(*pte); + tlb_batch_add_one(mm, vaddr, exec); + } pte++; vaddr += PAGE_SIZE; } @@ -161,8 +164,8 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, if (mm == &init_mm) return; - if ((pmd_val(pmd) ^ pmd_val(orig)) & PMD_ISHUGE) { - if (pmd_val(pmd) & PMD_ISHUGE) + if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) { + if (pmd_val(pmd) & _PAGE_PMD_HUGE) mm->context.huge_pte_count++; else mm->context.huge_pte_count--; @@ -178,16 +181,30 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, } if (!pmd_none(orig)) { - bool exec = ((pmd_val(orig) & PMD_HUGE_EXEC) != 0); - addr &= HPAGE_MASK; - if (pmd_val(orig) & PMD_ISHUGE) + if (pmd_trans_huge(orig)) { + pte_t orig_pte = __pte(pmd_val(orig)); + bool exec = pte_exec(orig_pte); + tlb_batch_add_one(mm, addr, exec); - else - tlb_batch_pmd_scan(mm, addr, orig, exec); + tlb_batch_add_one(mm, addr + REAL_HPAGE_SIZE, exec); + } else { + tlb_batch_pmd_scan(mm, addr, orig); + } } } +void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmdp) +{ + pmd_t entry = *pmdp; + + pmd_val(entry) &= ~_PAGE_VALID; + + set_pmd_at(vma->vm_mm, address, pmdp, entry); + flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); +} + void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pgtable) { diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index 9eb10b4..2e2d684 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -87,7 +87,7 @@ void flush_tsb_user(struct tlb_batch *tb) nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries; if (tlb_type == cheetah_plus || tlb_type == hypervisor) base = __pa(base); - __flush_tsb_one(tb, HPAGE_SHIFT, base, nentries); + __flush_tsb_one(tb, REAL_HPAGE_SHIFT, base, nentries); } #endif raw_spin_unlock_irqrestore(&mm->context.lock, flags); @@ -111,7 +111,7 @@ void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr) nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries; if (tlb_type == cheetah_plus || 
tlb_type == hypervisor) base = __pa(base); - __flush_tsb_one_entry(base, vaddr, HPAGE_SHIFT, nentries); + __flush_tsb_one_entry(base, vaddr, REAL_HPAGE_SHIFT, nentries); } #endif raw_spin_unlock_irqrestore(&mm->context.lock, flags); @@ -133,7 +133,19 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsign mm->context.tsb_block[tsb_idx].tsb_nentries = tsb_bytes / sizeof(struct tsb); - base = TSBMAP_BASE; + switch (tsb_idx) { + case MM_TSB_BASE: + base = TSBMAP_8K_BASE; + break; +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) + case MM_TSB_HUGE: + base = TSBMAP_4M_BASE; + break; +#endif + default: + BUG(); + } + tte = pgprot_val(PAGE_KERNEL_LOCKED); tsb_paddr = __pa(mm->context.tsb_block[tsb_idx].tsb); BUG_ON(tsb_paddr & (tsb_bytes - 1UL)); @@ -472,8 +484,6 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) mm->context.huge_pte_count = 0; #endif - mm->context.pgtable_page = NULL; - /* copy_mm() copies over the parent's mm_struct before calling * us, so we need to zero out the TSB pointer or else tsb_grow() * will be confused and think there is an older TSB to free up. 
@@ -512,17 +522,10 @@ static void tsb_destroy_one(struct tsb_config *tp) void destroy_context(struct mm_struct *mm) { unsigned long flags, i; - struct page *page; for (i = 0; i < MM_NUM_TSBS; i++) tsb_destroy_one(&mm->context.tsb_block[i]); - page = mm->context.pgtable_page; - if (page && put_page_testzero(page)) { - pgtable_page_dtor(page); - free_hot_cold_page(page, 0); - } - raw_spin_lock_irqsave(&ctx_alloc_lock, flags); if (CTX_VALID(mm->context)) { diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S index 432aa0c..b4f4733 100644 --- a/arch/sparc/mm/ultra.S +++ b/arch/sparc/mm/ultra.S @@ -153,10 +153,10 @@ __spitfire_flush_tlb_mm_slow: .globl __flush_icache_page __flush_icache_page: /* %o0 = phys_page */ srlx %o0, PAGE_SHIFT, %o0 - sethi %uhi(PAGE_OFFSET), %g1 + sethi %hi(PAGE_OFFSET), %g1 sllx %o0, PAGE_SHIFT, %o0 sethi %hi(PAGE_SIZE), %g2 - sllx %g1, 32, %g1 + ldx [%g1 + %lo(PAGE_OFFSET)], %g1 add %o0, %g1, %o0 1: subcc %g2, 32, %g2 bne,pt %icc, 1b @@ -178,8 +178,8 @@ __flush_icache_page: /* %o0 = phys_page */ .align 64 .globl __flush_dcache_page __flush_dcache_page: /* %o0=kaddr, %o1=flush_icache */ - sethi %uhi(PAGE_OFFSET), %g1 - sllx %g1, 32, %g1 + sethi %hi(PAGE_OFFSET), %g1 + ldx [%g1 + %lo(PAGE_OFFSET)], %g1 sub %o0, %g1, %o0 ! physical address srlx %o0, 11, %o0 ! make D-cache TAG sethi %hi(1 << 14), %o2 ! 
D-cache size @@ -287,8 +287,8 @@ __cheetah_flush_tlb_pending: /* 27 insns */ #ifdef DCACHE_ALIASING_POSSIBLE __cheetah_flush_dcache_page: /* 11 insns */ - sethi %uhi(PAGE_OFFSET), %g1 - sllx %g1, 32, %g1 + sethi %hi(PAGE_OFFSET), %g1 + ldx [%g1 + %lo(PAGE_OFFSET)], %g1 sub %o0, %g1, %o0 sethi %hi(PAGE_SIZE), %o4 1: subcc %o4, (1 << 5), %o4 diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index 01fe994..44d258d 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -83,9 +83,9 @@ static void bpf_flush_icache(void *start_, void *end_) #define BNE (F2(0, 2) | CONDNE) #ifdef CONFIG_SPARC64 -#define BNE_PTR (F2(0, 1) | CONDNE | (2 << 20)) +#define BE_PTR (F2(0, 1) | CONDE | (2 << 20)) #else -#define BNE_PTR BNE +#define BE_PTR BE #endif #define SETHI(K, REG) \ @@ -600,7 +600,7 @@ void bpf_jit_compile(struct sk_filter *fp) case BPF_S_ANC_IFINDEX: emit_skb_loadptr(dev, r_A); emit_cmpi(r_A, 0); - emit_branch(BNE_PTR, cleanup_addr + 4); + emit_branch(BE_PTR, cleanup_addr + 4); emit_nop(); emit_load32(r_A, struct net_device, ifindex, r_A); break; @@ -613,7 +613,7 @@ void bpf_jit_compile(struct sk_filter *fp) case BPF_S_ANC_HATYPE: emit_skb_loadptr(dev, r_A); emit_cmpi(r_A, 0); - emit_branch(BNE_PTR, cleanup_addr + 4); + emit_branch(BE_PTR, cleanup_addr + 4); emit_nop(); emit_load16(r_A, struct net_device, type, r_A); break; diff --git a/arch/sparc/power/hibernate_asm.S b/arch/sparc/power/hibernate_asm.S index 7994216..d7d9017 100644 --- a/arch/sparc/power/hibernate_asm.S +++ b/arch/sparc/power/hibernate_asm.S @@ -54,8 +54,8 @@ ENTRY(swsusp_arch_resume) nop /* Write PAGE_OFFSET to %g7 */ - sethi %uhi(PAGE_OFFSET), %g7 - sllx %g7, 32, %g7 + sethi %hi(PAGE_OFFSET), %g7 + ldx [%g7 + %lo(PAGE_OFFSET)], %g7 setuw (PAGE_SIZE-8), %g3 diff --git a/arch/sparc/prom/bootstr_64.c b/arch/sparc/prom/bootstr_64.c index ab9ccc6..7149e77 100644 --- a/arch/sparc/prom/bootstr_64.c +++ b/arch/sparc/prom/bootstr_64.c @@ -14,7 +14,10 @@ * the 
.bss section or it will break things. */ -#define BARG_LEN 256 +/* We limit BARG_LEN to 1024 because this is the size of the + * 'barg_out' command line buffer in the SILO bootloader. + */ +#define BARG_LEN 1024 struct { int bootstr_len; int bootstr_valid; diff --git a/arch/sparc/prom/cif.S b/arch/sparc/prom/cif.S index 9c86b4b..8050f38 100644 --- a/arch/sparc/prom/cif.S +++ b/arch/sparc/prom/cif.S @@ -11,11 +11,10 @@ .text .globl prom_cif_direct prom_cif_direct: + save %sp, -192, %sp sethi %hi(p1275buf), %o1 or %o1, %lo(p1275buf), %o1 - ldx [%o1 + 0x0010], %o2 ! prom_cif_stack - save %o2, -192, %sp - ldx [%i1 + 0x0008], %l2 ! prom_cif_handler + ldx [%o1 + 0x0008], %l2 ! prom_cif_handler mov %g4, %l0 mov %g5, %l1 mov %g6, %l3 diff --git a/arch/sparc/prom/init_64.c b/arch/sparc/prom/init_64.c index d95db75..110b0d7 100644 --- a/arch/sparc/prom/init_64.c +++ b/arch/sparc/prom/init_64.c @@ -26,13 +26,13 @@ phandle prom_chosen_node; * It gets passed the pointer to the PROM vector. */ -extern void prom_cif_init(void *, void *); +extern void prom_cif_init(void *); -void __init prom_init(void *cif_handler, void *cif_stack) +void __init prom_init(void *cif_handler) { phandle node; - prom_cif_init(cif_handler, cif_stack); + prom_cif_init(cif_handler); prom_chosen_node = prom_finddevice(prom_chosen_path); if (!prom_chosen_node || (s32)prom_chosen_node == -1) diff --git a/arch/sparc/prom/p1275.c b/arch/sparc/prom/p1275.c index 04a4540..fda23e6 100644 --- a/arch/sparc/prom/p1275.c +++ b/arch/sparc/prom/p1275.c @@ -10,6 +10,7 @@ #include <linux/smp.h> #include <linux/string.h> #include <linux/spinlock.h> +#include <linux/irqflags.h> #include <asm/openprom.h> #include <asm/oplib.h> @@ -20,7 +21,6 @@ struct { long prom_callback; /* 0x00 */ void (*prom_cif_handler)(long *); /* 0x08 */ - unsigned long prom_cif_stack; /* 0x10 */ } p1275buf; extern void prom_world(int); @@ -37,8 +37,8 @@ void p1275_cmd_direct(unsigned long *args) { unsigned long flags; - raw_local_save_flags(flags); 
- raw_local_irq_restore((unsigned long)PIL_NMI); + local_save_flags(flags); + local_irq_restore((unsigned long)PIL_NMI); raw_spin_lock(&prom_entry_lock); prom_world(1); @@ -46,11 +46,10 @@ void p1275_cmd_direct(unsigned long *args) prom_world(0); raw_spin_unlock(&prom_entry_lock); - raw_local_irq_restore(flags); + local_irq_restore(flags); } void prom_cif_init(void *cif_handler, void *cif_stack) { p1275buf.prom_cif_handler = (void (*)(long *))cif_handler; - p1275buf.prom_cif_stack = (unsigned long)cif_stack; } diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c index 004ba56..33294fd 100644 --- a/arch/tile/mm/homecache.c +++ b/arch/tile/mm/homecache.c @@ -417,7 +417,7 @@ void __homecache_free_pages(struct page *page, unsigned int order) if (put_page_testzero(page)) { homecache_change_page_home(page, order, PAGE_HOME_HASH); if (order == 0) { - free_hot_cold_page(page, 0); + free_hot_cold_page(page, false); } else { init_page_count(page); __free_pages(page, order); diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c index 0cb3bba..e514899 100644 --- a/arch/tile/mm/hugetlbpage.c +++ b/arch/tile/mm/hugetlbpage.c @@ -166,11 +166,6 @@ int pud_huge(pud_t pud) return !!(pud_val(pud) & _PAGE_HUGE_PAGE); } -int pmd_huge_support(void) -{ - return 1; -} - struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write) { diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common index 8ddea1f..0228a6a 100644 --- a/arch/um/Kconfig.common +++ b/arch/um/Kconfig.common @@ -7,6 +7,7 @@ config UML bool default y select HAVE_UID16 + select HAVE_FUTEX_CMPXCHG if FUTEX select GENERIC_IRQ_SHOW select GENERIC_CPU_DEVICES select GENERIC_IO diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 3716e69..e8ab93c 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -1277,7 +1277,7 @@ static void do_ubd_request(struct request_queue *q) while(1){ struct ubd *dev = q->queuedata; - 
if(dev->end_sg == 0){ + if(dev->request == NULL){ struct request *req = blk_fetch_request(q); if(req == NULL) return; @@ -1299,7 +1299,8 @@ static void do_ubd_request(struct request_queue *q) return; } prepare_flush_request(req, io_req); - submit_request(io_req, dev); + if (submit_request(io_req, dev) == false) + return; } while(dev->start_sg < dev->end_sg){ diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h index fb5e4c6..ef470a7 100644 --- a/arch/unicore32/include/asm/mmu_context.h +++ b/arch/unicore32/include/asm/mmu_context.h @@ -14,6 +14,8 @@ #include <linux/compiler.h> #include <linux/sched.h> +#include <linux/mm.h> +#include <linux/vmacache.h> #include <linux/io.h> #include <asm/cacheflush.h> @@ -73,7 +75,7 @@ do { \ else \ mm->mmap = NULL; \ rb_erase(&high_vma->vm_rb, &mm->mm_rb); \ - mm->mmap_cache = NULL; \ + vmacache_invalidate(mm); \ mm->map_count--; \ remove_vma(high_vma); \ } \ diff --git a/arch/unicore32/mm/alignment.c b/arch/unicore32/mm/alignment.c index de7dc5f..24e8360 100644 --- a/arch/unicore32/mm/alignment.c +++ b/arch/unicore32/mm/alignment.c @@ -21,6 +21,7 @@ #include <linux/sched.h> #include <linux/uaccess.h> +#include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/unaligned.h> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index edbb857..fa85c3e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -124,6 +124,7 @@ config X86 select COMPAT_OLD_SIGACTION if IA32_EMULATION select RTC_LIB select HAVE_DEBUG_STACKOVERFLOW + select ARCH_SUPPORTS_ATOMIC_RMW config INSTRUCTION_DECODER def_bool y @@ -864,7 +865,7 @@ source "kernel/Kconfig.preempt" config X86_UP_APIC bool "Local APIC support on uniprocessors" - depends on X86_32 && !SMP && !X86_32_NON_STANDARD && !PCI_MSI + depends on X86_32 && !SMP && !X86_32_NON_STANDARD ---help--- A local APIC (Advanced Programmable Interrupt Controller) is an integrated interrupt controller in the CPU. 
If you have a single-CPU @@ -875,6 +876,10 @@ config X86_UP_APIC performance counters), and the NMI watchdog which detects hard lockups. +config X86_UP_APIC_MSI + def_bool y + select X86_UP_APIC if X86_32 && !SMP && !X86_32_NON_STANDARD && PCI_MSI + config X86_UP_IOAPIC bool "IO-APIC support on uniprocessors" depends on X86_UP_APIC @@ -976,10 +981,27 @@ config VM86 default y depends on X86_32 ---help--- - This option is required by programs like DOSEMU to run 16-bit legacy - code on X86 processors. It also may be needed by software like - XFree86 to initialize some video cards via BIOS. Disabling this - option saves about 6k. + This option is required by programs like DOSEMU to run + 16-bit real mode legacy code on x86 processors. It also may + be needed by software like XFree86 to initialize some video + cards via BIOS. Disabling this option saves about 6K. + +config X86_16BIT + bool "Enable support for 16-bit segments" if EXPERT + default y + ---help--- + This option is required by programs like Wine to run 16-bit + protected mode legacy code on x86 processors. Disabling + this option saves about 300 bytes on i386, or around 6K text + plus 16K runtime memory on x86-64, + +config X86_ESPFIX32 + def_bool y + depends on X86_16BIT && X86_32 + +config X86_ESPFIX64 + def_bool y + depends on X86_16BIT && X86_64 config TOSHIBA tristate "Toshiba Laptop support" @@ -1594,6 +1616,7 @@ config EFI config EFI_STUB bool "EFI stub support" depends on EFI + select RELOCATABLE ---help--- This kernel feature allows a bzImage to be loaded directly by EFI firmware without the use of a bootloader. 
@@ -1885,6 +1908,10 @@ config USE_PERCPU_NUMA_NODE_ID def_bool y depends on NUMA +config ARCH_ENABLE_HUGEPAGE_MIGRATION + def_bool y + depends on X86_64 && HUGETLB_PAGE && MIGRATION + menu "Power management and ACPI options" config ARCH_HIBERNATION_HEADER diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index b7388a4..9b883a8 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -865,6 +865,9 @@ fail: * Because the x86 boot code expects to be passed a boot_params we * need to create one ourselves (usually the bootloader would create * one for us). + * + * The caller is responsible for filling out ->code32_start in the + * returned boot_params. */ struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table) { @@ -921,8 +924,6 @@ struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table) hdr->vid_mode = 0xffff; hdr->boot_flag = 0xAA55; - hdr->code32_start = (__u64)(unsigned long)image->image_base; - hdr->type_of_loader = 0x21; /* Convert unicode cmdline to ascii */ diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 5d6f689..b1bd969 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -50,6 +50,13 @@ ENTRY(efi_pe_entry) pushl %eax pushl %esi pushl %ecx + + call reloc +reloc: + popl %ecx + subl reloc, %ecx + movl %ecx, BP_code32_start(%eax) + sub $0x4, %esp ENTRY(efi_stub_entry) @@ -63,12 +70,7 @@ ENTRY(efi_stub_entry) hlt jmp 1b 2: - call 3f -3: - popl %eax - subl $3b, %eax - subl BP_pref_address(%esi), %eax - add BP_code32_start(%esi), %eax + movl BP_code32_start(%esi), %eax leal preferred_addr(%eax), %eax jmp *%eax diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index c337422..a558403 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -215,6 +215,8 @@ ENTRY(efi_pe_entry) cmpq $0,%rax je 1f mov %rax, %rdx + 
leaq startup_32(%rip), %rax + movl %eax, BP_code32_start(%rdx) popq %rsi popq %rdi @@ -228,12 +230,7 @@ ENTRY(efi_stub_entry) hlt jmp 1b 2: - call 3f -3: - popq %rax - subq $3b, %rax - subq BP_pref_address(%rsi), %rax - add BP_code32_start(%esi), %eax + movl BP_code32_start(%esi), %eax leaq preferred_addr(%rax), %rax jmp *%rax diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 434f077..1b05afd 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -401,6 +401,8 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, unsigned char *output, unsigned long output_len) { + unsigned char *output_orig = output; + real_mode = rmode; sanitize_boot_params(real_mode); @@ -439,7 +441,12 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, debug_putstr("\nDecompressing Linux... "); decompress(input_data, input_len, NULL, NULL, output, NULL, error); parse_elf(output); - handle_relocations(output, output_len); + /* + * 32-bit always performs relocations. 64-bit relocations are only + * needed if kASLR has chosen a different load address. + */ + if (!IS_ENABLED(CONFIG_X86_64) || output != output_orig) + handle_relocations(output, output_len); debug_putstr("done.\nBooting the kernel.\n"); return; } diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 9ec06a1..4257124 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -91,10 +91,9 @@ bs_die: .section ".bsdata", "a" bugger_off_msg: - .ascii "Direct floppy boot is not supported. 
" - .ascii "Use a boot loader program instead.\r\n" + .ascii "Use a boot loader.\r\n" .ascii "\n" - .ascii "Remove disk and press any key to reboot ...\r\n" + .ascii "Remove disk and press any key to reboot...\r\n" .byte 0 #ifdef CONFIG_EFI_STUB @@ -108,7 +107,7 @@ coff_header: #else .word 0x8664 # x86-64 #endif - .word 3 # nr_sections + .word 4 # nr_sections .long 0 # TimeDateStamp .long 0 # PointerToSymbolTable .long 1 # NumberOfSymbols @@ -250,6 +249,25 @@ section_table: .word 0 # NumberOfLineNumbers .long 0x60500020 # Characteristics (section flags) + # + # The offset & size fields are filled in by build.c. + # + .ascii ".bss" + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .long 0 + .long 0x0 + .long 0 # Size of initialized data + # on disk + .long 0x0 + .long 0 # PointerToRelocations + .long 0 # PointerToLineNumbers + .word 0 # NumberOfRelocations + .word 0 # NumberOfLineNumbers + .long 0xc8000080 # Characteristics (section flags) + #endif /* CONFIG_EFI_STUB */ # Kernel attributes; used by setup. 
This is part 1 of the diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index c941d6a..687dd28 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -141,7 +141,7 @@ static void usage(void) #ifdef CONFIG_EFI_STUB -static void update_pecoff_section_header(char *section_name, u32 offset, u32 size) +static void update_pecoff_section_header_fields(char *section_name, u32 vma, u32 size, u32 datasz, u32 offset) { unsigned int pe_header; unsigned short num_sections; @@ -162,10 +162,10 @@ static void update_pecoff_section_header(char *section_name, u32 offset, u32 siz put_unaligned_le32(size, section + 0x8); /* section header vma field */ - put_unaligned_le32(offset, section + 0xc); + put_unaligned_le32(vma, section + 0xc); /* section header 'size of initialised data' field */ - put_unaligned_le32(size, section + 0x10); + put_unaligned_le32(datasz, section + 0x10); /* section header 'file offset' field */ put_unaligned_le32(offset, section + 0x14); @@ -177,6 +177,11 @@ static void update_pecoff_section_header(char *section_name, u32 offset, u32 siz } } +static void update_pecoff_section_header(char *section_name, u32 offset, u32 size) +{ + update_pecoff_section_header_fields(section_name, offset, size, size, offset); +} + static void update_pecoff_setup_and_reloc(unsigned int size) { u32 setup_offset = 0x200; @@ -201,9 +206,6 @@ static void update_pecoff_text(unsigned int text_start, unsigned int file_sz) pe_header = get_unaligned_le32(&buf[0x3c]); - /* Size of image */ - put_unaligned_le32(file_sz, &buf[pe_header + 0x50]); - /* * Size of code: Subtract the size of the first sector (512 bytes) * which includes the header. 
@@ -218,6 +220,22 @@ static void update_pecoff_text(unsigned int text_start, unsigned int file_sz) update_pecoff_section_header(".text", text_start, text_sz); } +static void update_pecoff_bss(unsigned int file_sz, unsigned int init_sz) +{ + unsigned int pe_header; + unsigned int bss_sz = init_sz - file_sz; + + pe_header = get_unaligned_le32(&buf[0x3c]); + + /* Size of uninitialized data */ + put_unaligned_le32(bss_sz, &buf[pe_header + 0x24]); + + /* Size of image */ + put_unaligned_le32(init_sz, &buf[pe_header + 0x50]); + + update_pecoff_section_header_fields(".bss", file_sz, bss_sz, 0, 0); +} + #endif /* CONFIG_EFI_STUB */ @@ -269,6 +287,9 @@ int main(int argc, char ** argv) int fd; void *kernel; u32 crc = 0xffffffffUL; +#ifdef CONFIG_EFI_STUB + unsigned int init_sz; +#endif /* Defaults for old kernel */ #ifdef CONFIG_X86_32 @@ -339,7 +360,9 @@ int main(int argc, char ** argv) put_unaligned_le32(sys_size, &buf[0x1f4]); #ifdef CONFIG_EFI_STUB - update_pecoff_text(setup_sectors * 512, sz + i + ((sys_size * 16) - sz)); + update_pecoff_text(setup_sectors * 512, i + (sys_size * 16)); + init_sz = get_unaligned_le32(&buf[0x260]); + update_pecoff_bss(i + (sys_size * 16), init_sz); #ifdef CONFIG_X86_64 /* Yes, this is really how we defined it :( */ efi_stub_entry -= 0x200; diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c index aafe8ce..e26984f 100644 --- a/arch/x86/crypto/aes_glue.c +++ b/arch/x86/crypto/aes_glue.c @@ -66,5 +66,5 @@ module_exit(aes_fini); MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, asm optimized"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("aes"); -MODULE_ALIAS("aes-asm"); +MODULE_ALIAS_CRYPTO("aes"); +MODULE_ALIAS_CRYPTO("aes-asm"); diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 3fbe870..131585f 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -1375,4 +1375,4 @@ module_exit(aesni_exit); MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, Intel 
AES-NI instructions optimized"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("aes"); +MODULE_ALIAS_CRYPTO("aes"); diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c index 50ec333..1477cfc 100644 --- a/arch/x86/crypto/blowfish_glue.c +++ b/arch/x86/crypto/blowfish_glue.c @@ -481,5 +481,5 @@ module_exit(fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Blowfish Cipher Algorithm, asm optimized"); -MODULE_ALIAS("blowfish"); -MODULE_ALIAS("blowfish-asm"); +MODULE_ALIAS_CRYPTO("blowfish"); +MODULE_ALIAS_CRYPTO("blowfish-asm"); diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index 414fe5d..da710fc 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -582,5 +582,5 @@ module_exit(camellia_aesni_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Camellia Cipher Algorithm, AES-NI/AVX2 optimized"); -MODULE_ALIAS("camellia"); -MODULE_ALIAS("camellia-asm"); +MODULE_ALIAS_CRYPTO("camellia"); +MODULE_ALIAS_CRYPTO("camellia-asm"); diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index 37fd0c0..883e1af 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -574,5 +574,5 @@ module_exit(camellia_aesni_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Camellia Cipher Algorithm, AES-NI/AVX optimized"); -MODULE_ALIAS("camellia"); -MODULE_ALIAS("camellia-asm"); +MODULE_ALIAS_CRYPTO("camellia"); +MODULE_ALIAS_CRYPTO("camellia-asm"); diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c index c171dcb..5c8b626 100644 --- a/arch/x86/crypto/camellia_glue.c +++ b/arch/x86/crypto/camellia_glue.c @@ -1725,5 +1725,5 @@ module_exit(fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Camellia Cipher Algorithm, asm optimized"); -MODULE_ALIAS("camellia"); -MODULE_ALIAS("camellia-asm"); +MODULE_ALIAS_CRYPTO("camellia"); 
+MODULE_ALIAS_CRYPTO("camellia-asm"); diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c index 2d48e83..a0c7f10 100644 --- a/arch/x86/crypto/cast5_avx_glue.c +++ b/arch/x86/crypto/cast5_avx_glue.c @@ -491,4 +491,4 @@ module_exit(cast5_exit); MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("cast5"); +MODULE_ALIAS_CRYPTO("cast5"); diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c index 8d0dfb8..c197562 100644 --- a/arch/x86/crypto/cast6_avx_glue.c +++ b/arch/x86/crypto/cast6_avx_glue.c @@ -611,4 +611,4 @@ module_exit(cast6_exit); MODULE_DESCRIPTION("Cast6 Cipher Algorithm, AVX optimized"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("cast6"); +MODULE_ALIAS_CRYPTO("cast6"); diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c index 9d014a7..1937fc1 100644 --- a/arch/x86/crypto/crc32-pclmul_glue.c +++ b/arch/x86/crypto/crc32-pclmul_glue.c @@ -197,5 +197,5 @@ module_exit(crc32_pclmul_mod_fini); MODULE_AUTHOR("Alexander Boyko <alexander_boyko@xyratex.com>"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("crc32"); -MODULE_ALIAS("crc32-pclmul"); +MODULE_ALIAS_CRYPTO("crc32"); +MODULE_ALIAS_CRYPTO("crc32-pclmul"); diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c index 6812ad9..28640c3 100644 --- a/arch/x86/crypto/crc32c-intel_glue.c +++ b/arch/x86/crypto/crc32c-intel_glue.c @@ -280,5 +280,5 @@ MODULE_AUTHOR("Austin Zhang <austin.zhang@intel.com>, Kent Liu <kent.liu@intel.c MODULE_DESCRIPTION("CRC32c (Castagnoli) optimization using Intel Hardware."); MODULE_LICENSE("GPL"); -MODULE_ALIAS("crc32c"); -MODULE_ALIAS("crc32c-intel"); +MODULE_ALIAS_CRYPTO("crc32c"); +MODULE_ALIAS_CRYPTO("crc32c-intel"); diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c index 7845d7f..b6c67bf 100644 --- a/arch/x86/crypto/crct10dif-pclmul_glue.c +++ b/arch/x86/crypto/crct10dif-pclmul_glue.c 
@@ -147,5 +147,5 @@ MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>"); MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with PCLMULQDQ."); MODULE_LICENSE("GPL"); -MODULE_ALIAS("crct10dif"); -MODULE_ALIAS("crct10dif-pclmul"); +MODULE_ALIAS_CRYPTO("crct10dif"); +MODULE_ALIAS_CRYPTO("crct10dif-pclmul"); diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c index 98d7a18..f368ba2 100644 --- a/arch/x86/crypto/fpu.c +++ b/arch/x86/crypto/fpu.c @@ -17,6 +17,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> +#include <linux/crypto.h> #include <asm/i387.h> struct crypto_fpu_ctx { @@ -159,3 +160,5 @@ void __exit crypto_fpu_exit(void) { crypto_unregister_template(&crypto_fpu_tmpl); } + +MODULE_ALIAS_CRYPTO("fpu"); diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index d785cf2..a8d6f69 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -341,4 +341,4 @@ module_exit(ghash_pclmulqdqni_mod_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("GHASH Message Digest Algorithm, " "acclerated by PCLMULQDQ-NI"); -MODULE_ALIAS("ghash"); +MODULE_ALIAS_CRYPTO("ghash"); diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c index 5e8e677..399a29d 100644 --- a/arch/x86/crypto/salsa20_glue.c +++ b/arch/x86/crypto/salsa20_glue.c @@ -119,5 +119,5 @@ module_exit(fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION ("Salsa20 stream cipher algorithm (optimized assembly version)"); -MODULE_ALIAS("salsa20"); -MODULE_ALIAS("salsa20-asm"); +MODULE_ALIAS_CRYPTO("salsa20"); +MODULE_ALIAS_CRYPTO("salsa20-asm"); diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index 23aabc6..cb57caf 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c @@ -558,5 +558,5 @@ module_exit(fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Serpent Cipher Algorithm, AVX2 optimized"); 
-MODULE_ALIAS("serpent"); -MODULE_ALIAS("serpent-asm"); +MODULE_ALIAS_CRYPTO("serpent"); +MODULE_ALIAS_CRYPTO("serpent-asm"); diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index 9ae83cf..0a86e8b 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -617,4 +617,4 @@ module_exit(serpent_exit); MODULE_DESCRIPTION("Serpent Cipher Algorithm, AVX optimized"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("serpent"); +MODULE_ALIAS_CRYPTO("serpent"); diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index 97a356e..279f389 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c @@ -618,4 +618,4 @@ module_exit(serpent_sse2_exit); MODULE_DESCRIPTION("Serpent Cipher Algorithm, SSE2 optimized"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("serpent"); +MODULE_ALIAS_CRYPTO("serpent"); diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 4a11a9d..29e1060 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -237,4 +237,4 @@ module_exit(sha1_ssse3_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, Supplemental SSE3 accelerated"); -MODULE_ALIAS("sha1"); +MODULE_ALIAS_CRYPTO("sha1"); diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index e52947f..4dc100d 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -318,5 +318,5 @@ module_exit(sha256_ssse3_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated"); -MODULE_ALIAS("sha256"); -MODULE_ALIAS("sha384"); +MODULE_ALIAS_CRYPTO("sha256"); +MODULE_ALIAS_CRYPTO("sha224"); diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index f30cd10..26a5898 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ 
-141,7 +141,7 @@ static int sha512_ssse3_final(struct shash_desc *desc, u8 *out) /* save number of bits */ bits[1] = cpu_to_be64(sctx->count[0] << 3); - bits[0] = cpu_to_be64(sctx->count[1] << 3) | sctx->count[0] >> 61; + bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61); /* Pad out to 112 mod 128 and append length */ index = sctx->count[0] & 0x7f; @@ -326,5 +326,5 @@ module_exit(sha512_ssse3_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, Supplemental SSE3 accelerated"); -MODULE_ALIAS("sha512"); -MODULE_ALIAS("sha384"); +MODULE_ALIAS_CRYPTO("sha512"); +MODULE_ALIAS_CRYPTO("sha384"); diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index a62ba54..c8c12c1 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -579,4 +579,4 @@ module_exit(twofish_exit); MODULE_DESCRIPTION("Twofish Cipher Algorithm, AVX optimized"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("twofish"); +MODULE_ALIAS_CRYPTO("twofish"); diff --git a/arch/x86/crypto/twofish_glue.c b/arch/x86/crypto/twofish_glue.c index 0a52023..77e06c2 100644 --- a/arch/x86/crypto/twofish_glue.c +++ b/arch/x86/crypto/twofish_glue.c @@ -96,5 +96,5 @@ module_exit(fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION ("Twofish Cipher Algorithm, asm optimized"); -MODULE_ALIAS("twofish"); -MODULE_ALIAS("twofish-asm"); +MODULE_ALIAS_CRYPTO("twofish"); +MODULE_ALIAS_CRYPTO("twofish-asm"); diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index 13e63b3..56d8a08 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c @@ -495,5 +495,5 @@ module_exit(fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized"); -MODULE_ALIAS("twofish"); -MODULE_ALIAS("twofish-asm"); +MODULE_ALIAS_CRYPTO("twofish"); +MODULE_ALIAS_CRYPTO("twofish-asm"); diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S 
index 4299eb0..92a2e93 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -151,6 +151,16 @@ ENTRY(ia32_sysenter_target) 1: movl (%rbp),%ebp _ASM_EXTABLE(1b,ia32_badarg) ASM_CLAC + + /* + * Sysenter doesn't filter flags, so we need to clear NT + * ourselves. To save a few cycles, we can check whether + * NT was set instead of doing an unconditional popfq. + */ + testl $X86_EFLAGS_NT,EFLAGS-ARGOFFSET(%rsp) + jnz sysenter_fix_flags +sysenter_flags_fixed: + orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) CFI_REMEMBER_STATE @@ -184,6 +194,8 @@ sysexit_from_sys_call: TRACE_IRQS_ON ENABLE_INTERRUPTS_SYSEXIT32 + CFI_RESTORE_STATE + #ifdef CONFIG_AUDITSYSCALL .macro auditsys_entry_common movl %esi,%r9d /* 6th arg: 4th syscall arg */ @@ -226,7 +238,6 @@ sysexit_from_sys_call: .endm sysenter_auditsys: - CFI_RESTORE_STATE auditsys_entry_common movl %ebp,%r9d /* reload 6th syscall arg */ jmp sysenter_dispatch @@ -235,6 +246,11 @@ sysexit_audit: auditsys_exit sysexit_from_sys_call #endif +sysenter_fix_flags: + pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_FIXED) + popfq_cfi + jmp sysenter_flags_fixed + sysenter_tracesys: #ifdef CONFIG_AUDITSYSCALL testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 89270b4..c2f19a8 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -203,6 +203,7 @@ #define X86_FEATURE_DECODEASSISTS (8*32+12) /* AMD Decode Assists support */ #define X86_FEATURE_PAUSEFILTER (8*32+13) /* AMD filtered pause intercept */ #define X86_FEATURE_PFTHRESHOLD (8*32+14) /* AMD pause filter threshold */ +#define X86_FEATURE_VMMCALL (8*32+15) /* Prefer vmmcall to vmcall */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h 
index b90e5df..f6aaf7d 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -251,7 +251,8 @@ static inline void native_load_tls(struct thread_struct *t, unsigned int cpu) gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]; } -#define _LDT_empty(info) \ +/* This intentionally ignores lm, since 32-bit apps don't have that field. */ +#define LDT_empty(info) \ ((info)->base_addr == 0 && \ (info)->limit == 0 && \ (info)->contents == 0 && \ @@ -261,11 +262,18 @@ static inline void native_load_tls(struct thread_struct *t, unsigned int cpu) (info)->seg_not_present == 1 && \ (info)->useable == 0) -#ifdef CONFIG_X86_64 -#define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0)) -#else -#define LDT_empty(info) (_LDT_empty(info)) -#endif +/* Lots of programs expect an all-zero user_desc to mean "no segment at all". */ +static inline bool LDT_zero(const struct user_desc *info) +{ + return (info->base_addr == 0 && + info->limit == 0 && + info->contents == 0 && + info->read_exec_only == 0 && + info->seg_32bit == 0 && + info->limit_in_pages == 0 && + info->seg_not_present == 0 && + info->useable == 0); +} static inline void clear_LDT(void) { diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 9c999c1..01f15b2 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -155,8 +155,9 @@ do { \ #define elf_check_arch(x) \ ((x)->e_machine == EM_X86_64) -#define compat_elf_check_arch(x) \ - (elf_check_arch_ia32(x) || (x)->e_machine == EM_X86_64) +#define compat_elf_check_arch(x) \ + (elf_check_arch_ia32(x) || \ + (IS_ENABLED(CONFIG_X86_X32_ABI) && (x)->e_machine == EM_X86_64)) #if __USER32_DS != __USER_DS # error "The following code assumes __USER32_DS == __USER_DS" diff --git a/arch/x86/include/asm/espfix.h b/arch/x86/include/asm/espfix.h new file mode 100644 index 0000000..99efebb --- /dev/null +++ b/arch/x86/include/asm/espfix.h @@ -0,0 +1,16 @@ +#ifndef _ASM_X86_ESPFIX_H +#define _ASM_X86_ESPFIX_H + +#ifdef 
CONFIG_X86_64 + +#include <asm/percpu.h> + +DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack); +DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr); + +extern void init_espfix_bsp(void); +extern void init_espfix_ap(void); + +#endif /* CONFIG_X86_64 */ + +#endif /* _ASM_X86_ESPFIX_H */ diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h index a809121..68c0539 100644 --- a/arch/x86/include/asm/hugetlb.h +++ b/arch/x86/include/asm/hugetlb.h @@ -52,6 +52,7 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { + ptep_clear_flush(vma, addr, ptep); } static inline int huge_pte_none(pte_t pte) diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 0ea10f27..cb6cfcd 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -25,6 +25,7 @@ extern void irq_ctx_init(int cpu); #ifdef CONFIG_HOTPLUG_CPU #include <linux/cpumask.h> +extern int check_irq_vectors_for_cpu_disable(void); extern void fixup_irqs(void); extern void irq_force_complete_move(int); #endif diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index bba3cf8..0a8b519 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -129,7 +129,7 @@ static inline notrace unsigned long arch_local_irq_save(void) #define PARAVIRT_ADJUST_EXCEPTION_FRAME /* */ -#define INTERRUPT_RETURN iretq +#define INTERRUPT_RETURN jmp native_iret #define USERGS_SYSRET64 \ swapgs; \ sysretq; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4134cf6..25c8292 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -462,7 +462,7 @@ struct kvm_vcpu_arch { bool nmi_injected; /* Trying to inject an NMI this entry */ struct mtrr_state_type mtrr_state; - u32 pat; + u64 pat; int switch_db_regs; unsigned long db[KVM_NR_DB_REGS]; @@ -480,6 
+480,7 @@ struct kvm_vcpu_arch { u64 mmio_gva; unsigned access; gfn_t mmio_gfn; + u64 mmio_gen; struct kvm_pmu pmu; @@ -980,6 +981,20 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); } +static inline u64 get_canonical(u64 la) +{ + return ((int64_t)la << 16) >> 16; +} + +static inline bool is_noncanonical_address(u64 la) +{ +#ifdef CONFIG_X86_64 + return get_canonical(la) != la; +#else + return false; +#endif +} + #define TSS_IOPB_BASE_OFFSET 0x66 #define TSS_BASE_SIZE 0x68 #define TSS_IOPB_SIZE (65536 / 8) @@ -1038,7 +1053,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_vcpu_reset(struct kvm_vcpu *vcpu); void kvm_define_shared_msr(unsigned index, u32 msr); -void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); +int kvm_set_shared_msr(unsigned index, u64 val, u64 mask); bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index c7678e4..e62cf89 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -2,6 +2,7 @@ #define _ASM_X86_KVM_PARA_H #include <asm/processor.h> +#include <asm/alternative.h> #include <uapi/asm/kvm_para.h> extern void kvmclock_init(void); @@ -16,10 +17,15 @@ static inline bool kvm_check_and_clear_guest_paused(void) } #endif /* CONFIG_KVM_GUEST */ -/* This instruction is vmcall. On non-VT architectures, it will generate a - * trap that we will then rewrite to the appropriate instruction. +#ifdef CONFIG_DEBUG_RODATA +#define KVM_HYPERCALL \ + ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL) +#else +/* On AMD processors, vmcall will generate a trap that we will + * then rewrite to the appropriate instruction. */ #define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1" +#endif /* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall * instruction. 
The hypervisor may replace it with something else but only the diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index f48b17d..3a52ee0 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h @@ -20,7 +20,6 @@ #define THREAD_SIZE_ORDER 1 #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) -#define STACKFAULT_STACK 0 #define DOUBLEFAULT_STACK 1 #define NMI_STACK 0 #define DEBUG_STACK 0 diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 43dcd80..d1d2972 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -14,12 +14,11 @@ #define IRQ_STACK_ORDER 2 #define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) -#define STACKFAULT_STACK 1 -#define DOUBLEFAULT_STACK 2 -#define NMI_STACK 3 -#define DEBUG_STACK 4 -#define MCE_STACK 5 -#define N_EXCEPTION_STACKS 5 /* hw limit: 7 */ +#define DOUBLEFAULT_STACK 1 +#define NMI_STACK 2 +#define DEBUG_STACK 3 +#define MCE_STACK 4 +#define N_EXCEPTION_STACKS 4 /* hw limit: 7 */ #define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) #define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 2d88344..b1609f2 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -61,6 +61,8 @@ typedef struct { pteval_t pte; } pte_t; #define MODULES_VADDR _AC(0xffffffffa0000000, UL) #define MODULES_END _AC(0xffffffffff000000, UL) #define MODULES_LEN (MODULES_END - MODULES_VADDR) +#define ESPFIX_PGD_ENTRY _AC(-2, UL) +#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << PGDIR_SHIFT) #define EARLY_DYNAMIC_PAGE_TABLES 64 diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 942a086..68e9f00 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -232,6 +232,22 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs 
*regs, #define ARCH_HAS_USER_SINGLE_STEP_INFO +/* + * When hitting ptrace_stop(), we cannot return using SYSRET because + * that does not restore the full CPU state, only a minimal set. The + * ptracer can change arbitrary register values, which is usually okay + * because the usual ptrace stops run off the signal delivery path which + * forces IRET; however, ptrace_event() stops happen in arbitrary places + * in the kernel and don't force IRET path. + * + * So force IRET path after a ptrace stop. + */ +#define arch_ptrace_stop_needed(code, info) \ +({ \ + set_thread_flag(TIF_NOTIFY_RESUME); \ + false; \ +}) + struct user_desc; extern int do_get_thread_area(struct task_struct *p, int idx, struct user_desc __user *info); diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 3475554..ad1d8ec 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -64,6 +64,8 @@ static inline void x86_ce4100_early_setup(void) { } #ifndef _SETUP +#include <asm/espfix.h> + /* * This is set up by the setup-routine at boot-time */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index f08e527..b188292 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -149,7 +149,7 @@ struct thread_info { /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ (_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME | \ - _TIF_USER_RETURN_NOTIFY) + _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE) /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index e6d90ba..04905bf 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -62,7 +62,7 @@ static inline void __flush_tlb_all(void) static inline void __flush_tlb_one(unsigned long addr) { - count_vm_event(NR_TLB_LOCAL_FLUSH_ONE); + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); __flush_tlb_single(addr); } @@ -93,13 +93,13 @@ 
static inline void __flush_tlb_one(unsigned long addr) */ static inline void __flush_tlb_up(void) { - count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); __flush_tlb(); } static inline void flush_tlb_all(void) { - count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); __flush_tlb_all(); } diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index 2a46ca7..2874be9 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h @@ -34,7 +34,7 @@ static inline unsigned int __getcpu(void) native_read_tscp(&p); } else { /* Load per CPU data from GDT */ - asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); + asm volatile ("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); } return p; diff --git a/arch/x86/include/uapi/asm/ldt.h b/arch/x86/include/uapi/asm/ldt.h index 46727eb..6e1aaf7 100644 --- a/arch/x86/include/uapi/asm/ldt.h +++ b/arch/x86/include/uapi/asm/ldt.h @@ -28,6 +28,13 @@ struct user_desc { unsigned int seg_not_present:1; unsigned int useable:1; #ifdef __x86_64__ + /* + * Because this bit is not present in 32-bit user code, user + * programs can pass uninitialized values here. Therefore, in + * any context in which a user_desc comes from a 32-bit program, + * the kernel must act as though lm == 0, regardless of the + * actual value. 
+ */ unsigned int lm:1; #endif }; diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index 0e79420..990a2fe 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -67,6 +67,7 @@ #define EXIT_REASON_EPT_MISCONFIG 49 #define EXIT_REASON_INVEPT 50 #define EXIT_REASON_PREEMPTION_TIMER 52 +#define EXIT_REASON_INVVPID 53 #define EXIT_REASON_WBINVD 54 #define EXIT_REASON_XSETBV 55 #define EXIT_REASON_APIC_WRITE 56 @@ -114,6 +115,7 @@ { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ { EXIT_REASON_INVD, "INVD" }, \ + { EXIT_REASON_INVVPID, "INVVPID" }, \ { EXIT_REASON_INVPCID, "INVPCID" } #endif /* _UAPIVMX_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a5408b9..32f1140 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-y += syscall_$(BITS).o obj-$(CONFIG_X86_64) += vsyscall_64.o obj-$(CONFIG_X86_64) += vsyscall_emu_64.o +obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o obj-y += bootflag.o e820.o obj-y += pci-dma.o quirks.o topology.o kdebugfs.o obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index a7eb82d..7170f17 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1282,7 +1282,7 @@ void setup_local_APIC(void) unsigned int value, queued; int i, j, acked = 0; unsigned long long tsc = 0, ntsc; - long long max_loops = cpu_khz; + long long max_loops = cpu_khz ? 
cpu_khz : 1000000; if (cpu_has_tsc) rdtscll(tsc); @@ -1379,7 +1379,7 @@ void setup_local_APIC(void) break; } if (queued) { - if (cpu_has_tsc) { + if (cpu_has_tsc && cpu_khz) { rdtscll(ntsc); max_loops = (cpu_khz << 10) - (ntsc - tsc); } else diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 28233b9..ee51e67 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -509,6 +509,13 @@ static void early_init_amd(struct cpuinfo_x86 *c) } #endif + /* + * This is only needed to tell the kernel whether to use VMCALL + * and VMMCALL. VMMCALL is never executed except under virt, so + * we can set it unconditionally. + */ + set_cpu_cap(c, X86_FEATURE_VMMCALL); + /* F16h erratum 793, CVE-2013-6885 */ if (c->x86 == 0x16 && c->x86_model <= 0xf) { u64 val; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3533e2c..00cc6f7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -144,6 +144,8 @@ EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); static int __init x86_xsave_setup(char *s) { + if (strlen(s)) + return 0; setup_clear_cpu_cap(X86_FEATURE_XSAVE); setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); setup_clear_cpu_cap(X86_FEATURE_AVX); @@ -1135,7 +1137,7 @@ void syscall_init(void) /* Flags to clear on syscall */ wrmsrl(MSR_SYSCALL_MASK, X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF| - X86_EFLAGS_IOPL|X86_EFLAGS_AC); + X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT); } /* diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 87c0be5..f4a9985 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -154,6 +154,21 @@ static void early_init_intel(struct cpuinfo_x86 *c) setup_clear_cpu_cap(X86_FEATURE_ERMS); } } + + /* + * Intel Quark Core DevMan_001.pdf section 6.4.11 + * "The operating system also is required to invalidate (i.e., flush) + * the TLB when any changes are made to any of the page table entries. 
+ * The operating system must reload CR3 to cause the TLB to be flushed" + * + * As a result cpu_has_pge() in arch/x86/include/asm/tlbflush.h should + * be false so that __flush_tlb_all() causes CR3 insted of CR4.PGE + * to be modified + */ + if (c->x86 == 5 && c->x86_model == 9) { + pr_info("Disabling PGE capability bit\n"); + setup_clear_cpu_cap(X86_FEATURE_PGE); + } } #ifdef CONFIG_X86_32 @@ -369,6 +384,13 @@ static void init_intel(struct cpuinfo_x86 *c) detect_extended_topology(c); l2 = init_intel_cacheinfo(c); + + /* Detect legacy cache sizes if init_intel_cacheinfo did not */ + if (l2 == 0) { + cpu_detect_cache_sizes(c); + l2 = c->x86_cache_size; + } + if (c->cpuid_level > 9) { unsigned eax = cpuid_eax(10); /* Check for version and the number of counters */ @@ -483,6 +505,13 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) */ if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0)) size = 256; + + /* + * Intel Quark SoC X1000 contains a 4-way set associative + * 16K cache with a 16 byte cache line and 256 lines per tag + */ + if ((c->x86 == 5) && (c->x86_model == 9)) + size = 16; return size; } #endif @@ -688,7 +717,8 @@ static const struct cpu_dev intel_cpu_dev = { [3] = "OverDrive PODP5V83", [4] = "Pentium MMX", [7] = "Mobile Pentium 75 - 200", - [8] = "Mobile Pentium MMX" + [8] = "Mobile Pentium MMX", + [9] = "Quark SoC X1000", } }, { .vendor = X86_VENDOR_INTEL, .family = 6, .model_names = diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 71a39f3..6474807 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -63,6 +63,7 @@ static struct clocksource hyperv_cs = { .rating = 400, /* use this when running on Hyperv*/ .read = read_hv_clock, .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; static void __init ms_hyperv_init_platform(void) diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index ce2d0a2..0e25a1b 
100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -683,7 +683,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock) } /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ - count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); __flush_tlb(); /* Save MTRR state */ @@ -697,7 +697,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock) static void post_set(void) __releases(set_atomicity_lock) { /* Flush TLBs (no need to flush caches - they are disabled) */ - count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); __flush_tlb(); /* Intel (P6) standard MTRRs */ diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 5edd3c0..c7106f1 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -118,6 +118,9 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event) continue; if (event->attr.config1 & ~er->valid_mask) return -EINVAL; + /* Check if the extra msrs can be safely accessed*/ + if (!er->extra_msr_access) + return -ENXIO; reg->idx = er->idx; reg->config = event->attr.config1; diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index cc16faa..53bd272 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -279,14 +279,16 @@ struct extra_reg { u64 config_mask; u64 valid_mask; int idx; /* per_xxx->regs[] reg index */ + bool extra_msr_access; }; #define EVENT_EXTRA_REG(e, ms, m, vm, i) { \ - .event = (e), \ - .msr = (ms), \ - .config_mask = (m), \ - .valid_mask = (vm), \ - .idx = EXTRA_REG_##i, \ + .event = (e), \ + .msr = (ms), \ + .config_mask = (m), \ + .valid_mask = (vm), \ + .idx = EXTRA_REG_##i, \ + .extra_msr_access = true, \ } #define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \ diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index f31a165..b400d0b 100644 
--- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1365,6 +1365,15 @@ again: intel_pmu_lbr_read(); /* + * CondChgd bit 63 doesn't mean any overflow status. Ignore + * and clear the bit. + */ + if (__test_and_clear_bit(63, (unsigned long *)&status)) { + if (!status) + goto done; + } + + /* * PEBS overflow sets bit 62 in the global status register */ if (__test_and_clear_bit(62, (unsigned long *)&status)) { @@ -2135,6 +2144,41 @@ static void intel_snb_check_microcode(void) } } +/* + * Under certain circumstances, access certain MSR may cause #GP. + * The function tests if the input MSR can be safely accessed. + */ +static bool check_msr(unsigned long msr, u64 mask) +{ + u64 val_old, val_new, val_tmp; + + /* + * Read the current value, change it and read it back to see if it + * matches, this is needed to detect certain hardware emulators + * (qemu/kvm) that don't trap on the MSR access and always return 0s. + */ + if (rdmsrl_safe(msr, &val_old)) + return false; + + /* + * Only change the bits which can be updated by wrmsrl. + */ + val_tmp = val_old ^ mask; + if (wrmsrl_safe(msr, val_tmp) || + rdmsrl_safe(msr, &val_new)) + return false; + + if (val_new != val_tmp) + return false; + + /* Here it's sure that the MSR can be safely accessed. + * Restore the old value and return. 
+ */ + wrmsrl(msr, val_old); + + return true; +} + static __init void intel_sandybridge_quirk(void) { x86_pmu.check_microcode = intel_snb_check_microcode; @@ -2198,7 +2242,8 @@ __init int intel_pmu_init(void) union cpuid10_ebx ebx; struct event_constraint *c; unsigned int unused; - int version; + struct extra_reg *er; + int version, i; if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { switch (boot_cpu_data.x86) { @@ -2243,10 +2288,7 @@ __init int intel_pmu_init(void) if (version > 1) x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); - /* - * v2 and above have a perf capabilities MSR - */ - if (version > 1) { + if (boot_cpu_has(X86_FEATURE_PDCM)) { u64 capabilities; rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities); @@ -2404,6 +2446,9 @@ __init int intel_pmu_init(void) case 62: /* IvyBridge EP */ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + /* dTLB-load-misses on IVB is different than SNB */ + hw_cache_event_ids[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = 0x8108; /* DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK */ + memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); @@ -2503,6 +2548,34 @@ __init int intel_pmu_init(void) } } + /* + * Access LBR MSR may cause #GP under certain circumstances. + * E.g. KVM doesn't support LBR MSR + * Check all LBT MSR here. + * Disable LBR access if any LBR MSRs can not be accessed. + */ + if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL)) + x86_pmu.lbr_nr = 0; + for (i = 0; i < x86_pmu.lbr_nr; i++) { + if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) && + check_msr(x86_pmu.lbr_to + i, 0xffffUL))) + x86_pmu.lbr_nr = 0; + } + + /* + * Access extra MSR may cause #GP under certain circumstances. + * E.g. KVM doesn't support offcore event + * Check all extra_regs here. 
+ */ + if (x86_pmu.extra_regs) { + for (er = x86_pmu.extra_regs; er->msr; er++) { + er->extra_msr_access = check_msr(er->msr, 0x1ffUL); + /* Disable LBR select mapping */ + if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access) + x86_pmu.lbr_sel_map = NULL; + } + } + /* Support full width counters using alternative MSR range */ if (x86_pmu.intel_cap.full_width_write) { x86_pmu.max_period = x86_pmu.cntval_mask; diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 4118f9f..3e1cfbb 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -2764,6 +2764,17 @@ static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event) return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id()); } +/* + * Using uncore_pmu_event_init pmu event_init callback + * as a detection point for uncore events. + */ +static int uncore_pmu_event_init(struct perf_event *event); + +static bool is_uncore_event(struct perf_event *event) +{ + return event->pmu->event_init == uncore_pmu_event_init; +} + static int uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp) { @@ -2778,13 +2789,18 @@ uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, b return -EINVAL; n = box->n_events; - box->event_list[n] = leader; - n++; + + if (is_uncore_event(leader)) { + box->event_list[n] = leader; + n++; + } + if (!dogrp) return n; list_for_each_entry(event, &leader->sibling_list, group_entry) { - if (event->state <= PERF_EVENT_STATE_OFF) + if (!is_uncore_event(event) || + event->state <= PERF_EVENT_STATE_OFF) continue; if (n >= max_count) diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index addb207..66e274a 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -24,7 +24,6 @@ static char x86_stack_ids[][8] = { [ DEBUG_STACK-1 ] = "#DB", [ NMI_STACK-1 ] 
= "NMI", [ DOUBLEFAULT_STACK-1 ] = "#DF", - [ STACKFAULT_STACK-1 ] = "#SS", [ MCE_STACK-1 ] = "#MC", #if DEBUG_STKSZ > EXCEPTION_STKSZ [ N_EXCEPTION_STACKS ... diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index e491bfd..3e617f1 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -442,8 +442,9 @@ sysenter_past_esp: jnz sysenter_audit sysenter_do_call: cmpl $(NR_syscalls), %eax - jae syscall_badsys + jae sysenter_badsys call *sys_call_table(,%eax,4) +sysenter_after_call: movl %eax,PT_EAX(%esp) LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_ANY) @@ -524,6 +525,7 @@ ENTRY(system_call) jae syscall_badsys syscall_call: call *sys_call_table(,%eax,4) +syscall_after_call: movl %eax,PT_EAX(%esp) # store the return value syscall_exit: LOCKDEP_SYS_EXIT @@ -538,6 +540,7 @@ syscall_exit: restore_all: TRACE_IRQS_IRET restore_all_notrace: +#ifdef CONFIG_X86_ESPFIX32 movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS # Warning: PT_OLDSS(%esp) contains the wrong/random values if we # are returning to the kernel. @@ -548,6 +551,7 @@ restore_all_notrace: cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax CFI_REMEMBER_STATE je ldt_ss # returning to user-space with LDT SS +#endif restore_nocheck: RESTORE_REGS 4 # skip orig_eax/error_code irq_return: @@ -560,13 +564,9 @@ ENTRY(iret_exc) .previous _ASM_EXTABLE(irq_return,iret_exc) +#ifdef CONFIG_X86_ESPFIX32 CFI_RESTORE_STATE ldt_ss: - larl PT_OLDSS(%esp), %eax - jnz restore_nocheck - testl $0x00400000, %eax # returning to 32bit stack? 
- jnz restore_nocheck # allright, normal return - #ifdef CONFIG_PARAVIRT /* * The kernel can't run on a non-flat stack if paravirt mode @@ -608,6 +608,7 @@ ldt_ss: lss (%esp), %esp /* switch to espfix segment */ CFI_ADJUST_CFA_OFFSET -8 jmp restore_nocheck +#endif CFI_ENDPROC ENDPROC(system_call) @@ -698,8 +699,13 @@ syscall_fault: END(syscall_fault) syscall_badsys: - movl $-ENOSYS,PT_EAX(%esp) - jmp resume_userspace + movl $-ENOSYS,%eax + jmp syscall_after_call +END(syscall_badsys) + +sysenter_badsys: + movl $-ENOSYS,%eax + jmp sysenter_after_call END(syscall_badsys) CFI_ENDPROC /* @@ -715,6 +721,7 @@ END(syscall_badsys) * the high word of the segment base from the GDT and swiches to the * normal stack and adjusts ESP with the matching offset. */ +#ifdef CONFIG_X86_ESPFIX32 /* fixup the stack */ mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */ mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */ @@ -724,8 +731,10 @@ END(syscall_badsys) pushl_cfi %eax lss (%esp), %esp /* switch to the normal stack segment */ CFI_ADJUST_CFA_OFFSET -8 +#endif .endm .macro UNWIND_ESPFIX_STACK +#ifdef CONFIG_X86_ESPFIX32 movl %ss, %eax /* see if on espfix stack */ cmpw $__ESPFIX_SS, %ax @@ -736,6 +745,7 @@ END(syscall_badsys) /* switch to normal stack */ FIXUP_ESPFIX_STACK 27: +#endif .endm /* @@ -1356,11 +1366,13 @@ END(debug) ENTRY(nmi) RING0_INT_FRAME ASM_CLAC +#ifdef CONFIG_X86_ESPFIX32 pushl_cfi %eax movl %ss, %eax cmpw $__ESPFIX_SS, %ax popl_cfi %eax je nmi_espfix_stack +#endif cmpl $ia32_sysenter_target,(%esp) je nmi_stack_fixup pushl_cfi %eax @@ -1400,6 +1412,7 @@ nmi_debug_stack_check: FIX_STACK 24, nmi_stack_correct, 1 jmp nmi_stack_correct +#ifdef CONFIG_X86_ESPFIX32 nmi_espfix_stack: /* We have a RING0_INT_FRAME here. 
* @@ -1421,6 +1434,7 @@ nmi_espfix_stack: lss 12+4(%esp), %esp # back to espfix stack CFI_ADJUST_CFA_OFFSET -24 jmp irq_return +#endif CFI_ENDPROC END(nmi) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index abca4f4..8feab72 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -58,6 +58,7 @@ #include <asm/asm.h> #include <asm/context_tracking.h> #include <asm/smap.h> +#include <asm/pgtable_types.h> #include <linux/err.h> /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ @@ -1041,32 +1042,52 @@ restore_args: irq_return: INTERRUPT_RETURN - _ASM_EXTABLE(irq_return, bad_iret) -#ifdef CONFIG_PARAVIRT ENTRY(native_iret) - iretq - _ASM_EXTABLE(native_iret, bad_iret) + /* + * Are we returning to a stack segment from the LDT? Note: in + * 64-bit mode SS:RSP on the exception stack is always valid. + */ +#ifdef CONFIG_X86_ESPFIX64 + testb $4,(SS-RIP)(%rsp) + jnz native_irq_return_ldt #endif - .section .fixup,"ax" -bad_iret: +.global native_irq_return_iret +native_irq_return_iret: /* - * The iret traps when the %cs or %ss being restored is bogus. - * We've lost the original trap vector and error code. - * #GPF is the most likely one to get for an invalid selector. - * So pretend we completed the iret and took the #GPF in user mode. - * - * We are now running with the kernel GS after exception recovery. - * But error_entry expects us to have user GS to match the user %cs, - * so swap back. + * This may fault. Non-paranoid faults on return to userspace are + * handled by fixup_bad_iret. These include #SS, #GP, and #NP. + * Double-faults due to espfix64 are handled in do_double_fault. + * Other faults here are fatal. 
*/ - pushq $0 + iretq +#ifdef CONFIG_X86_ESPFIX64 +native_irq_return_ldt: + pushq_cfi %rax + pushq_cfi %rdi SWAPGS - jmp general_protection - - .previous + movq PER_CPU_VAR(espfix_waddr),%rdi + movq %rax,(0*8)(%rdi) /* RAX */ + movq (2*8)(%rsp),%rax /* RIP */ + movq %rax,(1*8)(%rdi) + movq (3*8)(%rsp),%rax /* CS */ + movq %rax,(2*8)(%rdi) + movq (4*8)(%rsp),%rax /* RFLAGS */ + movq %rax,(3*8)(%rdi) + movq (6*8)(%rsp),%rax /* SS */ + movq %rax,(5*8)(%rdi) + movq (5*8)(%rsp),%rax /* RSP */ + movq %rax,(4*8)(%rdi) + andl $0xffff0000,%eax + popq_cfi %rdi + orq PER_CPU_VAR(espfix_stack),%rax + SWAPGS + movq %rax,%rsp + popq_cfi %rax + jmp native_irq_return_iret +#endif /* edi: workmask, edx: work */ retint_careful: @@ -1118,9 +1139,9 @@ ENTRY(retint_kernel) call preempt_schedule_irq jmp exit_intr #endif - CFI_ENDPROC END(common_interrupt) + /* * End of kprobes section */ @@ -1483,7 +1504,7 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ paranoidzeroentry_ist debug do_debug DEBUG_STACK paranoidzeroentry_ist int3 do_int3 DEBUG_STACK -paranoiderrorentry stack_segment do_stack_segment +errorentry stack_segment do_stack_segment #ifdef CONFIG_XEN zeroentry xen_debug do_debug zeroentry xen_int3 do_int3 @@ -1593,16 +1614,15 @@ error_sti: /* * There are two places in the kernel that can potentially fault with - * usergs. Handle them here. The exception handlers after iret run with - * kernel gs again, so don't set the user space flag. B stepping K8s - * sometimes report an truncated RIP for IRET exceptions returning to - * compat mode. Check for these here too. + * usergs. Handle them here. B stepping K8s sometimes report a + * truncated RIP for IRET exceptions returning to compat mode. Check + * for these here too. 
*/ error_kernelspace: incl %ebx - leaq irq_return(%rip),%rcx + leaq native_irq_return_iret(%rip),%rcx cmpq %rcx,RIP+8(%rsp) - je error_swapgs + je error_bad_iret movl %ecx,%eax /* zero extend */ cmpq %rax,RIP+8(%rsp) je bstep_iret @@ -1613,7 +1633,15 @@ error_kernelspace: bstep_iret: /* Fix truncated RIP */ movq %rcx,RIP+8(%rsp) - jmp error_swapgs + /* fall through */ + +error_bad_iret: + SWAPGS + mov %rsp,%rdi + call fixup_bad_iret + mov %rax,%rsp + decl %ebx /* Return to usergs */ + jmp error_sti CFI_ENDPROC END(error_entry) diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c new file mode 100644 index 0000000..94d857f --- /dev/null +++ b/arch/x86/kernel/espfix_64.c @@ -0,0 +1,208 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright 2014 Intel Corporation; author: H. Peter Anvin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * ----------------------------------------------------------------------- */ + +/* + * The IRET instruction, when returning to a 16-bit segment, only + * restores the bottom 16 bits of the user space stack pointer. This + * causes some 16-bit software to break, but it also leaks kernel state + * to user space. + * + * This works around this by creating percpu "ministacks", each of which + * is mapped 2^16 times 64K apart. When we detect that the return SS is + * on the LDT, we copy the IRET frame to the ministack and use the + * relevant alias to return to userspace. 
The ministacks are mapped + * readonly, so if the IRET fault we promote #GP to #DF which is an IST + * vector and thus has its own stack; we then do the fixup in the #DF + * handler. + * + * This file sets up the ministacks and the related page tables. The + * actual ministack invocation is in entry_64.S. + */ + +#include <linux/init.h> +#include <linux/init_task.h> +#include <linux/kernel.h> +#include <linux/percpu.h> +#include <linux/gfp.h> +#include <linux/random.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/setup.h> +#include <asm/espfix.h> + +/* + * Note: we only need 6*8 = 48 bytes for the espfix stack, but round + * it up to a cache line to avoid unnecessary sharing. + */ +#define ESPFIX_STACK_SIZE (8*8UL) +#define ESPFIX_STACKS_PER_PAGE (PAGE_SIZE/ESPFIX_STACK_SIZE) + +/* There is address space for how many espfix pages? */ +#define ESPFIX_PAGE_SPACE (1UL << (PGDIR_SHIFT-PAGE_SHIFT-16)) + +#define ESPFIX_MAX_CPUS (ESPFIX_STACKS_PER_PAGE * ESPFIX_PAGE_SPACE) +#if CONFIG_NR_CPUS > ESPFIX_MAX_CPUS +# error "Need more than one PGD for the ESPFIX hack" +#endif + +#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) + +/* This contains the *bottom* address of the espfix stack */ +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack); +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr); + +/* Initialization mutex - should this be a spinlock? */ +static DEFINE_MUTEX(espfix_init_mutex); + +/* Page allocation bitmap - each page serves ESPFIX_STACKS_PER_PAGE CPUs */ +#define ESPFIX_MAX_PAGES DIV_ROUND_UP(CONFIG_NR_CPUS, ESPFIX_STACKS_PER_PAGE) +static void *espfix_pages[ESPFIX_MAX_PAGES]; + +static __page_aligned_bss pud_t espfix_pud_page[PTRS_PER_PUD] + __aligned(PAGE_SIZE); + +static unsigned int page_random, slot_random; + +/* + * This returns the bottom address of the espfix stack for a specific CPU. 
+ * The math allows for a non-power-of-two ESPFIX_STACK_SIZE, in which case + * we have to account for some amount of padding at the end of each page. + */ +static inline unsigned long espfix_base_addr(unsigned int cpu) +{ + unsigned long page, slot; + unsigned long addr; + + page = (cpu / ESPFIX_STACKS_PER_PAGE) ^ page_random; + slot = (cpu + slot_random) % ESPFIX_STACKS_PER_PAGE; + addr = (page << PAGE_SHIFT) + (slot * ESPFIX_STACK_SIZE); + addr = (addr & 0xffffUL) | ((addr & ~0xffffUL) << 16); + addr += ESPFIX_BASE_ADDR; + return addr; +} + +#define PTE_STRIDE (65536/PAGE_SIZE) +#define ESPFIX_PTE_CLONES (PTRS_PER_PTE/PTE_STRIDE) +#define ESPFIX_PMD_CLONES PTRS_PER_PMD +#define ESPFIX_PUD_CLONES (65536/(ESPFIX_PTE_CLONES*ESPFIX_PMD_CLONES)) + +#define PGTABLE_PROT ((_KERNPG_TABLE & ~_PAGE_RW) | _PAGE_NX) + +static void init_espfix_random(void) +{ + unsigned long rand; + + /* + * This is run before the entropy pools are initialized, + * but this is hopefully better than nothing. + */ + if (!arch_get_random_long(&rand)) { + /* The constant is an arbitrary large prime */ + rdtscll(rand); + rand *= 0xc345c6b72fd16123UL; + } + + slot_random = rand % ESPFIX_STACKS_PER_PAGE; + page_random = (rand / ESPFIX_STACKS_PER_PAGE) + & (ESPFIX_PAGE_SPACE - 1); +} + +void __init init_espfix_bsp(void) +{ + pgd_t *pgd_p; + pteval_t ptemask; + + ptemask = __supported_pte_mask; + + /* Install the espfix pud into the kernel page directory */ + pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)]; + pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page); + + /* Randomize the locations */ + init_espfix_random(); + + /* The rest is the same as for any other processor */ + init_espfix_ap(); +} + +void init_espfix_ap(void) +{ + unsigned int cpu, page; + unsigned long addr; + pud_t pud, *pud_p; + pmd_t pmd, *pmd_p; + pte_t pte, *pte_p; + int n; + void *stack_page; + pteval_t ptemask; + + /* We only have to do this once... 
*/ + if (likely(this_cpu_read(espfix_stack))) + return; /* Already initialized */ + + cpu = smp_processor_id(); + addr = espfix_base_addr(cpu); + page = cpu/ESPFIX_STACKS_PER_PAGE; + + /* Did another CPU already set this up? */ + stack_page = ACCESS_ONCE(espfix_pages[page]); + if (likely(stack_page)) + goto done; + + mutex_lock(&espfix_init_mutex); + + /* Did we race on the lock? */ + stack_page = ACCESS_ONCE(espfix_pages[page]); + if (stack_page) + goto unlock_done; + + ptemask = __supported_pte_mask; + + pud_p = &espfix_pud_page[pud_index(addr)]; + pud = *pud_p; + if (!pud_present(pud)) { + pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP); + pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask)); + paravirt_alloc_pmd(&init_mm, __pa(pmd_p) >> PAGE_SHIFT); + for (n = 0; n < ESPFIX_PUD_CLONES; n++) + set_pud(&pud_p[n], pud); + } + + pmd_p = pmd_offset(&pud, addr); + pmd = *pmd_p; + if (!pmd_present(pmd)) { + pte_p = (pte_t *)__get_free_page(PGALLOC_GFP); + pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask)); + paravirt_alloc_pte(&init_mm, __pa(pte_p) >> PAGE_SHIFT); + for (n = 0; n < ESPFIX_PMD_CLONES; n++) + set_pmd(&pmd_p[n], pmd); + } + + pte_p = pte_offset_kernel(&pmd, addr); + stack_page = (void *)__get_free_page(GFP_KERNEL); + pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask)); + for (n = 0; n < ESPFIX_PTE_CLONES; n++) + set_pte(&pte_p[n*PTE_STRIDE], pte); + + /* Job is done for this CPU and any CPU which shares this page */ + ACCESS_ONCE(espfix_pages[page]) = stack_page; + +unlock_done: + mutex_unlock(&espfix_init_mutex); +done: + this_cpu_write(espfix_stack, addr); + this_cpu_write(espfix_waddr, (unsigned long)stack_page + + (addr & ~PAGE_MASK)); +} diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index e625319..f8ab203 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -297,16 +297,7 @@ int ftrace_int3_handler(struct pt_regs *regs) static int ftrace_write(unsigned long ip, const char *val, int size) { - /* - * On 
x86_64, kernel text mappings are mapped read-only with - * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead - * of the kernel text mapping to modify the kernel text. - * - * For 32bit kernels, these mappings are same and we can use - * kernel identity mapping to modify code. - */ - if (within(ip, (unsigned long)_text, (unsigned long)_etext)) - ip = (unsigned long)__va(__pa_symbol(ip)); + ip = text_ip_addr(ip); return probe_kernel_write((void *)ip, val, size); } @@ -659,8 +650,8 @@ ftrace_modify_code(unsigned long ip, unsigned const char *old_code, ret = -EPERM; goto out; } - run_sync(); out: + run_sync(); return ret; fail_update: diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 22d0687..3910078 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -262,6 +262,83 @@ __visible void smp_trace_x86_platform_ipi(struct pt_regs *regs) EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); #ifdef CONFIG_HOTPLUG_CPU + +/* These two declarations are only used in check_irq_vectors_for_cpu_disable() + * below, which is protected by stop_machine(). Putting them on the stack + * results in a stack frame overflow. Dynamically allocating could result in a + * failure so declare these two cpumasks as global. + */ +static struct cpumask affinity_new, online_new; + +/* + * This cpu is going to be removed and its vectors migrated to the remaining + * online cpus. Check to see if there are enough vectors in the remaining cpus. + * This function is protected by stop_machine(). 
+ */ +int check_irq_vectors_for_cpu_disable(void) +{ + int irq, cpu; + unsigned int this_cpu, vector, this_count, count; + struct irq_desc *desc; + struct irq_data *data; + + this_cpu = smp_processor_id(); + cpumask_copy(&online_new, cpu_online_mask); + cpu_clear(this_cpu, online_new); + + this_count = 0; + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { + irq = __this_cpu_read(vector_irq[vector]); + if (irq >= 0) { + desc = irq_to_desc(irq); + data = irq_desc_get_irq_data(desc); + cpumask_copy(&affinity_new, data->affinity); + cpu_clear(this_cpu, affinity_new); + + /* Do not count inactive or per-cpu irqs. */ + if (!irq_has_action(irq) || irqd_is_per_cpu(data)) + continue; + + /* + * A single irq may be mapped to multiple + * cpu's vector_irq[] (for example IOAPIC cluster + * mode). In this case we have two + * possibilities: + * + * 1) the resulting affinity mask is empty; that is + * this the down'd cpu is the last cpu in the irq's + * affinity mask, or + * + * 2) the resulting affinity mask is no longer + * a subset of the online cpus but the affinity + * mask is not zero; that is the down'd cpu is the + * last online cpu in a user set affinity mask. + */ + if (cpumask_empty(&affinity_new) || + !cpumask_subset(&affinity_new, &online_new)) + this_count++; + } + } + + count = 0; + for_each_online_cpu(cpu) { + if (cpu == this_cpu) + continue; + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; + vector++) { + if (per_cpu(vector_irq, cpu)[vector] < 0) + count++; + } + } + + if (count < this_count) { + pr_warn("CPU %d disable failed: CPU has %u vectors assigned and there are only %u available.\n", + this_cpu, this_count, count); + return -ERANGE; + } + return 0; +} + /* A cpu has been removed from cpu_online_mask. Reset irq affinities. 
*/ void fixup_irqs(void) { diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 79a3f96..a1f5b18 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -1017,6 +1017,15 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) regs->flags &= ~X86_EFLAGS_IF; trace_hardirqs_off(); regs->ip = (unsigned long)(jp->entry); + + /* + * jprobes use jprobe_return() which skips the normal return + * path of the function, and this messes up the accounting of the + * function graph tracer to get messed up. + * + * Pause function graph tracing while performing the jprobe function. + */ + pause_graph_tracing(); return 1; } @@ -1042,24 +1051,25 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); u8 *addr = (u8 *) (regs->ip - 1); struct jprobe *jp = container_of(p, struct jprobe, kp); + void *saved_sp = kcb->jprobe_saved_sp; if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) { - if (stack_addr(regs) != kcb->jprobe_saved_sp) { + if (stack_addr(regs) != saved_sp) { struct pt_regs *saved_regs = &kcb->jprobe_saved_regs; printk(KERN_ERR "current sp %p does not match saved sp %p\n", - stack_addr(regs), kcb->jprobe_saved_sp); + stack_addr(regs), saved_sp); printk(KERN_ERR "Saved registers for jprobe %p\n", jp); show_regs(saved_regs); printk(KERN_ERR "Current registers\n"); show_regs(regs); BUG(); } + /* It's OK to start function graph tracing again */ + unpause_graph_tracing(); *regs = kcb->jprobe_saved_regs; - memcpy((kprobe_opcode_t *)(kcb->jprobe_saved_sp), - kcb->jprobes_stack, - MIN_STACK_SIZE(kcb->jprobe_saved_sp)); + memcpy(saved_sp, kcb->jprobes_stack, MIN_STACK_SIZE(saved_sp)); preempt_enable_no_resched(); return 1; } diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index f022c54..e725933 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -280,7 +280,14 @@ do_async_page_fault(struct 
pt_regs *regs, unsigned long error_code) static void __init paravirt_ops_setup(void) { pv_info.name = "KVM"; - pv_info.paravirt_enabled = 1; + + /* + * KVM isn't paravirt in the sense of paravirt_enabled. A KVM + * guest kernel works like a bare metal kernel with additional + * features, and paravirt_enabled is about features that are + * missing. + */ + pv_info.paravirt_enabled = 0; if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) pv_cpu_ops.io_delay = kvm_io_delay; diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 1570e07..23457e5 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -262,7 +262,6 @@ void __init kvmclock_init(void) #endif kvm_get_preset_lpj(); clocksource_register_hz(&kvm_clock, NSEC_PER_SEC); - pv_info.paravirt_enabled = 1; pv_info.name = "KVM"; if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index ebc9873..c37886d 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -229,6 +229,11 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) } } + if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) { + error = -EINVAL; + goto out_unlock; + } + fill_ldt(&ldt, &ldt_info); if (oldmode) ldt.avl = 0; diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index 3f08f34..a1da673 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -6,7 +6,6 @@ DEF_NATIVE(pv_irq_ops, irq_disable, "cli"); DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq"); DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax"); -DEF_NATIVE(pv_cpu_ops, iret, "iretq"); DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); @@ -50,7 +49,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, 
PATCH_SITE(pv_irq_ops, save_fl); PATCH_SITE(pv_irq_ops, irq_enable); PATCH_SITE(pv_irq_ops, irq_disable); - PATCH_SITE(pv_cpu_ops, iret); PATCH_SITE(pv_cpu_ops, irq_enable_sysexit); PATCH_SITE(pv_cpu_ops, usergs_sysret32); PATCH_SITE(pv_cpu_ops, usergs_sysret64); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 7461f50..0686fe3 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1441,15 +1441,6 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, force_sig_info(SIGTRAP, &info, tsk); } - -#ifdef CONFIG_X86_32 -# define IS_IA32 1 -#elif defined CONFIG_IA32_EMULATION -# define IS_IA32 is_compat_task() -#else -# define IS_IA32 0 -#endif - /* * We must return the syscall number to actually look up in the table. * This can be -1L to skip running any syscall at all. @@ -1487,7 +1478,7 @@ long syscall_trace_enter(struct pt_regs *regs) if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->orig_ax); - if (IS_IA32) + if (is_ia32_task()) audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_ax, regs->bx, regs->cx, diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c index 2a26819..80eab01 100644 --- a/arch/x86/kernel/resource.c +++ b/arch/x86/kernel/resource.c @@ -37,10 +37,12 @@ static void remove_e820_regions(struct resource *avail) void arch_remove_reservations(struct resource *avail) { - /* Trim out BIOS areas (low 1MB and high 2MB) and E820 regions */ + /* + * Trim out BIOS area (high 2MB) and E820 regions. We do not remove + * the low 1MB unconditionally, as this area is needed for some ISA + * cards requiring a memory range, e.g. the i82365 PCMCIA controller. 
+ */ if (avail->flags & IORESOURCE_MEM) { - if (avail->start < BIOS_END) - avail->start = BIOS_END; resource_clip(avail, BIOS_ROM_BASE, BIOS_ROM_END); remove_e820_regions(avail); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index ecfe089..2094434 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -673,6 +673,11 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) * handler too. */ regs->flags &= ~(X86_EFLAGS_DF|X86_EFLAGS_RF|X86_EFLAGS_TF); + /* + * Ensure the signal handler starts with the new fpu state. + */ + if (used_math()) + drop_init_fpu(current); } signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP)); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6cacab6..9ccb7ef 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -265,6 +265,13 @@ static void notrace start_secondary(void *unused) check_tsc_sync_target(); /* + * Enable the espfix hack for this CPU + */ +#ifdef CONFIG_X86_ESPFIX64 + init_espfix_ap(); +#endif + + /* * We need to hold vector_lock so there the set of online cpus * does not change while we are assigning vectors to cpus. Holding * this lock ensures we don't half assign or remove an irq from a cpu. 
@@ -1278,6 +1285,9 @@ static void remove_siblinginfo(int cpu) for_each_cpu(sibling, cpu_sibling_mask(cpu)) cpumask_clear_cpu(cpu, cpu_sibling_mask(sibling)); + for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) + cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling)); + cpumask_clear(cpu_llc_shared_mask(cpu)); cpumask_clear(cpu_sibling_mask(cpu)); cpumask_clear(cpu_core_mask(cpu)); c->phys_proc_id = 0; @@ -1310,6 +1320,12 @@ void cpu_disable_common(void) int native_cpu_disable(void) { + int ret; + + ret = check_irq_vectors_for_cpu_disable(); + if (ret) + return ret; + clear_local_APIC(); cpu_disable_common(); diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index f7fec09..7fc5e84 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -27,6 +27,58 @@ static int get_free_idx(void) return -ESRCH; } +static bool tls_desc_okay(const struct user_desc *info) +{ + /* + * For historical reasons (i.e. no one ever documented how any + * of the segmentation APIs work), user programs can and do + * assume that a struct user_desc that's all zeros except for + * entry_number means "no segment at all". This never actually + * worked. In fact, up to Linux 3.19, a struct user_desc like + * this would create a 16-bit read-write segment with base and + * limit both equal to zero. + * + * That was close enough to "no segment at all" until we + * hardened this function to disallow 16-bit TLS segments. Fix + * it up by interpreting these zeroed segments the way that they + * were almost certainly intended to be interpreted. + * + * The correct way to ask for "no segment at all" is to specify + * a user_desc that satisfies LDT_empty. To keep everything + * working, we accept both. + * + * Note that there's a similar kludge in modify_ldt -- look at + * the distinction between modes 1 and 0x11. + */ + if (LDT_empty(info) || LDT_zero(info)) + return true; + + /* + * espfix is required for 16-bit data segments, but espfix + * only works for LDT segments. 
+ */ + if (!info->seg_32bit) + return false; + + /* Only allow data segments in the TLS array. */ + if (info->contents > 1) + return false; + + /* + * Non-present segments with DPL 3 present an interesting attack + * surface. The kernel should handle such segments correctly, + * but TLS is very difficult to protect in a sandbox, so prevent + * such segments from being created. + * + * If userspace needs to remove a TLS entry, it can still delete + * it outright. + */ + if (info->seg_not_present) + return false; + + return true; +} + static void set_tls_desc(struct task_struct *p, int idx, const struct user_desc *info, int n) { @@ -40,7 +92,7 @@ static void set_tls_desc(struct task_struct *p, int idx, cpu = get_cpu(); while (n-- > 0) { - if (LDT_empty(info)) + if (LDT_empty(info) || LDT_zero(info)) desc->a = desc->b = 0; else fill_ldt(desc, info); @@ -66,6 +118,9 @@ int do_set_thread_area(struct task_struct *p, int idx, if (copy_from_user(&info, u_info, sizeof(info))) return -EFAULT; + if (!tls_desc_okay(&info)) + return -EINVAL; + if (idx == -1) idx = info.entry_number; @@ -192,6 +247,7 @@ int regset_tls_set(struct task_struct *target, const struct user_regset *regset, { struct user_desc infobuf[GDT_ENTRY_TLS_ENTRIES]; const struct user_desc *info; + int i; if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) || (pos % sizeof(struct user_desc)) != 0 || @@ -205,6 +261,10 @@ int regset_tls_set(struct task_struct *target, const struct user_regset *regset, else info = infobuf; + for (i = 0; i < count / sizeof(struct user_desc); i++) + if (!tls_desc_okay(info + i)) + return -EINVAL; + set_tls_desc(target, GDT_ENTRY_TLS_MIN + (pos / sizeof(struct user_desc)), info, count / sizeof(struct user_desc)); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 6663bb5..d249814 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -235,33 +235,41 @@ DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) 
DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS) DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present) -#ifdef CONFIG_X86_32 DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment) -#endif DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) #ifdef CONFIG_X86_64 /* Runs on IST stack */ -dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) -{ - enum ctx_state prev_state; - - prev_state = exception_enter(); - if (notify_die(DIE_TRAP, "stack segment", regs, error_code, - X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) { - conditional_sti_ist(regs); - do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL); - conditional_cli_ist(regs); - } - exception_exit(prev_state); -} - dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) { static const char str[] = "double fault"; struct task_struct *tsk = current; +#ifdef CONFIG_X86_ESPFIX64 + extern unsigned char native_irq_return_iret[]; + + /* + * If IRET takes a non-IST fault on the espfix64 stack, then we + * end up promoting it to a doublefault. In that case, modify + * the stack to make it look like we just entered the #GP + * handler from user space, similar to bad_iret. + */ + if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY && + regs->cs == __KERNEL_CS && + regs->ip == (unsigned long)native_irq_return_iret) + { + struct pt_regs *normal_regs = task_pt_regs(current); + + /* Fake a #GP(0) from userspace. */ + memmove(&normal_regs->ip, (void *)regs->sp, 5*8); + normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */ + regs->ip = (unsigned long)general_protection; + regs->sp = (unsigned long)&normal_regs->orig_ax; + return; + } +#endif + exception_enter(); /* Return not checked because double check cannot be ignored */ notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); @@ -375,7 +383,7 @@ exit: * for scheduling or signal handling. 
The actual stack switch is done in * entry.S */ -asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) +asmlinkage notrace __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) { struct pt_regs *regs = eregs; /* Did already sync */ @@ -394,6 +402,35 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) *regs = *eregs; return regs; } + +struct bad_iret_stack { + void *error_entry_ret; + struct pt_regs regs; +}; + +asmlinkage __visible notrace __kprobes +struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) +{ + /* + * This is called from entry_64.S early in handling a fault + * caused by a bad iret to user mode. To handle the fault + * correctly, we want move our stack frame to task_pt_regs + * and we want to pretend that the exception came from the + * iret target. + */ + struct bad_iret_stack *new_stack = + container_of(task_pt_regs(current), + struct bad_iret_stack, regs); + + /* Copy the IRET target to the new stack. */ + memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8); + + /* Copy the remainder of the stack from the current stack. 
*/ + memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip)); + + BUG_ON(!user_mode_vm(&new_stack->regs)); + return new_stack; +} #endif /* @@ -766,7 +803,7 @@ void __init trap_init(void) set_intr_gate(X86_TRAP_OLD_MF, &coprocessor_segment_overrun); set_intr_gate(X86_TRAP_TS, &invalid_TSS); set_intr_gate(X86_TRAP_NP, &segment_not_present); - set_intr_gate_ist(X86_TRAP_SS, &stack_segment, STACKFAULT_STACK); + set_intr_gate(X86_TRAP_SS, stack_segment); set_intr_gate(X86_TRAP_GP, &general_protection); set_intr_gate(X86_TRAP_SPURIOUS, &spurious_interrupt_bug); set_intr_gate(X86_TRAP_MF, &coprocessor_error); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 930e5d4..cefe57c 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -386,7 +386,7 @@ static unsigned long quick_pit_calibrate(void) goto success; } } - pr_err("Fast TSC calibration failed\n"); + pr_info("Fast TSC calibration failed\n"); return 0; success: @@ -974,14 +974,17 @@ void __init tsc_init(void) x86_init.timers.tsc_pre_init(); - if (!cpu_has_tsc) + if (!cpu_has_tsc) { + setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); return; + } tsc_khz = x86_platform.calibrate_tsc(); cpu_khz = tsc_khz; if (!tsc_khz) { mark_tsc_unstable("could not calculate TSC khz"); + setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); return; } diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 1f96f93..09ce23a 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -125,10 +125,10 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, if (!show_unhandled_signals) return; - pr_notice_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", - level, current->comm, task_pid_nr(current), - message, regs->ip, regs->cs, - regs->sp, regs->ax, regs->si, regs->di); + printk_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", + level, current->comm, task_pid_nr(current), + message, regs->ip, 
regs->cs, + regs->sp, regs->ax, regs->si, regs->di); } static int addr_to_vsyscall_nr(unsigned long addr) diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 422fd82..f5869fc 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -268,8 +268,6 @@ int save_xstate_sig(void __user *buf, void __user *buf_fx, int size) if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate)) return -1; - drop_init_fpu(tsk); /* trigger finit */ - return 0; } @@ -399,8 +397,11 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) set_used_math(); } - if (use_eager_fpu()) + if (use_eager_fpu()) { + preempt_disable(); math_state_restore(); + preempt_enable(); + } return err; } else { diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 92e6f4a..ab1d459 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -498,11 +498,6 @@ static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc) masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc); } -static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) -{ - register_address_increment(ctxt, &ctxt->_eip, rel); -} - static u32 desc_limit_scaled(struct desc_struct *desc) { u32 limit = get_desc_limit(desc); @@ -576,6 +571,38 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt) return emulate_exception(ctxt, NM_VECTOR, 0, false); } +static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst, + int cs_l) +{ + switch (ctxt->op_bytes) { + case 2: + ctxt->_eip = (u16)dst; + break; + case 4: + ctxt->_eip = (u32)dst; + break; + case 8: + if ((cs_l && is_noncanonical_address(dst)) || + (!cs_l && (dst & ~(u32)-1))) + return emulate_gp(ctxt, 0); + ctxt->_eip = dst; + break; + default: + WARN(1, "unsupported eip assignment size\n"); + } + return X86EMUL_CONTINUE; +} + +static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst) +{ + return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64); +} + +static 
inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) +{ + return assign_eip_near(ctxt, ctxt->_eip + rel); +} + static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg) { u16 selector; @@ -1964,13 +1991,15 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt) case 2: /* call near abs */ { long int old_eip; old_eip = ctxt->_eip; - ctxt->_eip = ctxt->src.val; + rc = assign_eip_near(ctxt, ctxt->src.val); + if (rc != X86EMUL_CONTINUE) + break; ctxt->src.val = old_eip; rc = em_push(ctxt); break; } case 4: /* jmp abs */ - ctxt->_eip = ctxt->src.val; + rc = assign_eip_near(ctxt, ctxt->src.val); break; case 5: /* jmp far */ rc = em_jmp_far(ctxt); @@ -2002,16 +2031,21 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt) static int em_ret(struct x86_emulate_ctxt *ctxt) { - ctxt->dst.type = OP_REG; - ctxt->dst.addr.reg = &ctxt->_eip; - ctxt->dst.bytes = ctxt->op_bytes; - return em_pop(ctxt); + int rc; + unsigned long eip; + + rc = emulate_pop(ctxt, &eip, ctxt->op_bytes); + if (rc != X86EMUL_CONTINUE) + return rc; + + return assign_eip_near(ctxt, eip); } static int em_ret_far(struct x86_emulate_ctxt *ctxt) { int rc; unsigned long cs; + int cpl = ctxt->ops->cpl(ctxt); rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes); if (rc != X86EMUL_CONTINUE) @@ -2021,6 +2055,9 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) rc = emulate_pop(ctxt, &cs, ctxt->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; + /* Outer-privilege level return is not implemented */ + if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl) + return X86EMUL_UNHANDLEABLE; rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS); return rc; } @@ -2279,7 +2316,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) { const struct x86_emulate_ops *ops = ctxt->ops; struct desc_struct cs, ss; - u64 msr_data; + u64 msr_data, rcx, rdx; int usermode; u16 cs_sel = 0, ss_sel = 0; @@ -2295,6 +2332,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) else usermode = 
X86EMUL_MODE_PROT32; + rcx = reg_read(ctxt, VCPU_REGS_RCX); + rdx = reg_read(ctxt, VCPU_REGS_RDX); + cs.dpl = 3; ss.dpl = 3; ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data); @@ -2312,6 +2352,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) ss_sel = cs_sel + 8; cs.d = 0; cs.l = 1; + if (is_noncanonical_address(rcx) || + is_noncanonical_address(rdx)) + return emulate_gp(ctxt, 0); break; } cs_sel |= SELECTOR_RPL_MASK; @@ -2320,8 +2363,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); - ctxt->_eip = reg_read(ctxt, VCPU_REGS_RDX); - *reg_write(ctxt, VCPU_REGS_RSP) = reg_read(ctxt, VCPU_REGS_RCX); + ctxt->_eip = rdx; + *reg_write(ctxt, VCPU_REGS_RSP) = rcx; return X86EMUL_CONTINUE; } @@ -2860,10 +2903,13 @@ static int em_aad(struct x86_emulate_ctxt *ctxt) static int em_call(struct x86_emulate_ctxt *ctxt) { + int rc; long rel = ctxt->src.val; ctxt->src.val = (unsigned long)ctxt->_eip; - jmp_rel(ctxt, rel); + rc = jmp_rel(ctxt, rel); + if (rc != X86EMUL_CONTINUE) + return rc; return em_push(ctxt); } @@ -2895,11 +2941,12 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) { int rc; + unsigned long eip; - ctxt->dst.type = OP_REG; - ctxt->dst.addr.reg = &ctxt->_eip; - ctxt->dst.bytes = ctxt->op_bytes; - rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes); + rc = emulate_pop(ctxt, &eip, ctxt->op_bytes); + if (rc != X86EMUL_CONTINUE) + return rc; + rc = assign_eip_near(ctxt, eip); if (rc != X86EMUL_CONTINUE) return rc; rsp_increment(ctxt, ctxt->src.val); @@ -3189,20 +3236,24 @@ static int em_lmsw(struct x86_emulate_ctxt *ctxt) static int em_loop(struct x86_emulate_ctxt *ctxt) { + int rc = X86EMUL_CONTINUE; + register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1); if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) && (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, 
ctxt->eflags))) - jmp_rel(ctxt, ctxt->src.val); + rc = jmp_rel(ctxt, ctxt->src.val); - return X86EMUL_CONTINUE; + return rc; } static int em_jcxz(struct x86_emulate_ctxt *ctxt) { + int rc = X86EMUL_CONTINUE; + if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) - jmp_rel(ctxt, ctxt->src.val); + rc = jmp_rel(ctxt, ctxt->src.val); - return X86EMUL_CONTINUE; + return rc; } static int em_in(struct x86_emulate_ctxt *ctxt) @@ -4554,7 +4605,7 @@ special_insn: break; case 0x70 ... 0x7f: /* jcc (short) */ if (test_cc(ctxt->b, ctxt->eflags)) - jmp_rel(ctxt, ctxt->src.val); + rc = jmp_rel(ctxt, ctxt->src.val); break; case 0x8d: /* lea r16/r32, m */ ctxt->dst.val = ctxt->src.addr.mem.ea; @@ -4583,7 +4634,7 @@ special_insn: break; case 0xe9: /* jmp rel */ case 0xeb: /* jmp rel short */ - jmp_rel(ctxt, ctxt->src.val); + rc = jmp_rel(ctxt, ctxt->src.val); ctxt->dst.type = OP_NONE; /* Disable writeback. */ break; case 0xf4: /* hlt */ @@ -4703,7 +4754,7 @@ twobyte_insn: break; case 0x80 ... 0x8f: /* jnz rel, etc*/ if (test_cc(ctxt->b, ctxt->eflags)) - jmp_rel(ctxt, ctxt->src.val); + rc = jmp_rel(ctxt, ctxt->src.val); break; case 0x90 ... 
0x9f: /* setcc r/m8 */ ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags); diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 518d864..298781d 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -262,8 +262,10 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) return; timer = &pit->pit_state.timer; + mutex_lock(&pit->pit_state.lock); if (hrtimer_cancel(timer)) hrtimer_start_expires(timer, HRTIMER_MODE_ABS); + mutex_unlock(&pit->pit_state.lock); } static void destroy_pit_timer(struct kvm_pit *pit) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index d86ff15..92bbb39 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -360,6 +360,8 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) static inline void apic_set_isr(int vec, struct kvm_lapic *apic) { + /* Note that we never get here with APIC virtualization enabled. */ + if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR)) ++apic->isr_count; BUG_ON(apic->isr_count > MAX_APIC_VECTOR); @@ -371,12 +373,48 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic) apic->highest_isr_cache = vec; } +static inline int apic_find_highest_isr(struct kvm_lapic *apic) +{ + int result; + + /* + * Note that isr_count is always 1, and highest_isr_cache + * is always -1, with APIC virtualization enabled. + */ + if (!apic->isr_count) + return -1; + if (likely(apic->highest_isr_cache != -1)) + return apic->highest_isr_cache; + + result = find_highest_vector(apic->regs + APIC_ISR); + ASSERT(result == -1 || result >= 16); + + return result; +} + static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) { - if (__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR)) + struct kvm_vcpu *vcpu; + if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR)) + return; + + vcpu = apic->vcpu; + + /* + * We do get here for APIC virtualization enabled if the guest + * uses the Hyper-V APIC enlightenment. 
In this case we may need + * to trigger a new interrupt delivery by writing the SVI field; + * on the other hand isr_count and highest_isr_cache are unused + * and must be left alone. + */ + if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) + kvm_x86_ops->hwapic_isr_update(vcpu->kvm, + apic_find_highest_isr(apic)); + else { --apic->isr_count; - BUG_ON(apic->isr_count < 0); - apic->highest_isr_cache = -1; + BUG_ON(apic->isr_count < 0); + apic->highest_isr_cache = -1; + } } int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) @@ -456,22 +494,6 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu) __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); } -static inline int apic_find_highest_isr(struct kvm_lapic *apic) -{ - int result; - - /* Note that isr_count is always 1 with vid enabled */ - if (!apic->isr_count) - return -1; - if (likely(apic->highest_isr_cache != -1)) - return apic->highest_isr_cache; - - result = find_highest_vector(apic->regs + APIC_ISR); - ASSERT(result == -1 || result >= 16); - - return result; -} - void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr) { struct kvm_lapic *apic = vcpu->arch.apic; @@ -1605,6 +1627,8 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) int vector = kvm_apic_has_interrupt(vcpu); struct kvm_lapic *apic = vcpu->arch.apic; + /* Note that we never get here with APIC virtualization enabled. */ + if (vector == -1) return -1; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 74dd129..8ad01b4 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -198,16 +198,20 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask) EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); /* - * spte bits of bit 3 ~ bit 11 are used as low 9 bits of generation number, - * the bits of bits 52 ~ bit 61 are used as high 10 bits of generation - * number. + * the low bit of the generation number is always presumed to be zero. + * This disables mmio caching during memslot updates. 
The concept is + * similar to a seqcount but instead of retrying the access we just punt + * and ignore the cache. + * + * spte bits 3-11 are used as bits 1-9 of the generation number, + * the bits 52-61 are used as bits 10-19 of the generation number. */ -#define MMIO_SPTE_GEN_LOW_SHIFT 3 +#define MMIO_SPTE_GEN_LOW_SHIFT 2 #define MMIO_SPTE_GEN_HIGH_SHIFT 52 -#define MMIO_GEN_SHIFT 19 -#define MMIO_GEN_LOW_SHIFT 9 -#define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 1) +#define MMIO_GEN_SHIFT 20 +#define MMIO_GEN_LOW_SHIFT 10 +#define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 2) #define MMIO_GEN_MASK ((1 << MMIO_GEN_SHIFT) - 1) #define MMIO_MAX_GEN ((1 << MMIO_GEN_SHIFT) - 1) @@ -3161,7 +3165,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) return; - vcpu_clear_mmio_info(vcpu, ~0ul); + vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { hpa_t root = vcpu->arch.mmu.root_hpa; @@ -4424,8 +4428,8 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm) * The very rare case: if the generation-number is round, * zap all shadow pages. 
*/ - if (unlikely(kvm_current_mmio_generation(kvm) >= MMIO_MAX_GEN)) { - printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n"); + if (unlikely(kvm_current_mmio_generation(kvm) == 0)) { + printk_ratelimited(KERN_DEBUG "kvm: zapping shadow pages for mmio generation wraparound\n"); kvm_mmu_invalidate_zap_all_pages(kvm); } } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 612c717..5dcdff5 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3204,7 +3204,7 @@ static int wrmsr_interception(struct vcpu_svm *svm) msr.host_initiated = false; svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; - if (svm_set_msr(&svm->vcpu, &msr)) { + if (kvm_set_msr(&svm->vcpu, &msr)) { trace_kvm_msr_write_ex(ecx, data); kvm_inject_gp(&svm->vcpu, 0); } else { @@ -3486,9 +3486,9 @@ static int handle_exit(struct kvm_vcpu *vcpu) if (exit_code >= ARRAY_SIZE(svm_exit_handlers) || !svm_exit_handlers[exit_code]) { - kvm_run->exit_reason = KVM_EXIT_UNKNOWN; - kvm_run->hw.hardware_exit_reason = exit_code; - return 0; + WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_code); + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; } return svm_exit_handlers[exit_code](svm); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 59181e6..c7663b1 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2540,12 +2540,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; msr = find_msr_entry(vmx, msr_index); if (msr) { + u64 old_msr_data = msr->data; msr->data = data; if (msr - vmx->guest_msrs < vmx->save_nmsrs) { preempt_disable(); - kvm_set_shared_msr(msr->index, msr->data, - msr->mask); + ret = kvm_set_shared_msr(msr->index, msr->data, + msr->mask); preempt_enable(); + if (ret) + msr->data = old_msr_data; } break; } @@ -5113,7 +5116,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu) msr.data = data; msr.index = ecx; msr.host_initiated = false; - if (vmx_set_msr(vcpu, &msr) != 0) { + if (kvm_set_msr(vcpu, &msr) 
!= 0) { trace_kvm_msr_write_ex(ecx, data); kvm_inject_gp(vcpu, 0); return 1; @@ -6385,6 +6388,12 @@ static int handle_invept(struct kvm_vcpu *vcpu) return 1; } +static int handle_invvpid(struct kvm_vcpu *vcpu) +{ + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. Otherwise they set the kvm_run parameter to indicate what needs @@ -6430,6 +6439,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_MWAIT_INSTRUCTION] = handle_invalid_op, [EXIT_REASON_MONITOR_INSTRUCTION] = handle_invalid_op, [EXIT_REASON_INVEPT] = handle_invept, + [EXIT_REASON_INVVPID] = handle_invvpid, }; static const int kvm_vmx_max_exit_handlers = @@ -6656,7 +6666,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD: case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE: case EXIT_REASON_VMOFF: case EXIT_REASON_VMON: - case EXIT_REASON_INVEPT: + case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID: /* * VMX instructions trap unconditionally. This allows L1 to * emulate them for its L2 guest, i.e., allows 3-level nesting! 
@@ -6812,10 +6822,10 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) && kvm_vmx_exit_handlers[exit_reason]) return kvm_vmx_exit_handlers[exit_reason](vcpu); else { - vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; - vcpu->run->hw.hardware_exit_reason = exit_reason; + WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason); + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; } - return 0; } static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e1b7b17..14915fe 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -225,20 +225,25 @@ static void kvm_shared_msr_cpu_online(void) shared_msr_update(i, shared_msrs_global.msrs[i]); } -void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) +int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) { unsigned int cpu = smp_processor_id(); struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); + int err; if (((value ^ smsr->values[slot].curr) & mask) == 0) - return; + return 0; smsr->values[slot].curr = value; - wrmsrl(shared_msrs_global.msrs[slot], value); + err = wrmsrl_safe(shared_msrs_global.msrs[slot], value); + if (err) + return 1; + if (!smsr->registered) { smsr->urn.on_user_return = kvm_on_user_return; user_return_notifier_register(&smsr->urn); smsr->registered = true; } + return 0; } EXPORT_SYMBOL_GPL(kvm_set_shared_msr); @@ -910,7 +915,6 @@ void kvm_enable_efer_bits(u64 mask) } EXPORT_SYMBOL_GPL(kvm_enable_efer_bits); - /* * Writes msr value into into the appropriate "register". * Returns 0 on success, non-0 otherwise. 
@@ -918,8 +922,34 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits); */ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) { + switch (msr->index) { + case MSR_FS_BASE: + case MSR_GS_BASE: + case MSR_KERNEL_GS_BASE: + case MSR_CSTAR: + case MSR_LSTAR: + if (is_noncanonical_address(msr->data)) + return 1; + break; + case MSR_IA32_SYSENTER_EIP: + case MSR_IA32_SYSENTER_ESP: + /* + * IA32_SYSENTER_ESP and IA32_SYSENTER_EIP cause #GP if + * non-canonical address is written on Intel but not on + * AMD (which ignores the top 32-bits, because it does + * not implement 64-bit SYSENTER). + * + * 64-bit code should hence be able to write a non-canonical + * value on AMD. Making the address canonical ensures that + * vmentry does not fail on Intel after writing a non-canonical + * value, and that something deterministic happens if the guest + * invokes 64-bit SYSENTER. + */ + msr->data = get_canonical(msr->data); + } return kvm_x86_ops->set_msr(vcpu, msr); } +EXPORT_SYMBOL_GPL(kvm_set_msr); /* * Adapt set_msr() to msr_io()'s calling convention @@ -1073,7 +1103,6 @@ static inline u64 get_kernel_ns(void) { struct timespec ts; - WARN_ON(preemptible()); ktime_get_ts(&ts); monotonic_to_bootbased(&ts); return timespec_to_ns(&ts); @@ -4842,7 +4871,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) ++vcpu->stat.insn_emulation_fail; trace_kvm_emulate_insn_failed(vcpu); - if (!is_guest_mode(vcpu)) { + if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) { vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; vcpu->run->internal.ndata = 0; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 3186542..7626d3e 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -78,15 +78,23 @@ static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu, vcpu->arch.mmio_gva = gva & PAGE_MASK; vcpu->arch.access = access; vcpu->arch.mmio_gfn = gfn; + vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation; +} 
+ +static inline bool vcpu_match_mmio_gen(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.mmio_gen == kvm_memslots(vcpu->kvm)->generation; } /* - * Clear the mmio cache info for the given gva, - * specially, if gva is ~0ul, we clear all mmio cache info. + * Clear the mmio cache info for the given gva. If gva is MMIO_GVA_ANY, we + * clear all mmio cache info. */ +#define MMIO_GVA_ANY (~(gva_t)0) + static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva) { - if (gva != (~0ul) && vcpu->arch.mmio_gva != (gva & PAGE_MASK)) + if (gva != MMIO_GVA_ANY && vcpu->arch.mmio_gva != (gva & PAGE_MASK)) return; vcpu->arch.mmio_gva = 0; @@ -94,7 +102,8 @@ static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva) static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva) { - if (vcpu->arch.mmio_gva && vcpu->arch.mmio_gva == (gva & PAGE_MASK)) + if (vcpu_match_mmio_gen(vcpu) && vcpu->arch.mmio_gva && + vcpu->arch.mmio_gva == (gva & PAGE_MASK)) return true; return false; @@ -102,7 +111,8 @@ static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva) static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) { - if (vcpu->arch.mmio_gfn && vcpu->arch.mmio_gfn == gpa >> PAGE_SHIFT) + if (vcpu_match_mmio_gen(vcpu) && vcpu->arch.mmio_gfn && + vcpu->arch.mmio_gfn == gpa >> PAGE_SHIFT) return true; return false; diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 0002a3a..3620928 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -30,11 +30,13 @@ struct pg_state { unsigned long start_address; unsigned long current_address; const struct addr_marker *marker; + unsigned long lines; }; struct addr_marker { unsigned long start_address; const char *name; + unsigned long max_lines; }; /* indices for address_markers; keep sync'd w/ address_markers below */ @@ -45,6 +47,7 @@ enum address_markers_idx { LOW_KERNEL_NR, VMALLOC_START_NR, VMEMMAP_START_NR, + 
ESPFIX_START_NR, HIGH_KERNEL_NR, MODULES_VADDR_NR, MODULES_END_NR, @@ -67,6 +70,7 @@ static struct addr_marker address_markers[] = { { PAGE_OFFSET, "Low Kernel Mapping" }, { VMALLOC_START, "vmalloc() Area" }, { VMEMMAP_START, "Vmemmap" }, + { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, { __START_KERNEL_map, "High Kernel Mapping" }, { MODULES_VADDR, "Modules" }, { MODULES_END, "End Modules" }, @@ -163,7 +167,7 @@ static void note_page(struct seq_file *m, struct pg_state *st, pgprot_t new_prot, int level) { pgprotval_t prot, cur; - static const char units[] = "KMGTPE"; + static const char units[] = "BKMGTPE"; /* * If we have a "break" in the series, we need to flush the state that @@ -178,6 +182,7 @@ static void note_page(struct seq_file *m, struct pg_state *st, st->current_prot = new_prot; st->level = level; st->marker = address_markers; + st->lines = 0; seq_printf(m, "---[ %s ]---\n", st->marker->name); } else if (prot != cur || level != st->level || st->current_address >= st->marker[1].start_address) { @@ -188,17 +193,21 @@ static void note_page(struct seq_file *m, struct pg_state *st, /* * Now print the actual finished series */ - seq_printf(m, "0x%0*lx-0x%0*lx ", - width, st->start_address, - width, st->current_address); - - delta = (st->current_address - st->start_address) >> 10; - while (!(delta & 1023) && unit[1]) { - delta >>= 10; - unit++; + if (!st->marker->max_lines || + st->lines < st->marker->max_lines) { + seq_printf(m, "0x%0*lx-0x%0*lx ", + width, st->start_address, + width, st->current_address); + + delta = (st->current_address - st->start_address) >> 10; + while (!(delta & 1023) && unit[1]) { + delta >>= 10; + unit++; + } + seq_printf(m, "%9lu%c ", delta, *unit); + printk_prot(m, st->current_prot, st->level); } - seq_printf(m, "%9lu%c ", delta, *unit); - printk_prot(m, st->current_prot, st->level); + st->lines++; /* * We print markers for special areas of address space, diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 
9d980d8..fa029fb 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -58,11 +58,6 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, { return NULL; } - -int pmd_huge_support(void) -{ - return 0; -} #else struct page * @@ -80,11 +75,6 @@ int pud_huge(pud_t pud) { return !!(pud_val(pud) & _PAGE_PSE); } - -int pmd_huge_support(void) -{ - return 1; -} #endif /* x86_64 also uses this file */ diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 104d56a..b599241 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1110,7 +1110,7 @@ void mark_rodata_ro(void) unsigned long end = (unsigned long) &__end_rodata_hpage_align; unsigned long text_end = PFN_ALIGN(&__stop___ex_table); unsigned long rodata_end = PFN_ALIGN(&__end_rodata); - unsigned long all_end = PFN_ALIGN(&_end); + unsigned long all_end; printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", (end - start) >> 10); @@ -1121,7 +1121,16 @@ void mark_rodata_ro(void) /* * The rodata/data/bss/brk section (but not the kernel text!) * should also be not-executable. + * + * We align all_end to PMD_SIZE because the existing mapping + * is a full PMD. If we would align _brk_end to PAGE_SIZE we + * split the PMD and the reminder between _brk_end and the end + * of the PMD will remain mapped executable. + * + * Any PMD which was setup after the one which covers _brk_end + * has been zapped already via cleanup_highmem(). 
*/ + all_end = roundup((unsigned long)_brk_end, PMD_SIZE); set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT); rodata_test(); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 799580c..94bd247 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -50,6 +50,21 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size, return err; } +static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages, + void *arg) +{ + unsigned long i; + + for (i = 0; i < nr_pages; ++i) + if (pfn_valid(start_pfn + i) && + !PageReserved(pfn_to_page(start_pfn + i))) + return 1; + + WARN_ONCE(1, "ioremap on RAM pfn 0x%lx\n", start_pfn); + + return 0; +} + /* * Remap an arbitrary physical address space into the kernel virtual * address space. Needed when the kernel wants to access high addresses @@ -93,14 +108,11 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, /* * Don't allow anybody to remap normal RAM that we're using.. */ + pfn = phys_addr >> PAGE_SHIFT; last_pfn = last_addr >> PAGE_SHIFT; - for (pfn = phys_addr >> PAGE_SHIFT; pfn <= last_pfn; pfn++) { - int is_ram = page_is_ram(pfn); - - if (is_ram && pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn))) - return NULL; - WARN_ON_ONCE(is_ram); - } + if (walk_system_ram_range(pfn, last_pfn - pfn + 1, NULL, + __ioremap_check_ram) == 1) + return NULL; /* * Mappings have to be page-aligned diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index bb32480..aabdf76 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -389,7 +389,7 @@ phys_addr_t slow_virt_to_phys(void *__virt_addr) psize = page_level_size(level); pmask = page_level_mask(level); offset = virt_addr & ~pmask; - phys_addr = pte_pfn(*pte) << PAGE_SHIFT; + phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT; return (phys_addr | offset); } EXPORT_SYMBOL_GPL(slow_virt_to_phys); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index dfa537a..5da29d0 100644 --- 
a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -386,13 +386,20 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma, int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { - int young; - - young = ptep_test_and_clear_young(vma, address, ptep); - if (young) - flush_tlb_page(vma, address); - - return young; + /* + * On x86 CPUs, clearing the accessed bit without a TLB flush + * doesn't cause data corruption. [ It could cause incorrect + * page aging and the (mistaken) reclaim of hot pages, but the + * chance of that should be relatively low. ] + * + * So as a performance optimization don't flush the TLB when + * clearing the accessed bit, it will eventually be flushed by + * a context switch or a VM operation anyway. [ In the rare + * event of it not getting flushed for a long time the delay + * shouldn't really matter because there's no real memory + * pressure for swapout to react to. ] + */ + return ptep_test_and_clear_young(vma, address, ptep); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index ae699b3..dd8dda1 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -103,7 +103,7 @@ static void flush_tlb_func(void *info) if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm)) return; - count_vm_event(NR_TLB_REMOTE_FLUSH_RECEIVED); + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { if (f->flush_end == TLB_FLUSH_ALL) local_flush_tlb(); @@ -131,7 +131,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask, info.flush_start = start; info.flush_end = end; - count_vm_event(NR_TLB_REMOTE_FLUSH); + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); if (is_uv_system()) { unsigned int cpu; @@ -151,44 +151,19 @@ void flush_tlb_current_task(void) preempt_disable(); - count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); local_flush_tlb(); if (cpumask_any_but(mm_cpumask(mm), 
smp_processor_id()) < nr_cpu_ids) flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); preempt_enable(); } -/* - * It can find out the THP large page, or - * HUGETLB page in tlb_flush when THP disabled - */ -static inline unsigned long has_large_page(struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - unsigned long addr = ALIGN(start, HPAGE_SIZE); - for (; addr < end; addr += HPAGE_SIZE) { - pgd = pgd_offset(mm, addr); - if (likely(!pgd_none(*pgd))) { - pud = pud_offset(pgd, addr); - if (likely(!pud_none(*pud))) { - pmd = pmd_offset(pud, addr); - if (likely(!pmd_none(*pmd))) - if (pmd_large(*pmd)) - return addr; - } - } - } - return 0; -} - void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long vmflag) { unsigned long addr; unsigned act_entries, tlb_entries = 0; + unsigned long nr_base_pages; preempt_disable(); if (current->active_mm != mm) @@ -210,21 +185,20 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, tlb_entries = tlb_lli_4k[ENTRIES]; else tlb_entries = tlb_lld_4k[ENTRIES]; + /* Assume all of TLB entries was occupied by this task */ - act_entries = mm->total_vm > tlb_entries ? tlb_entries : mm->total_vm; + act_entries = tlb_entries >> tlb_flushall_shift; + act_entries = mm->total_vm > act_entries ? 
act_entries : mm->total_vm; + nr_base_pages = (end - start) >> PAGE_SHIFT; /* tlb_flushall_shift is on balance point, details in commit log */ - if ((end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift) { - count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); + if (nr_base_pages > act_entries) { + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); local_flush_tlb(); } else { - if (has_large_page(mm, start, end)) { - local_flush_tlb(); - goto flush_all; - } /* flush range by one by one 'invlpg' */ for (addr = start; addr < end; addr += PAGE_SIZE) { - count_vm_event(NR_TLB_LOCAL_FLUSH_ONE); + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); __flush_tlb_single(addr); } @@ -262,7 +236,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long start) static void do_flush_tlb_all(void *info) { - count_vm_event(NR_TLB_REMOTE_FLUSH_RECEIVED); + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); __flush_tlb_all(); if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) leave_mm(smp_processor_id()); @@ -270,7 +244,7 @@ static void do_flush_tlb_all(void *info) void flush_tlb_all(void) { - count_vm_event(NR_TLB_REMOTE_FLUSH); + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); on_each_cpu(do_flush_tlb_all, NULL, 1); } diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index db6b1ab..96a159a 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -162,6 +162,10 @@ pcibios_align_resource(void *data, const struct resource *res, return start; if (start & 0x300) start = (start + 0x3ff) & ~0x3ff; + } else if (res->flags & IORESOURCE_MEM) { + /* The low 1MB range is reserved for ISA cards */ + if (start < BIOS_END) + start = BIOS_END; } return start; } diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index 38ae65d..63a8993 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -212,10 +212,10 @@ 203 common sched_setaffinity sys_sched_setaffinity 204 common sched_getaffinity sys_sched_getaffinity 205 64 set_thread_area -206 
common io_setup sys_io_setup +206 64 io_setup sys_io_setup 207 common io_destroy sys_io_destroy 208 common io_getevents sys_io_getevents -209 common io_submit sys_io_submit +209 64 io_submit sys_io_submit 210 common io_cancel sys_io_cancel 211 64 get_thread_area 212 common lookup_dcookie sys_lookup_dcookie @@ -356,3 +356,5 @@ 540 x32 process_vm_writev compat_sys_process_vm_writev 541 x32 setsockopt compat_sys_setsockopt 542 x32 getsockopt compat_sys_getsockopt +543 x32 io_setup compat_sys_io_setup +544 x32 io_submit compat_sys_io_submit diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c index 531d426..bd16d6c 100644 --- a/arch/x86/um/sys_call_table_32.c +++ b/arch/x86/um/sys_call_table_32.c @@ -34,7 +34,7 @@ typedef asmlinkage void (*sys_call_ptr_t)(void); extern asmlinkage void sys_ni_syscall(void); -const sys_call_ptr_t sys_call_table[] __cacheline_aligned = { +const sys_call_ptr_t sys_call_table[] ____cacheline_aligned = { /* * Smells like a compiler bug -- it doesn't work * when the & below is removed. diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c index f2f0723..9578308 100644 --- a/arch/x86/um/sys_call_table_64.c +++ b/arch/x86/um/sys_call_table_64.c @@ -46,7 +46,7 @@ typedef void (*sys_call_ptr_t)(void); extern void sys_ni_syscall(void); -const sys_call_ptr_t sys_call_table[] __cacheline_aligned = { +const sys_call_ptr_t sys_call_table[] ____cacheline_aligned = { /* * Smells like a compiler bug -- it doesn't work * when the & below is removed. diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 431e875..ab6ba35 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c @@ -117,30 +117,45 @@ subsys_initcall(init_vdso); struct linux_binprm; -/* Put the vdso above the (randomized) stack with another randomized offset. - This way there is no hole in the middle of address space. - To save memory make sure it is still in the same PTE as the stack top. 
- This doesn't give that many random bits */ +/* + * Put the vdso above the (randomized) stack with another randomized + * offset. This way there is no hole in the middle of address space. + * To save memory make sure it is still in the same PTE as the stack + * top. This doesn't give that many random bits. + * + * Note that this algorithm is imperfect: the distribution of the vdso + * start address within a PMD is biased toward the end. + * + * Only used for the 64-bit and x32 vdsos. + */ static unsigned long vdso_addr(unsigned long start, unsigned len) { unsigned long addr, end; unsigned offset; - end = (start + PMD_SIZE - 1) & PMD_MASK; + + /* + * Round up the start address. It can start out unaligned as a result + * of stack start randomization. + */ + start = PAGE_ALIGN(start); + + /* Round the lowest possible end address up to a PMD boundary. */ + end = (start + len + PMD_SIZE - 1) & PMD_MASK; if (end >= TASK_SIZE_MAX) end = TASK_SIZE_MAX; end -= len; - /* This loses some more bits than a modulo, but is cheaper */ - offset = get_random_int() & (PTRS_PER_PTE - 1); - addr = start + (offset << PAGE_SHIFT); - if (addr >= end) - addr = end; + + if (end > start) { + offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1); + addr = start + (offset << PAGE_SHIFT); + } else { + addr = start; + } /* - * page-align it here so that get_unmapped_area doesn't - * align it wrongfully again to the next page. addr can come in 4K - * unaligned here as a result of stack start randomization. + * Forcibly align the final address in case we have a hardware + * issue that requires alignment for performance reasons. 
*/ - addr = PAGE_ALIGN(addr); addr = align_vdso_addr(addr); return addr; diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index be6b860..ba81b54 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -274,7 +274,7 @@ void __init xen_init_spinlocks(void) printk(KERN_DEBUG "xen: PV spinlocks disabled\n"); return; } - + printk(KERN_DEBUG "xen: PV spinlocks enabled\n"); pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning); pv_lock_ops.unlock_kick = xen_unlock_kick; } @@ -290,6 +290,9 @@ static __init int xen_init_spinlocks_jump(void) if (!xen_pvspin) return 0; + if (!xen_domain()) + return 0; + static_key_slow_inc(¶virt_ticketlocks_enabled); return 0; } diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index ee36589..90bfa52 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -445,7 +445,7 @@ void xen_setup_timer(int cpu) irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt, IRQF_DISABLED|IRQF_PERCPU| IRQF_NOBALANCING|IRQF_TIMER| - IRQF_FORCE_RESUME, + IRQF_FORCE_RESUME|IRQF_EARLY_RESUME, name, NULL); memcpy(evt, xen_clockevent, sizeof(*evt)); diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index 0fdf5d0..4651cb9 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -67,7 +67,12 @@ #define VMALLOC_START 0xC0000000 #define VMALLOC_END 0xC7FEFFFF #define TLBTEMP_BASE_1 0xC7FF0000 -#define TLBTEMP_BASE_2 0xC7FF8000 +#define TLBTEMP_BASE_2 (TLBTEMP_BASE_1 + DCACHE_WAY_SIZE) +#if 2 * DCACHE_WAY_SIZE > ICACHE_WAY_SIZE +#define TLBTEMP_SIZE (2 * DCACHE_WAY_SIZE) +#else +#define TLBTEMP_SIZE ICACHE_WAY_SIZE +#endif /* * For the Xtensa architecture, the PTE layout is as follows: diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h index fd686dc..c7211e7 100644 --- a/arch/xtensa/include/asm/uaccess.h +++ b/arch/xtensa/include/asm/uaccess.h @@ -52,7 +52,12 @@ */ .macro get_fs ad, sp GET_CURRENT(\ad,\sp) +#if 
THREAD_CURRENT_DS > 1020 + addi \ad, \ad, TASK_THREAD + l32i \ad, \ad, THREAD_CURRENT_DS - TASK_THREAD +#else l32i \ad, \ad, THREAD_CURRENT_DS +#endif .endm /* diff --git a/arch/xtensa/include/uapi/asm/ioctls.h b/arch/xtensa/include/uapi/asm/ioctls.h index b4cb110..a47909f 100644 --- a/arch/xtensa/include/uapi/asm/ioctls.h +++ b/arch/xtensa/include/uapi/asm/ioctls.h @@ -28,17 +28,17 @@ #define TCSETSW 0x5403 #define TCSETSF 0x5404 -#define TCGETA _IOR('t', 23, struct termio) -#define TCSETA _IOW('t', 24, struct termio) -#define TCSETAW _IOW('t', 25, struct termio) -#define TCSETAF _IOW('t', 28, struct termio) +#define TCGETA 0x80127417 /* _IOR('t', 23, struct termio) */ +#define TCSETA 0x40127418 /* _IOW('t', 24, struct termio) */ +#define TCSETAW 0x40127419 /* _IOW('t', 25, struct termio) */ +#define TCSETAF 0x4012741C /* _IOW('t', 28, struct termio) */ #define TCSBRK _IO('t', 29) #define TCXONC _IO('t', 30) #define TCFLSH _IO('t', 31) -#define TIOCSWINSZ _IOW('t', 103, struct winsize) -#define TIOCGWINSZ _IOR('t', 104, struct winsize) +#define TIOCSWINSZ 0x40087467 /* _IOW('t', 103, struct winsize) */ +#define TIOCGWINSZ 0x80087468 /* _IOR('t', 104, struct winsize) */ #define TIOCSTART _IO('t', 110) /* start output, like ^Q */ #define TIOCSTOP _IO('t', 111) /* stop output, like ^S */ #define TIOCOUTQ _IOR('t', 115, int) /* output queue size */ @@ -88,7 +88,6 @@ #define TIOCSETD _IOW('T', 35, int) #define TIOCGETD _IOR('T', 36, int) #define TCSBRKP _IOW('T', 37, int) /* Needed for POSIX tcsendbreak()*/ -#define TIOCTTYGSTRUCT _IOR('T', 38, struct tty_struct) /* For debugging only*/ #define TIOCSBRK _IO('T', 39) /* BSD compatibility */ #define TIOCCBRK _IO('T', 40) /* BSD compatibility */ #define TIOCGSID _IOR('T', 41, pid_t) /* Return the session ID of FD*/ @@ -114,8 +113,10 @@ #define TIOCSERGETLSR _IOR('T', 89, unsigned int) /* Get line status reg. 
*/ /* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ # define TIOCSER_TEMT 0x01 /* Transmitter physically empty */ -#define TIOCSERGETMULTI _IOR('T', 90, struct serial_multiport_struct) /* Get multiport config */ -#define TIOCSERSETMULTI _IOW('T', 91, struct serial_multiport_struct) /* Set multiport config */ +#define TIOCSERGETMULTI 0x80a8545a /* Get multiport config */ + /* _IOR('T', 90, struct serial_multiport_struct) */ +#define TIOCSERSETMULTI 0x40a8545b /* Set multiport config */ + /* _IOW('T', 91, struct serial_multiport_struct) */ #define TIOCMIWAIT _IO('T', 92) /* wait for a change on serial input line(s) */ #define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ diff --git a/arch/xtensa/include/uapi/asm/unistd.h b/arch/xtensa/include/uapi/asm/unistd.h index 51940fe..513effd 100644 --- a/arch/xtensa/include/uapi/asm/unistd.h +++ b/arch/xtensa/include/uapi/asm/unistd.h @@ -384,7 +384,8 @@ __SYSCALL(174, sys_chroot, 1) #define __NR_pivot_root 175 __SYSCALL(175, sys_pivot_root, 2) #define __NR_umount 176 -__SYSCALL(176, sys_umount, 2) +__SYSCALL(176, sys_oldumount, 1) +#define __ARCH_WANT_SYS_OLDUMOUNT #define __NR_swapoff 177 __SYSCALL(177, sys_swapoff, 1) #define __NR_sync 178 diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index b61e251..4b8e636 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -1001,9 +1001,8 @@ ENTRY(fast_syscall_xtensa) movi a7, 4 # sizeof(unsigned int) access_ok a3, a7, a0, a2, .Leac # a0: scratch reg, a2: sp - addi a6, a6, -1 # assuming SYS_XTENSA_ATOMIC_SET = 1 - _bgeui a6, SYS_XTENSA_COUNT - 1, .Lill - _bnei a6, SYS_XTENSA_ATOMIC_CMP_SWP - 1, .Lnswp + _bgeui a6, SYS_XTENSA_COUNT, .Lill + _bnei a6, SYS_XTENSA_ATOMIC_CMP_SWP, .Lnswp /* Fall through for ATOMIC_CMP_SWP. 
*/ @@ -1015,27 +1014,26 @@ TRY s32i a5, a3, 0 # different, modify value l32i a7, a2, PT_AREG7 # restore a7 l32i a0, a2, PT_AREG0 # restore a0 movi a2, 1 # and return 1 - addi a6, a6, 1 # restore a6 (really necessary?) rfe 1: l32i a7, a2, PT_AREG7 # restore a7 l32i a0, a2, PT_AREG0 # restore a0 movi a2, 0 # return 0 (note that we cannot set - addi a6, a6, 1 # restore a6 (really necessary?) rfe .Lnswp: /* Atomic set, add, and exg_add. */ TRY l32i a7, a3, 0 # orig + addi a6, a6, -SYS_XTENSA_ATOMIC_SET add a0, a4, a7 # + arg moveqz a0, a4, a6 # set + addi a6, a6, SYS_XTENSA_ATOMIC_SET TRY s32i a0, a3, 0 # write new value mov a0, a2 mov a2, a7 l32i a7, a0, PT_AREG7 # restore a7 l32i a0, a0, PT_AREG0 # restore a0 - addi a6, a6, 1 # restore a6 (really necessary?) rfe CATCH @@ -1044,7 +1042,7 @@ CATCH movi a2, -EFAULT rfe -.Lill: l32i a7, a2, PT_AREG0 # restore a7 +.Lill: l32i a7, a2, PT_AREG7 # restore a7 l32i a0, a2, PT_AREG0 # restore a0 movi a2, -EINVAL rfe @@ -1600,7 +1598,7 @@ ENTRY(fast_second_level_miss) rsr a0, excvaddr bltu a0, a3, 2f - addi a1, a0, -(2 << (DCACHE_ALIAS_ORDER + PAGE_SHIFT)) + addi a1, a0, -TLBTEMP_SIZE bgeu a1, a3, 2f /* Check if we have to restore an ITLB mapping. */ @@ -1855,7 +1853,6 @@ ENTRY(_switch_to) entry a1, 16 - mov a10, a2 # preserve 'prev' (a2) mov a11, a3 # and 'next' (a3) l32i a4, a2, TASK_THREAD_INFO @@ -1863,8 +1860,14 @@ ENTRY(_switch_to) save_xtregs_user a4 a6 a8 a9 a12 a13 THREAD_XTREGS_USER - s32i a0, a10, THREAD_RA # save return address - s32i a1, a10, THREAD_SP # save stack pointer +#if THREAD_RA > 1020 || THREAD_SP > 1020 + addi a10, a2, TASK_THREAD + s32i a0, a10, THREAD_RA - TASK_THREAD # save return address + s32i a1, a10, THREAD_SP - TASK_THREAD # save stack pointer +#else + s32i a0, a2, THREAD_RA # save return address + s32i a1, a2, THREAD_SP # save stack pointer +#endif /* Disable ints while we manipulate the stack pointer. 
*/ @@ -1905,7 +1908,6 @@ ENTRY(_switch_to) load_xtregs_user a5 a6 a8 a9 a12 a13 THREAD_XTREGS_USER wsr a14, ps - mov a2, a10 # return 'prev' rsync retw diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c index 2d9cc6d..e8b76b8 100644 --- a/arch/xtensa/kernel/pci-dma.c +++ b/arch/xtensa/kernel/pci-dma.c @@ -49,9 +49,8 @@ dma_alloc_coherent(struct device *dev,size_t size,dma_addr_t *handle,gfp_t flag) /* We currently don't support coherent memory outside KSEG */ - if (ret < XCHAL_KSEG_CACHED_VADDR - || ret >= XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE) - BUG(); + BUG_ON(ret < XCHAL_KSEG_CACHED_VADDR || + ret > XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE - 1); if (ret != 0) { @@ -68,10 +67,11 @@ EXPORT_SYMBOL(dma_alloc_coherent); void dma_free_coherent(struct device *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle) { - long addr=(long)vaddr+XCHAL_KSEG_CACHED_VADDR-XCHAL_KSEG_BYPASS_VADDR; + unsigned long addr = (unsigned long)vaddr + + XCHAL_KSEG_CACHED_VADDR - XCHAL_KSEG_BYPASS_VADDR; - if (addr < 0 || addr >= XCHAL_KSEG_SIZE) - BUG(); + BUG_ON(addr < XCHAL_KSEG_CACHED_VADDR || + addr > XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE - 1); free_pages(addr, get_order(size)); } diff --git a/arch/xtensa/kernel/vectors.S b/arch/xtensa/kernel/vectors.S index cb8fd44..da0224d 100644 --- a/arch/xtensa/kernel/vectors.S +++ b/arch/xtensa/kernel/vectors.S @@ -376,38 +376,42 @@ _DoubleExceptionVector_WindowOverflow: beqz a2, 1f # if at start of vector, don't restore addi a0, a0, -128 - bbsi a0, 8, 1f # don't restore except for overflow 8 and 12 - bbsi a0, 7, 2f + bbsi.l a0, 8, 1f # don't restore except for overflow 8 and 12 + + /* + * This fixup handler is for the extremely unlikely case where the + * overflow handler's reference thru a0 gets a hardware TLB refill + * that bumps out the (distinct, aliasing) TLB entry that mapped its + * prior references thru a9/a13, and where our reference now thru + * a9/a13 gets a 2nd-level miss exception (not hardware TLB 
refill). + */ + movi a2, window_overflow_restore_a0_fixup + s32i a2, a3, EXC_TABLE_FIXUP + l32i a2, a3, EXC_TABLE_DOUBLE_SAVE + xsr a3, excsave1 + + bbsi.l a0, 7, 2f /* * Restore a0 as saved by _WindowOverflow8(). - * - * FIXME: we really need a fixup handler for this L32E, - * for the extremely unlikely case where the overflow handler's - * reference thru a0 gets a hardware TLB refill that bumps out - * the (distinct, aliasing) TLB entry that mapped its prior - * references thru a9, and where our reference now thru a9 - * gets a 2nd-level miss exception (not hardware TLB refill). */ - l32e a2, a9, -16 - wsr a2, depc # replace the saved a0 - j 1f + l32e a0, a9, -16 + wsr a0, depc # replace the saved a0 + j 3f 2: /* * Restore a0 as saved by _WindowOverflow12(). - * - * FIXME: we really need a fixup handler for this L32E, - * for the extremely unlikely case where the overflow handler's - * reference thru a0 gets a hardware TLB refill that bumps out - * the (distinct, aliasing) TLB entry that mapped its prior - * references thru a13, and where our reference now thru a13 - * gets a 2nd-level miss exception (not hardware TLB refill). */ - l32e a2, a13, -16 - wsr a2, depc # replace the saved a0 + l32e a0, a13, -16 + wsr a0, depc # replace the saved a0 +3: + xsr a3, excsave1 + movi a0, 0 + s32i a0, a3, EXC_TABLE_FIXUP + s32i a2, a3, EXC_TABLE_DOUBLE_SAVE 1: /* * Restore WindowBase while leaving all address registers restored. @@ -449,6 +453,7 @@ _DoubleExceptionVector_WindowOverflow: s32i a0, a2, PT_DEPC +_DoubleExceptionVector_handle_exception: addx4 a0, a0, a3 l32i a0, a0, EXC_TABLE_FAST_USER xsr a3, excsave1 @@ -464,11 +469,120 @@ _DoubleExceptionVector_WindowOverflow: rotw -3 j 1b - .end literal_prefix ENDPROC(_DoubleExceptionVector) /* + * Fixup handler for TLB miss in double exception handler for window owerflow. + * We get here with windowbase set to the window that was being spilled and + * a0 trashed. 
a0 bit 7 determines if this is a call8 (bit clear) or call12 + * (bit set) window. + * + * We do the following here: + * - go to the original window retaining a0 value; + * - set up exception stack to return back to appropriate a0 restore code + * (we'll need to rotate window back and there's no place to save this + * information, use different return address for that); + * - handle the exception; + * - go to the window that was being spilled; + * - set up window_overflow_restore_a0_fixup as a fixup routine; + * - reload a0; + * - restore the original window; + * - reset the default fixup routine; + * - return to user. By the time we get to this fixup handler all information + * about the conditions of the original double exception that happened in + * the window overflow handler is lost, so we just return to userspace to + * retry overflow from start. + * + * a0: value of depc, original value in depc + * a2: trashed, original value in EXC_TABLE_DOUBLE_SAVE + * a3: exctable, original value in excsave1 + */ + +ENTRY(window_overflow_restore_a0_fixup) + + rsr a0, ps + extui a0, a0, PS_OWB_SHIFT, PS_OWB_WIDTH + rsr a2, windowbase + sub a0, a2, a0 + extui a0, a0, 0, 3 + l32i a2, a3, EXC_TABLE_DOUBLE_SAVE + xsr a3, excsave1 + + _beqi a0, 1, .Lhandle_1 + _beqi a0, 3, .Lhandle_3 + + .macro overflow_fixup_handle_exception_pane n + + rsr a0, depc + rotw -\n + + xsr a3, excsave1 + wsr a2, depc + l32i a2, a3, EXC_TABLE_KSTK + s32i a0, a2, PT_AREG0 + + movi a0, .Lrestore_\n + s32i a0, a2, PT_DEPC + rsr a0, exccause + j _DoubleExceptionVector_handle_exception + + .endm + + overflow_fixup_handle_exception_pane 2 +.Lhandle_1: + overflow_fixup_handle_exception_pane 1 +.Lhandle_3: + overflow_fixup_handle_exception_pane 3 + + .macro overflow_fixup_restore_a0_pane n + + rotw \n + /* Need to preserve a0 value here to be able to handle exception + * that may occur on a0 reload from stack. 
It may occur because + * TLB miss handler may not be atomic and pointer to page table + * may be lost before we get here. There are no free registers, + * so we need to use EXC_TABLE_DOUBLE_SAVE area. + */ + xsr a3, excsave1 + s32i a2, a3, EXC_TABLE_DOUBLE_SAVE + movi a2, window_overflow_restore_a0_fixup + s32i a2, a3, EXC_TABLE_FIXUP + l32i a2, a3, EXC_TABLE_DOUBLE_SAVE + xsr a3, excsave1 + bbsi.l a0, 7, 1f + l32e a0, a9, -16 + j 2f +1: + l32e a0, a13, -16 +2: + rotw -\n + + .endm + +.Lrestore_2: + overflow_fixup_restore_a0_pane 2 + +.Lset_default_fixup: + xsr a3, excsave1 + s32i a2, a3, EXC_TABLE_DOUBLE_SAVE + movi a2, 0 + s32i a2, a3, EXC_TABLE_FIXUP + l32i a2, a3, EXC_TABLE_DOUBLE_SAVE + xsr a3, excsave1 + rfe + +.Lrestore_1: + overflow_fixup_restore_a0_pane 1 + j .Lset_default_fixup +.Lrestore_3: + overflow_fixup_restore_a0_pane 3 + j .Lset_default_fixup + +ENDPROC(window_overflow_restore_a0_fixup) + + .end literal_prefix +/* * Debug interrupt vector * * There is not much space here, so simply jump to another handler. diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index 21acd11..af84f8f 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -262,13 +262,13 @@ SECTIONS .UserExceptionVector.literal) SECTION_VECTOR (_DoubleExceptionVector_literal, .DoubleExceptionVector.literal, - DOUBLEEXC_VECTOR_VADDR - 16, + DOUBLEEXC_VECTOR_VADDR - 40, SIZEOF(.UserExceptionVector.text), .UserExceptionVector.text) SECTION_VECTOR (_DoubleExceptionVector_text, .DoubleExceptionVector.text, DOUBLEEXC_VECTOR_VADDR, - 32, + 40, .DoubleExceptionVector.literal) . 
= (LOADADDR( .DoubleExceptionVector.text ) + SIZEOF( .DoubleExceptionVector.text ) + 3) & ~ 3; diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 4e491d9..a573d4b 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -80,7 +80,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q, blkg->q = q; INIT_LIST_HEAD(&blkg->q_node); blkg->blkcg = blkcg; - blkg->refcnt = 1; + atomic_set(&blkg->refcnt, 1); /* root blkg uses @q->root_rl, init rl only for !root blkgs */ if (blkcg != &blkcg_root) { @@ -399,11 +399,8 @@ void __blkg_release_rcu(struct rcu_head *rcu_head) /* release the blkcg and parent blkg refs this blkg has been holding */ css_put(&blkg->blkcg->css); - if (blkg->parent) { - spin_lock_irq(blkg->q->queue_lock); + if (blkg->parent) blkg_put(blkg->parent); - spin_unlock_irq(blkg->q->queue_lock); - } blkg_free(blkg); } @@ -862,6 +859,20 @@ void blkcg_drain_queue(struct request_queue *q) { lockdep_assert_held(q->queue_lock); + /* + * @q could be exiting and already have destroyed all blkgs as + * indicated by NULL root_blkg. If so, don't confuse policies. + */ + if (!q->root_blkg) + return; + + /* + * @q could be exiting and already have destroyed all blkgs as + * indicated by NULL root_blkg. If so, don't confuse policies. + */ + if (!q->root_blkg) + return; + blk_throtl_drain(q); } diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 2e34c38..f1c1cfc 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -18,6 +18,7 @@ #include <linux/seq_file.h> #include <linux/radix-tree.h> #include <linux/blkdev.h> +#include <linux/atomic.h> /* Max limits for throttle policy */ #define THROTL_IOPS_MAX UINT_MAX @@ -104,7 +105,7 @@ struct blkcg_gq { struct request_list rl; /* reference count */ - int refcnt; + atomic_t refcnt; /* is this blkg online? 
protected by both blkcg and q locks */ bool online; @@ -253,13 +254,12 @@ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) * blkg_get - get a blkg reference * @blkg: blkg to get * - * The caller should be holding queue_lock and an existing reference. + * The caller should be holding an existing reference. */ static inline void blkg_get(struct blkcg_gq *blkg) { - lockdep_assert_held(blkg->q->queue_lock); - WARN_ON_ONCE(!blkg->refcnt); - blkg->refcnt++; + WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0); + atomic_inc(&blkg->refcnt); } void __blkg_release_rcu(struct rcu_head *rcu); @@ -267,14 +267,11 @@ void __blkg_release_rcu(struct rcu_head *rcu); /** * blkg_put - put a blkg reference * @blkg: blkg to put - * - * The caller should be holding queue_lock. */ static inline void blkg_put(struct blkcg_gq *blkg) { - lockdep_assert_held(blkg->q->queue_lock); - WARN_ON_ONCE(blkg->refcnt <= 0); - if (!--blkg->refcnt) + WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0); + if (atomic_dec_and_test(&blkg->refcnt)) call_rcu(&blkg->rcu_head, __blkg_release_rcu); } diff --git a/block/blk-core.c b/block/blk-core.c index f703f97..a1f91ce 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2297,7 +2297,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) if (!req->bio) return false; - trace_block_rq_complete(req->q, req); + trace_block_rq_complete(req->q, req, nr_bytes); /* * For fs requests, rq is just carrier of independent bio's diff --git a/block/blk-settings.c b/block/blk-settings.c index 5330933..ec00a0f 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -553,7 +553,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, bottom = max(b->physical_block_size, b->io_min) + alignment; /* Verify that top and bottom intervals line up */ - if (max(top, bottom) & (min(top, bottom) - 1)) { + if (max(top, bottom) % min(top, bottom)) { t->misaligned = 1; ret = -1; } @@ -594,7 +594,7 @@ int 
blk_stack_limits(struct queue_limits *t, struct queue_limits *b, /* Find lowest common alignment_offset */ t->alignment_offset = lcm(t->alignment_offset, alignment) - & (max(t->physical_block_size, t->io_min) - 1); + % max(t->physical_block_size, t->io_min); /* Verify that new alignment_offset is on a logical block boundary */ if (t->alignment_offset & (t->logical_block_size - 1)) { diff --git a/block/blk-tag.c b/block/blk-tag.c index 3f33d86..a185b86 100644 --- a/block/blk-tag.c +++ b/block/blk-tag.c @@ -27,18 +27,15 @@ struct request *blk_queue_find_tag(struct request_queue *q, int tag) EXPORT_SYMBOL(blk_queue_find_tag); /** - * __blk_free_tags - release a given set of tag maintenance info + * blk_free_tags - release a given set of tag maintenance info * @bqt: the tag map to free * - * Tries to free the specified @bqt. Returns true if it was - * actually freed and false if there are still references using it + * Drop the reference count on @bqt and frees it when the last reference + * is dropped. */ -static int __blk_free_tags(struct blk_queue_tag *bqt) +void blk_free_tags(struct blk_queue_tag *bqt) { - int retval; - - retval = atomic_dec_and_test(&bqt->refcnt); - if (retval) { + if (atomic_dec_and_test(&bqt->refcnt)) { BUG_ON(find_first_bit(bqt->tag_map, bqt->max_depth) < bqt->max_depth); @@ -50,9 +47,8 @@ static int __blk_free_tags(struct blk_queue_tag *bqt) kfree(bqt); } - - return retval; } +EXPORT_SYMBOL(blk_free_tags); /** * __blk_queue_free_tags - release tag maintenance info @@ -69,28 +65,13 @@ void __blk_queue_free_tags(struct request_queue *q) if (!bqt) return; - __blk_free_tags(bqt); + blk_free_tags(bqt); q->queue_tags = NULL; queue_flag_clear_unlocked(QUEUE_FLAG_QUEUED, q); } /** - * blk_free_tags - release a given set of tag maintenance info - * @bqt: the tag map to free - * - * For externally managed @bqt frees the map. Callers of this - * function must guarantee to have released all the queues that - * might have been using this tag map. 
- */ -void blk_free_tags(struct blk_queue_tag *bqt) -{ - if (unlikely(!__blk_free_tags(bqt))) - BUG(); -} -EXPORT_SYMBOL(blk_free_tags); - -/** * blk_queue_free_tags - release tag maintenance info * @q: the request queue for the device * diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 434944c..06c2bab 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1275,12 +1275,16 @@ __cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg) static void cfq_update_group_weight(struct cfq_group *cfqg) { - BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node)); - if (cfqg->new_weight) { cfqg->weight = cfqg->new_weight; cfqg->new_weight = 0; } +} + +static void +cfq_update_group_leaf_weight(struct cfq_group *cfqg) +{ + BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node)); if (cfqg->new_leaf_weight) { cfqg->leaf_weight = cfqg->new_leaf_weight; @@ -1299,7 +1303,7 @@ cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg) /* add to the service tree */ BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node)); - cfq_update_group_weight(cfqg); + cfq_update_group_leaf_weight(cfqg); __cfq_group_service_tree_add(st, cfqg); /* @@ -1323,6 +1327,7 @@ cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg) */ while ((parent = cfqg_parent(pos))) { if (propagate) { + cfq_update_group_weight(pos); propagate = !parent->nr_active++; parent->children_weight += pos->weight; } diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index fbd5a67..a0926a6 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -690,6 +690,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) case BLKROSET: case BLKDISCARD: case BLKSECDISCARD: + case BLKZEROOUT: /* * the ones below are implemented in blkdev_locked_ioctl, * but we call blkdev_ioctl, which gets the lock for us diff --git a/block/genhd.c b/block/genhd.c index 791f419..a8d586a 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -28,10 +28,10 @@ struct kobject *block_depr; /* for extended 
dynamic devt allocation, currently only one major is used */ #define NR_EXT_DEVT (1 << MINORBITS) -/* For extended devt allocation. ext_devt_mutex prevents look up +/* For extended devt allocation. ext_devt_lock prevents look up * results from going away underneath its user. */ -static DEFINE_MUTEX(ext_devt_mutex); +static DEFINE_SPINLOCK(ext_devt_lock); static DEFINE_IDR(ext_devt_idr); static struct device_type disk_type; @@ -420,9 +420,13 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt) } /* allocate ext devt */ - mutex_lock(&ext_devt_mutex); - idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_KERNEL); - mutex_unlock(&ext_devt_mutex); + idr_preload(GFP_KERNEL); + + spin_lock(&ext_devt_lock); + idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_NOWAIT); + spin_unlock(&ext_devt_lock); + + idr_preload_end(); if (idx < 0) return idx == -ENOSPC ? -EBUSY : idx; @@ -441,15 +445,13 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt) */ void blk_free_devt(dev_t devt) { - might_sleep(); - if (devt == MKDEV(0, 0)) return; if (MAJOR(devt) == BLOCK_EXT_MAJOR) { - mutex_lock(&ext_devt_mutex); + spin_lock(&ext_devt_lock); idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); - mutex_unlock(&ext_devt_mutex); + spin_unlock(&ext_devt_lock); } } @@ -665,7 +667,6 @@ void del_gendisk(struct gendisk *disk) sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); pm_runtime_set_memalloc_noio(disk_to_dev(disk), false); device_del(disk_to_dev(disk)); - blk_free_devt(disk_to_dev(disk)->devt); } EXPORT_SYMBOL(del_gendisk); @@ -690,13 +691,13 @@ struct gendisk *get_gendisk(dev_t devt, int *partno) } else { struct hd_struct *part; - mutex_lock(&ext_devt_mutex); + spin_lock(&ext_devt_lock); part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); if (part && get_disk(part_to_disk(part))) { *partno = part->partno; disk = part_to_disk(part); } - mutex_unlock(&ext_devt_mutex); + spin_unlock(&ext_devt_lock); } return disk; @@ -1069,9 +1070,16 @@ int 
disk_expand_part_tbl(struct gendisk *disk, int partno) struct disk_part_tbl *old_ptbl = disk->part_tbl; struct disk_part_tbl *new_ptbl; int len = old_ptbl ? old_ptbl->len : 0; - int target = partno + 1; + int i, target; size_t size; - int i; + + /* + * check for int overflow, since we can get here from blkpg_ioctl() + * with a user passed 'partno'. + */ + target = partno + 1; + if (target < 0) + return -EINVAL; /* disk_max_parts() is zero during initialization, ignore if so */ if (disk_max_parts(disk) && target > disk_max_parts(disk)) @@ -1098,6 +1106,7 @@ static void disk_release(struct device *dev) { struct gendisk *disk = dev_to_disk(dev); + blk_free_devt(dev->devt); disk_release_events(disk); kfree(disk->random); disk_replace_part_tbl(disk, NULL); diff --git a/block/partition-generic.c b/block/partition-generic.c index 789cdea..0d9e5f9 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -211,6 +211,7 @@ static const struct attribute_group *part_attr_groups[] = { static void part_release(struct device *dev) { struct hd_struct *p = dev_to_part(dev); + blk_free_devt(dev->devt); free_part_stats(p); free_part_info(p); kfree(p); @@ -253,7 +254,6 @@ void delete_partition(struct gendisk *disk, int partno) rcu_assign_pointer(ptbl->last_lookup, NULL); kobject_put(part->holder_dir); device_del(part_to_dev(part)); - blk_free_devt(part_devt(part)); hd_struct_put(part); } diff --git a/block/partitions/aix.c b/block/partitions/aix.c index 43be471..0931f51 100644 --- a/block/partitions/aix.c +++ b/block/partitions/aix.c @@ -253,7 +253,7 @@ int aix_partition(struct parsed_partitions *state) continue; } lv_ix = be16_to_cpu(p->lv_ix) - 1; - if (lv_ix > state->limit) { + if (lv_ix >= state->limit) { cur_lv_ix = -1; continue; } diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index a5ffcc9..1b4988b 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -506,7 +506,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, if 
(bytes && blk_rq_map_kern(q, rq, buffer, bytes, __GFP_WAIT)) { err = DRIVER_ERROR << 24; - goto out; + goto error; } memset(sense, 0, sizeof(sense)); @@ -516,7 +516,6 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, blk_execute_rq(q, disk, rq, 0); -out: err = rq->errors & 0xff; /* only 8 bit SCSI status */ if (err) { if (rq->sense_len && rq->sense) { diff --git a/crypto/842.c b/crypto/842.c index 65c7a89c..b48f4f1 100644 --- a/crypto/842.c +++ b/crypto/842.c @@ -180,3 +180,4 @@ module_exit(nx842_mod_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("842 Compression Algorithm"); +MODULE_ALIAS_CRYPTO("842"); diff --git a/crypto/aes_generic.c b/crypto/aes_generic.c index fd0d6b4..3dd1011 100644 --- a/crypto/aes_generic.c +++ b/crypto/aes_generic.c @@ -1474,4 +1474,5 @@ module_exit(aes_fini); MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_ALIAS("aes"); +MODULE_ALIAS_CRYPTO("aes"); +MODULE_ALIAS_CRYPTO("aes-generic"); diff --git a/crypto/af_alg.c b/crypto/af_alg.c index ac33d5f..6ef6e2a 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -21,6 +21,7 @@ #include <linux/module.h> #include <linux/net.h> #include <linux/rwsem.h> +#include <linux/security.h> struct alg_type_list { const struct af_alg_type *type; @@ -243,6 +244,7 @@ int af_alg_accept(struct sock *sk, struct socket *newsock) sock_init_data(newsock, sk2); sock_graft(sk2, newsock); + security_sk_clone(sk, sk2); err = type->accept(ask->private, sk2); if (err) { @@ -447,6 +449,9 @@ void af_alg_complete(struct crypto_async_request *req, int err) { struct af_alg_completion *completion = req->data; + if (err == -EINPROGRESS) + return; + completion->err = err; complete(&completion->completion); } diff --git a/crypto/algapi.c b/crypto/algapi.c index 5013cad..13d3e63 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -495,8 +495,8 @@ static struct crypto_template *__crypto_lookup_template(const char *name) struct crypto_template 
*crypto_lookup_template(const char *name) { - return try_then_request_module(__crypto_lookup_template(name), "%s", - name); + return try_then_request_module(__crypto_lookup_template(name), + "crypto-%s", name); } EXPORT_SYMBOL_GPL(crypto_lookup_template); diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index a19c027..83187f4 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -49,7 +49,7 @@ struct skcipher_ctx { struct ablkcipher_request req; }; -#define MAX_SGL_ENTS ((PAGE_SIZE - sizeof(struct skcipher_sg_list)) / \ +#define MAX_SGL_ENTS ((4096 - sizeof(struct skcipher_sg_list)) / \ sizeof(struct scatterlist) - 1) static inline int skcipher_sndbuf(struct sock *sk) diff --git a/crypto/ansi_cprng.c b/crypto/ansi_cprng.c index 666f196..6f5bebc 100644 --- a/crypto/ansi_cprng.c +++ b/crypto/ansi_cprng.c @@ -476,4 +476,5 @@ module_param(dbg, int, 0); MODULE_PARM_DESC(dbg, "Boolean to enable debugging (0/1 == off/on)"); module_init(prng_mod_init); module_exit(prng_mod_fini); -MODULE_ALIAS("stdrng"); +MODULE_ALIAS_CRYPTO("stdrng"); +MODULE_ALIAS_CRYPTO("ansi_cprng"); diff --git a/crypto/anubis.c b/crypto/anubis.c index 008c8a4..4bb187c 100644 --- a/crypto/anubis.c +++ b/crypto/anubis.c @@ -704,3 +704,4 @@ module_exit(anubis_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Anubis Cryptographic Algorithm"); +MODULE_ALIAS_CRYPTO("anubis"); diff --git a/crypto/api.c b/crypto/api.c index 9d68122..6d536b8 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -216,11 +216,11 @@ struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask) alg = crypto_alg_lookup(name, type, mask); if (!alg) { - request_module("%s", name); + request_module("crypto-%s", name); if (!((type ^ CRYPTO_ALG_NEED_FALLBACK) & mask & CRYPTO_ALG_NEED_FALLBACK)) - request_module("%s-all", name); + request_module("crypto-%s-all", name); alg = crypto_alg_lookup(name, type, mask); } diff --git a/crypto/arc4.c b/crypto/arc4.c index 5a772c3..f1a8192 100644 --- 
a/crypto/arc4.c +++ b/crypto/arc4.c @@ -166,3 +166,4 @@ module_exit(arc4_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("ARC4 Cipher Algorithm"); MODULE_AUTHOR("Jon Oberheide <jon@oberheide.org>"); +MODULE_ALIAS_CRYPTO("arc4"); diff --git a/crypto/authenc.c b/crypto/authenc.c index 6ff0208..0fdd14c 100644 --- a/crypto/authenc.c +++ b/crypto/authenc.c @@ -721,3 +721,4 @@ module_exit(crypto_authenc_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Simple AEAD wrapper for IPsec"); +MODULE_ALIAS_CRYPTO("authenc"); diff --git a/crypto/authencesn.c b/crypto/authencesn.c index ab53762..16c225c 100644 --- a/crypto/authencesn.c +++ b/crypto/authencesn.c @@ -832,3 +832,4 @@ module_exit(crypto_authenc_esn_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>"); MODULE_DESCRIPTION("AEAD wrapper for IPsec with extended sequence numbers"); +MODULE_ALIAS_CRYPTO("authencesn"); diff --git a/crypto/blowfish_generic.c b/crypto/blowfish_generic.c index 8baf544..87b392a 100644 --- a/crypto/blowfish_generic.c +++ b/crypto/blowfish_generic.c @@ -138,4 +138,5 @@ module_exit(blowfish_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Blowfish Cipher Algorithm"); -MODULE_ALIAS("blowfish"); +MODULE_ALIAS_CRYPTO("blowfish"); +MODULE_ALIAS_CRYPTO("blowfish-generic"); diff --git a/crypto/camellia_generic.c b/crypto/camellia_generic.c index 26bcd7a..a02286b 100644 --- a/crypto/camellia_generic.c +++ b/crypto/camellia_generic.c @@ -1098,4 +1098,5 @@ module_exit(camellia_fini); MODULE_DESCRIPTION("Camellia Cipher Algorithm"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("camellia"); +MODULE_ALIAS_CRYPTO("camellia"); +MODULE_ALIAS_CRYPTO("camellia-generic"); diff --git a/crypto/cast5_generic.c b/crypto/cast5_generic.c index 5558f63..df5c726 100644 --- a/crypto/cast5_generic.c +++ b/crypto/cast5_generic.c @@ -549,4 +549,5 @@ module_exit(cast5_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Cast5 Cipher Algorithm"); -MODULE_ALIAS("cast5"); 
+MODULE_ALIAS_CRYPTO("cast5"); +MODULE_ALIAS_CRYPTO("cast5-generic"); diff --git a/crypto/cast6_generic.c b/crypto/cast6_generic.c index de73252..058c8d7 100644 --- a/crypto/cast6_generic.c +++ b/crypto/cast6_generic.c @@ -291,4 +291,5 @@ module_exit(cast6_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Cast6 Cipher Algorithm"); -MODULE_ALIAS("cast6"); +MODULE_ALIAS_CRYPTO("cast6"); +MODULE_ALIAS_CRYPTO("cast6-generic"); diff --git a/crypto/cbc.c b/crypto/cbc.c index 61ac42e..780ee27 100644 --- a/crypto/cbc.c +++ b/crypto/cbc.c @@ -289,3 +289,4 @@ module_exit(crypto_cbc_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("CBC block cipher algorithm"); +MODULE_ALIAS_CRYPTO("cbc"); diff --git a/crypto/ccm.c b/crypto/ccm.c index ed009b7..c569c9c 100644 --- a/crypto/ccm.c +++ b/crypto/ccm.c @@ -879,5 +879,6 @@ module_exit(crypto_ccm_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Counter with CBC MAC"); -MODULE_ALIAS("ccm_base"); -MODULE_ALIAS("rfc4309"); +MODULE_ALIAS_CRYPTO("ccm_base"); +MODULE_ALIAS_CRYPTO("rfc4309"); +MODULE_ALIAS_CRYPTO("ccm"); diff --git a/crypto/chainiv.c b/crypto/chainiv.c index 834d8dd..22b7e55 100644 --- a/crypto/chainiv.c +++ b/crypto/chainiv.c @@ -359,3 +359,4 @@ module_exit(chainiv_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Chain IV Generator"); +MODULE_ALIAS_CRYPTO("chainiv"); diff --git a/crypto/cmac.c b/crypto/cmac.c index 50880cf..7a8bfbd 100644 --- a/crypto/cmac.c +++ b/crypto/cmac.c @@ -313,3 +313,4 @@ module_exit(crypto_cmac_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("CMAC keyed hash algorithm"); +MODULE_ALIAS_CRYPTO("cmac"); diff --git a/crypto/crc32.c b/crypto/crc32.c index 9d1c415..187ded2 100644 --- a/crypto/crc32.c +++ b/crypto/crc32.c @@ -156,3 +156,4 @@ module_exit(crc32_mod_fini); MODULE_AUTHOR("Alexander Boyko <alexander_boyko@xyratex.com>"); MODULE_DESCRIPTION("CRC32 calculations wrapper for lib/crc32"); MODULE_LICENSE("GPL"); +MODULE_ALIAS_CRYPTO("crc32"); diff --git 
a/crypto/crct10dif_generic.c b/crypto/crct10dif_generic.c index 877e711..c1229614 100644 --- a/crypto/crct10dif_generic.c +++ b/crypto/crct10dif_generic.c @@ -124,4 +124,5 @@ module_exit(crct10dif_mod_fini); MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>"); MODULE_DESCRIPTION("T10 DIF CRC calculation."); MODULE_LICENSE("GPL"); -MODULE_ALIAS("crct10dif"); +MODULE_ALIAS_CRYPTO("crct10dif"); +MODULE_ALIAS_CRYPTO("crct10dif-generic"); diff --git a/crypto/cryptd.c b/crypto/cryptd.c index 7bdd61b..75c415d 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -955,3 +955,4 @@ module_exit(cryptd_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Software async crypto daemon"); +MODULE_ALIAS_CRYPTO("cryptd"); diff --git a/crypto/crypto_null.c b/crypto/crypto_null.c index 1dc54bb..a203191 100644 --- a/crypto/crypto_null.c +++ b/crypto/crypto_null.c @@ -145,9 +145,9 @@ static struct crypto_alg null_algs[3] = { { .coa_decompress = null_compress } } } }; -MODULE_ALIAS("compress_null"); -MODULE_ALIAS("digest_null"); -MODULE_ALIAS("cipher_null"); +MODULE_ALIAS_CRYPTO("compress_null"); +MODULE_ALIAS_CRYPTO("digest_null"); +MODULE_ALIAS_CRYPTO("cipher_null"); static int __init crypto_null_mod_init(void) { diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index 1512e41..43665d0 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -466,7 +466,7 @@ static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) type -= CRYPTO_MSG_BASE; link = &crypto_dispatch[type]; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if ((type == (CRYPTO_MSG_GETALG - CRYPTO_MSG_BASE) && diff --git a/crypto/crypto_wq.c b/crypto/crypto_wq.c index adad92a..2f1b8d1 100644 --- a/crypto/crypto_wq.c +++ b/crypto/crypto_wq.c @@ -33,7 +33,7 @@ static void __exit crypto_wq_exit(void) destroy_workqueue(kcrypto_wq); } -module_init(crypto_wq_init); +subsys_initcall(crypto_wq_init); module_exit(crypto_wq_exit); MODULE_LICENSE("GPL"); diff --git 
a/crypto/ctr.c b/crypto/ctr.c index f2b94f2..2386f73 100644 --- a/crypto/ctr.c +++ b/crypto/ctr.c @@ -466,4 +466,5 @@ module_exit(crypto_ctr_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("CTR Counter block mode"); -MODULE_ALIAS("rfc3686"); +MODULE_ALIAS_CRYPTO("rfc3686"); +MODULE_ALIAS_CRYPTO("ctr"); diff --git a/crypto/cts.c b/crypto/cts.c index 042223f..60b9da3 100644 --- a/crypto/cts.c +++ b/crypto/cts.c @@ -350,3 +350,4 @@ module_exit(crypto_cts_module_exit); MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("CTS-CBC CipherText Stealing for CBC"); +MODULE_ALIAS_CRYPTO("cts"); diff --git a/crypto/deflate.c b/crypto/deflate.c index b57d70e..95d8d37 100644 --- a/crypto/deflate.c +++ b/crypto/deflate.c @@ -222,4 +222,4 @@ module_exit(deflate_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Deflate Compression Algorithm for IPCOMP"); MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); - +MODULE_ALIAS_CRYPTO("deflate"); diff --git a/crypto/des_generic.c b/crypto/des_generic.c index f6cf63f..3ec6071 100644 --- a/crypto/des_generic.c +++ b/crypto/des_generic.c @@ -971,8 +971,6 @@ static struct crypto_alg des_algs[2] = { { .cia_decrypt = des3_ede_decrypt } } } }; -MODULE_ALIAS("des3_ede"); - static int __init des_generic_mod_init(void) { return crypto_register_algs(des_algs, ARRAY_SIZE(des_algs)); @@ -989,4 +987,7 @@ module_exit(des_generic_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms"); MODULE_AUTHOR("Dag Arne Osvik <da@osvik.no>"); -MODULE_ALIAS("des"); +MODULE_ALIAS_CRYPTO("des"); +MODULE_ALIAS_CRYPTO("des-generic"); +MODULE_ALIAS_CRYPTO("des3_ede"); +MODULE_ALIAS_CRYPTO("des3_ede-generic"); diff --git a/crypto/ecb.c b/crypto/ecb.c index 935cfef..12011af 100644 --- a/crypto/ecb.c +++ b/crypto/ecb.c @@ -185,3 +185,4 @@ module_exit(crypto_ecb_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("ECB block cipher algorithm"); +MODULE_ALIAS_CRYPTO("ecb"); diff --git a/crypto/eseqiv.c 
b/crypto/eseqiv.c index 42ce9f5..388f582 100644 --- a/crypto/eseqiv.c +++ b/crypto/eseqiv.c @@ -267,3 +267,4 @@ module_exit(eseqiv_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Encrypted Sequence Number IV Generator"); +MODULE_ALIAS_CRYPTO("eseqiv"); diff --git a/crypto/fcrypt.c b/crypto/fcrypt.c index 021d7fe..77286ea 100644 --- a/crypto/fcrypt.c +++ b/crypto/fcrypt.c @@ -420,3 +420,4 @@ module_exit(fcrypt_mod_fini); MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("FCrypt Cipher Algorithm"); MODULE_AUTHOR("David Howells <dhowells@redhat.com>"); +MODULE_ALIAS_CRYPTO("fcrypt"); diff --git a/crypto/gcm.c b/crypto/gcm.c index 43e1fb0..b4c2520 100644 --- a/crypto/gcm.c +++ b/crypto/gcm.c @@ -1441,6 +1441,7 @@ module_exit(crypto_gcm_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Galois/Counter Mode"); MODULE_AUTHOR("Mikko Herranen <mh1@iki.fi>"); -MODULE_ALIAS("gcm_base"); -MODULE_ALIAS("rfc4106"); -MODULE_ALIAS("rfc4543"); +MODULE_ALIAS_CRYPTO("gcm_base"); +MODULE_ALIAS_CRYPTO("rfc4106"); +MODULE_ALIAS_CRYPTO("rfc4543"); +MODULE_ALIAS_CRYPTO("gcm"); diff --git a/crypto/ghash-generic.c b/crypto/ghash-generic.c index 9d3f0c6..bac7099 100644 --- a/crypto/ghash-generic.c +++ b/crypto/ghash-generic.c @@ -172,4 +172,5 @@ module_exit(ghash_mod_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("GHASH Message Digest Algorithm"); -MODULE_ALIAS("ghash"); +MODULE_ALIAS_CRYPTO("ghash"); +MODULE_ALIAS_CRYPTO("ghash-generic"); diff --git a/crypto/hmac.c b/crypto/hmac.c index 8d9544c..ade790b 100644 --- a/crypto/hmac.c +++ b/crypto/hmac.c @@ -271,3 +271,4 @@ module_exit(hmac_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("HMAC hash algorithm"); +MODULE_ALIAS_CRYPTO("hmac"); diff --git a/crypto/khazad.c b/crypto/khazad.c index 60e7cd6..873eb5d 100644 --- a/crypto/khazad.c +++ b/crypto/khazad.c @@ -880,3 +880,4 @@ module_exit(khazad_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Khazad Cryptographic Algorithm"); +MODULE_ALIAS_CRYPTO("khazad"); diff 
--git a/crypto/krng.c b/crypto/krng.c index a2d2b72..0224841 100644 --- a/crypto/krng.c +++ b/crypto/krng.c @@ -62,4 +62,5 @@ module_exit(krng_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Kernel Random Number Generator"); -MODULE_ALIAS("stdrng"); +MODULE_ALIAS_CRYPTO("stdrng"); +MODULE_ALIAS_CRYPTO("krng"); diff --git a/crypto/lrw.c b/crypto/lrw.c index ba42acc..6f9908a 100644 --- a/crypto/lrw.c +++ b/crypto/lrw.c @@ -400,3 +400,4 @@ module_exit(crypto_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("LRW block cipher mode"); +MODULE_ALIAS_CRYPTO("lrw"); diff --git a/crypto/lz4.c b/crypto/lz4.c index 4586dd1..53279ab 100644 --- a/crypto/lz4.c +++ b/crypto/lz4.c @@ -104,3 +104,4 @@ module_exit(lz4_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("LZ4 Compression Algorithm"); +MODULE_ALIAS_CRYPTO("lz4"); diff --git a/crypto/lz4hc.c b/crypto/lz4hc.c index 151ba31..eaec5fa 100644 --- a/crypto/lz4hc.c +++ b/crypto/lz4hc.c @@ -104,3 +104,4 @@ module_exit(lz4hc_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("LZ4HC Compression Algorithm"); +MODULE_ALIAS_CRYPTO("lz4hc"); diff --git a/crypto/lzo.c b/crypto/lzo.c index 1c2aa69..d1ff694 100644 --- a/crypto/lzo.c +++ b/crypto/lzo.c @@ -103,3 +103,4 @@ module_exit(lzo_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("LZO Compression Algorithm"); +MODULE_ALIAS_CRYPTO("lzo"); diff --git a/crypto/md4.c b/crypto/md4.c index 0477a6a..3515af4 100644 --- a/crypto/md4.c +++ b/crypto/md4.c @@ -255,4 +255,4 @@ module_exit(md4_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("MD4 Message Digest Algorithm"); - +MODULE_ALIAS_CRYPTO("md4"); diff --git a/crypto/md5.c b/crypto/md5.c index 7febeaa..36f5e5b 100644 --- a/crypto/md5.c +++ b/crypto/md5.c @@ -168,3 +168,4 @@ module_exit(md5_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("MD5 Message Digest Algorithm"); +MODULE_ALIAS_CRYPTO("md5"); diff --git a/crypto/michael_mic.c b/crypto/michael_mic.c index 079b761..46195e0 100644 --- a/crypto/michael_mic.c 
+++ b/crypto/michael_mic.c @@ -184,3 +184,4 @@ module_exit(michael_mic_exit); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("Michael MIC"); MODULE_AUTHOR("Jouni Malinen <j@w1.fi>"); +MODULE_ALIAS_CRYPTO("michael_mic"); diff --git a/crypto/pcbc.c b/crypto/pcbc.c index d1b8bdf..f654965 100644 --- a/crypto/pcbc.c +++ b/crypto/pcbc.c @@ -295,3 +295,4 @@ module_exit(crypto_pcbc_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("PCBC block cipher algorithm"); +MODULE_ALIAS_CRYPTO("pcbc"); diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index f8c920c..6bc736e 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -565,3 +565,4 @@ module_exit(pcrypt_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>"); MODULE_DESCRIPTION("Parallel crypto wrapper"); +MODULE_ALIAS_CRYPTO("pcrypt"); diff --git a/crypto/rmd128.c b/crypto/rmd128.c index 8a0f68b..049486e 100644 --- a/crypto/rmd128.c +++ b/crypto/rmd128.c @@ -327,3 +327,4 @@ module_exit(rmd128_mod_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Adrian-Ken Rueegsegger <ken@codelabs.ch>"); MODULE_DESCRIPTION("RIPEMD-128 Message Digest"); +MODULE_ALIAS_CRYPTO("rmd128"); diff --git a/crypto/rmd160.c b/crypto/rmd160.c index 525d7bb..de585e5 100644 --- a/crypto/rmd160.c +++ b/crypto/rmd160.c @@ -371,3 +371,4 @@ module_exit(rmd160_mod_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Adrian-Ken Rueegsegger <ken@codelabs.ch>"); MODULE_DESCRIPTION("RIPEMD-160 Message Digest"); +MODULE_ALIAS_CRYPTO("rmd160"); diff --git a/crypto/rmd256.c b/crypto/rmd256.c index 69293d9..4ec02a7 100644 --- a/crypto/rmd256.c +++ b/crypto/rmd256.c @@ -346,3 +346,4 @@ module_exit(rmd256_mod_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Adrian-Ken Rueegsegger <ken@codelabs.ch>"); MODULE_DESCRIPTION("RIPEMD-256 Message Digest"); +MODULE_ALIAS_CRYPTO("rmd256"); diff --git a/crypto/rmd320.c b/crypto/rmd320.c index 09f97df..770f2cb 100644 --- a/crypto/rmd320.c +++ b/crypto/rmd320.c @@ -395,3 +395,4 @@ 
module_exit(rmd320_mod_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Adrian-Ken Rueegsegger <ken@codelabs.ch>"); MODULE_DESCRIPTION("RIPEMD-320 Message Digest"); +MODULE_ALIAS_CRYPTO("rmd320"); diff --git a/crypto/salsa20_generic.c b/crypto/salsa20_generic.c index 9a4770c..f550b5d 100644 --- a/crypto/salsa20_generic.c +++ b/crypto/salsa20_generic.c @@ -248,4 +248,5 @@ module_exit(salsa20_generic_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION ("Salsa20 stream cipher algorithm"); -MODULE_ALIAS("salsa20"); +MODULE_ALIAS_CRYPTO("salsa20"); +MODULE_ALIAS_CRYPTO("salsa20-generic"); diff --git a/crypto/seed.c b/crypto/seed.c index 9c904d6..c6ba843 100644 --- a/crypto/seed.c +++ b/crypto/seed.c @@ -476,3 +476,4 @@ module_exit(seed_fini); MODULE_DESCRIPTION("SEED Cipher Algorithm"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Hye-Shik Chang <perky@FreeBSD.org>, Kim Hyun <hkim@kisa.or.kr>"); +MODULE_ALIAS_CRYPTO("seed"); diff --git a/crypto/seqiv.c b/crypto/seqiv.c index f2cba4ed..49a4069 100644 --- a/crypto/seqiv.c +++ b/crypto/seqiv.c @@ -362,3 +362,4 @@ module_exit(seqiv_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Sequence Number IV Generator"); +MODULE_ALIAS_CRYPTO("seqiv"); diff --git a/crypto/serpent_generic.c b/crypto/serpent_generic.c index 7ddbd7e..94970a7 100644 --- a/crypto/serpent_generic.c +++ b/crypto/serpent_generic.c @@ -665,5 +665,6 @@ module_exit(serpent_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Serpent and tnepres (kerneli compatible serpent reversed) Cipher Algorithm"); MODULE_AUTHOR("Dag Arne Osvik <osvik@ii.uib.no>"); -MODULE_ALIAS("tnepres"); -MODULE_ALIAS("serpent"); +MODULE_ALIAS_CRYPTO("tnepres"); +MODULE_ALIAS_CRYPTO("serpent"); +MODULE_ALIAS_CRYPTO("serpent-generic"); diff --git a/crypto/sha1_generic.c b/crypto/sha1_generic.c index 4279480..fdf7c00 100644 --- a/crypto/sha1_generic.c +++ b/crypto/sha1_generic.c @@ -153,4 +153,5 @@ module_exit(sha1_generic_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA1 Secure Hash 
Algorithm"); -MODULE_ALIAS("sha1"); +MODULE_ALIAS_CRYPTO("sha1"); +MODULE_ALIAS_CRYPTO("sha1-generic"); diff --git a/crypto/sha256_generic.c b/crypto/sha256_generic.c index 5433667..136381b 100644 --- a/crypto/sha256_generic.c +++ b/crypto/sha256_generic.c @@ -384,5 +384,7 @@ module_exit(sha256_generic_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm"); -MODULE_ALIAS("sha224"); -MODULE_ALIAS("sha256"); +MODULE_ALIAS_CRYPTO("sha224"); +MODULE_ALIAS_CRYPTO("sha224-generic"); +MODULE_ALIAS_CRYPTO("sha256"); +MODULE_ALIAS_CRYPTO("sha256-generic"); diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c index 6ed124f..6c6d901 100644 --- a/crypto/sha512_generic.c +++ b/crypto/sha512_generic.c @@ -287,5 +287,7 @@ module_exit(sha512_generic_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA-512 and SHA-384 Secure Hash Algorithms"); -MODULE_ALIAS("sha384"); -MODULE_ALIAS("sha512"); +MODULE_ALIAS_CRYPTO("sha384"); +MODULE_ALIAS_CRYPTO("sha384-generic"); +MODULE_ALIAS_CRYPTO("sha512"); +MODULE_ALIAS_CRYPTO("sha512-generic"); diff --git a/crypto/tea.c b/crypto/tea.c index 0a57232..b70b441 100644 --- a/crypto/tea.c +++ b/crypto/tea.c @@ -270,8 +270,9 @@ static void __exit tea_mod_fini(void) crypto_unregister_algs(tea_algs, ARRAY_SIZE(tea_algs)); } -MODULE_ALIAS("xtea"); -MODULE_ALIAS("xeta"); +MODULE_ALIAS_CRYPTO("tea"); +MODULE_ALIAS_CRYPTO("xtea"); +MODULE_ALIAS_CRYPTO("xeta"); module_init(tea_mod_init); module_exit(tea_mod_fini); diff --git a/crypto/tgr192.c b/crypto/tgr192.c index 8740355..f7ed2fb 100644 --- a/crypto/tgr192.c +++ b/crypto/tgr192.c @@ -676,8 +676,9 @@ static void __exit tgr192_mod_fini(void) crypto_unregister_shashes(tgr_algs, ARRAY_SIZE(tgr_algs)); } -MODULE_ALIAS("tgr160"); -MODULE_ALIAS("tgr128"); +MODULE_ALIAS_CRYPTO("tgr192"); +MODULE_ALIAS_CRYPTO("tgr160"); +MODULE_ALIAS_CRYPTO("tgr128"); module_init(tgr192_mod_init); module_exit(tgr192_mod_fini); diff --git a/crypto/twofish_generic.c 
b/crypto/twofish_generic.c index 2d50005..ebf7a3e 100644 --- a/crypto/twofish_generic.c +++ b/crypto/twofish_generic.c @@ -211,4 +211,5 @@ module_exit(twofish_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION ("Twofish Cipher Algorithm"); -MODULE_ALIAS("twofish"); +MODULE_ALIAS_CRYPTO("twofish"); +MODULE_ALIAS_CRYPTO("twofish-generic"); diff --git a/crypto/vmac.c b/crypto/vmac.c index 2eb11a3..bf2d3a8 100644 --- a/crypto/vmac.c +++ b/crypto/vmac.c @@ -713,3 +713,4 @@ module_exit(vmac_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("VMAC hash algorithm"); +MODULE_ALIAS_CRYPTO("vmac"); diff --git a/crypto/wp512.c b/crypto/wp512.c index 180f1d6..253db94 100644 --- a/crypto/wp512.c +++ b/crypto/wp512.c @@ -1167,8 +1167,9 @@ static void __exit wp512_mod_fini(void) crypto_unregister_shashes(wp_algs, ARRAY_SIZE(wp_algs)); } -MODULE_ALIAS("wp384"); -MODULE_ALIAS("wp256"); +MODULE_ALIAS_CRYPTO("wp512"); +MODULE_ALIAS_CRYPTO("wp384"); +MODULE_ALIAS_CRYPTO("wp256"); module_init(wp512_mod_init); module_exit(wp512_mod_fini); diff --git a/crypto/xcbc.c b/crypto/xcbc.c index a5fbdf3..df90b33 100644 --- a/crypto/xcbc.c +++ b/crypto/xcbc.c @@ -286,3 +286,4 @@ module_exit(crypto_xcbc_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("XCBC keyed hash algorithm"); +MODULE_ALIAS_CRYPTO("xcbc"); diff --git a/crypto/xts.c b/crypto/xts.c index ca1608f..f6fd43f 100644 --- a/crypto/xts.c +++ b/crypto/xts.c @@ -362,3 +362,4 @@ module_exit(crypto_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("XTS block cipher mode"); +MODULE_ALIAS_CRYPTO("xts"); diff --git a/crypto/zlib.c b/crypto/zlib.c index 06b62e5..d980788 100644 --- a/crypto/zlib.c +++ b/crypto/zlib.c @@ -378,3 +378,4 @@ module_exit(zlib_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Zlib Compression Algorithm"); MODULE_AUTHOR("Sony Corporation"); +MODULE_ALIAS_CRYPTO("zlib"); diff --git a/drivers/acpi/acpi_cmos_rtc.c b/drivers/acpi/acpi_cmos_rtc.c index 84190ed..aff69d9 100644 --- 
a/drivers/acpi/acpi_cmos_rtc.c +++ b/drivers/acpi/acpi_cmos_rtc.c @@ -35,7 +35,7 @@ acpi_cmos_rtc_space_handler(u32 function, acpi_physical_address address, void *handler_context, void *region_context) { int i; - u8 *value = (u8 *)&value64; + u8 *value = (u8 *)value64; if (address > 0xff || !value64) return AE_BAD_PARAMETER; diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index 999adb5..1a8cdf9 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -360,7 +360,19 @@ static void acpi_memory_device_remove(struct acpi_device *device) acpi_memory_device_free(mem_device); } +static bool __initdata acpi_no_memhotplug; + void __init acpi_memory_hotplug_init(void) { + if (acpi_no_memhotplug) + return; + acpi_scan_add_handler_with_hotplug(&memory_device_handler, "memory"); } + +static int __init disable_acpi_memory_hotplug(char *str) +{ + acpi_no_memhotplug = true; + return 1; +} +__setup("acpi_no_memhotplug", disable_acpi_memory_hotplug); diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index f29e06e..f99cb6a 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -410,7 +410,6 @@ static int acpi_processor_add(struct acpi_device *device, goto err; pr->dev = dev; - dev->offline = pr->flags.need_hotplug_init; /* Trigger the processor driver's .probe() if present. */ if (device_attach(dev) >= 0) diff --git a/drivers/acpi/acpica/nsrepair.c b/drivers/acpi/acpica/nsrepair.c index f8e71ea..b2d2473 100644 --- a/drivers/acpi/acpica/nsrepair.c +++ b/drivers/acpi/acpica/nsrepair.c @@ -207,13 +207,30 @@ acpi_ns_simple_repair(struct acpi_evaluate_info *info, * this predefined name. Either one return value is expected, or none, * for both methods and other objects. * - * Exit now if there is no return object. Warning if one was expected. + * Try to fix if there was no return object. Warning if failed to fix. 
*/ if (!return_object) { if (expected_btypes && (!(expected_btypes & ACPI_RTYPE_NONE))) { - ACPI_WARN_PREDEFINED((AE_INFO, info->full_pathname, - ACPI_WARN_ALWAYS, - "Missing expected return value")); + if (package_index != ACPI_NOT_PACKAGE_ELEMENT) { + ACPI_WARN_PREDEFINED((AE_INFO, + info->full_pathname, + ACPI_WARN_ALWAYS, + "Found unexpected NULL package element")); + + status = + acpi_ns_repair_null_element(info, + expected_btypes, + package_index, + return_object_ptr); + if (ACPI_SUCCESS(status)) { + return (AE_OK); /* Repair was successful */ + } + } else { + ACPI_WARN_PREDEFINED((AE_INFO, + info->full_pathname, + ACPI_WARN_ALWAYS, + "Missing expected return value")); + } return (AE_AML_NO_RETURN_VALUE); } diff --git a/drivers/acpi/acpica/utcopy.c b/drivers/acpi/acpica/utcopy.c index 1731c27..2cac1d1 100644 --- a/drivers/acpi/acpica/utcopy.c +++ b/drivers/acpi/acpica/utcopy.c @@ -1001,5 +1001,11 @@ acpi_ut_copy_iobject_to_iobject(union acpi_operand_object *source_desc, status = acpi_ut_copy_simple_object(source_desc, *dest_desc); } + /* Delete the allocated object if copy failed */ + + if (ACPI_FAILURE(status)) { + acpi_ut_remove_reference(*dest_desc); + } + return_ACPI_STATUS(status); } diff --git a/drivers/acpi/acpica/utstring.c b/drivers/acpi/acpica/utstring.c index cb1e9cc..3d8748a 100644 --- a/drivers/acpi/acpica/utstring.c +++ b/drivers/acpi/acpica/utstring.c @@ -353,7 +353,7 @@ void acpi_ut_print_string(char *string, u16 max_length) } acpi_os_printf("\""); - for (i = 0; string[i] && (i < max_length); i++) { + for (i = 0; (i < max_length) && string[i]; i++) { /* Escape sequences */ diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index ffa5af4..a59d3d3 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -34,6 +34,7 @@ #include <linux/dmi.h> #include <linux/slab.h> #include <linux/suspend.h> +#include <linux/delay.h> #include <asm/unaligned.h> #ifdef CONFIG_ACPI_PROCFS_POWER @@ -1071,6 +1072,28 @@ static struct dmi_system_id 
bat_dmi_table[] = { {}, }; +/* + * Some machines'(E,G Lenovo Z480) ECs are not stable + * during boot up and this causes battery driver fails to be + * probed due to failure of getting battery information + * from EC sometimes. After several retries, the operation + * may work. So add retry code here and 20ms sleep between + * every retries. + */ +static int acpi_battery_update_retry(struct acpi_battery *battery) +{ + int retry, ret; + + for (retry = 5; retry; retry--) { + ret = acpi_battery_update(battery); + if (!ret) + break; + + msleep(20); + } + return ret; +} + static int acpi_battery_add(struct acpi_device *device) { int result = 0; @@ -1089,9 +1112,11 @@ static int acpi_battery_add(struct acpi_device *device) mutex_init(&battery->sysfs_lock); if (acpi_has_method(battery->device->handle, "_BIX")) set_bit(ACPI_BATTERY_XINFO_PRESENT, &battery->flags); - result = acpi_battery_update(battery); + + result = acpi_battery_update_retry(battery); if (result) goto fail; + #ifdef CONFIG_ACPI_PROCFS_POWER result = acpi_battery_add_fs(device); #endif diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c index f37dec5..16eb678 100644 --- a/drivers/acpi/blacklist.c +++ b/drivers/acpi/blacklist.c @@ -345,6 +345,14 @@ static struct dmi_system_id acpi_osi_dmi_table[] __initdata = { DMI_MATCH(DMI_PRODUCT_VERSION, "2349D15"), }, }, + { + .callback = dmi_disable_osi_win8, + .ident = "Dell Inspiron 7737", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 7737"), + }, + }, /* * BIOS invocation of _OSI(Linux) is almost always a BIOS bug. @@ -405,6 +413,19 @@ static struct dmi_system_id acpi_osi_dmi_table[] __initdata = { DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T500"), }, }, + /* + * Without this this EEEpc exports a non working WMI interface, with + * this it exports a working "good old" eeepc_laptop interface, fixing + * both brightness control, and rfkill not working. 
+ */ + { + .callback = dmi_enable_osi_linux, + .ident = "Asus EEE PC 1015PX", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "1015PX"), + }, + }, {} }; diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 7d83ef1..17c12ac 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -57,6 +57,12 @@ EXPORT_SYMBOL(acpi_root_dir); #ifdef CONFIG_X86 +#ifdef CONFIG_ACPI_CUSTOM_DSDT +static inline int set_copy_dsdt(const struct dmi_system_id *id) +{ + return 0; +} +#else static int set_copy_dsdt(const struct dmi_system_id *id) { printk(KERN_NOTICE "%s detected - " @@ -64,6 +70,7 @@ static int set_copy_dsdt(const struct dmi_system_id *id) acpi_gbl_copy_dsdt_locally = 1; return 0; } +#endif static struct dmi_system_id dsdt_dmi_table[] __initdata = { /* diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 51b7008..85752c6 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -81,6 +81,9 @@ enum { EC_FLAGS_BLOCKED, /* Transactions are blocked */ }; +#define ACPI_EC_COMMAND_POLL 0x01 /* Available for command byte */ +#define ACPI_EC_COMMAND_COMPLETE 0x02 /* Completed last byte */ + /* ec.c is compiled in acpi namespace so this shows up as acpi.ec_delay param */ static unsigned int ec_delay __read_mostly = ACPI_EC_DELAY; module_param(ec_delay, uint, 0644); @@ -116,7 +119,7 @@ struct transaction { u8 ri; u8 wlen; u8 rlen; - bool done; + u8 flags; }; struct acpi_ec *boot_ec, *first_ec; @@ -126,6 +129,7 @@ static int EC_FLAGS_MSI; /* Out-of-spec MSI controller */ static int EC_FLAGS_VALIDATE_ECDT; /* ASUStec ECDTs need to be validated */ static int EC_FLAGS_SKIP_DSDT_SCAN; /* Not all BIOS survive early DSDT scan */ static int EC_FLAGS_CLEAR_ON_RESUME; /* Needs acpi_ec_clear() on boot/resume */ +static int EC_FLAGS_QUERY_HANDSHAKE; /* Needs QR_EC issued when SCI_EVT set */ /* -------------------------------------------------------------------------- Transaction Management @@ -157,60 +161,84 @@ static inline void 
acpi_ec_write_data(struct acpi_ec *ec, u8 data) outb(data, ec->data_addr); } -static int ec_transaction_done(struct acpi_ec *ec) +static int ec_transaction_completed(struct acpi_ec *ec) { unsigned long flags; int ret = 0; spin_lock_irqsave(&ec->lock, flags); - if (!ec->curr || ec->curr->done) + if (ec->curr && (ec->curr->flags & ACPI_EC_COMMAND_COMPLETE)) ret = 1; spin_unlock_irqrestore(&ec->lock, flags); return ret; } -static void start_transaction(struct acpi_ec *ec) -{ - ec->curr->irq_count = ec->curr->wi = ec->curr->ri = 0; - ec->curr->done = false; - acpi_ec_write_cmd(ec, ec->curr->command); -} - -static void advance_transaction(struct acpi_ec *ec, u8 status) +static bool advance_transaction(struct acpi_ec *ec) { - unsigned long flags; struct transaction *t; + u8 status; + bool wakeup = false; - spin_lock_irqsave(&ec->lock, flags); + pr_debug("===== %s =====\n", in_interrupt() ? "IRQ" : "TASK"); + status = acpi_ec_read_status(ec); t = ec->curr; if (!t) - goto unlock; - if (t->wlen > t->wi) { - if ((status & ACPI_EC_FLAG_IBF) == 0) - acpi_ec_write_data(ec, - t->wdata[t->wi++]); - else - goto err; - } else if (t->rlen > t->ri) { - if ((status & ACPI_EC_FLAG_OBF) == 1) { - t->rdata[t->ri++] = acpi_ec_read_data(ec); - if (t->rlen == t->ri) - t->done = true; + goto err; + if (t->flags & ACPI_EC_COMMAND_POLL) { + if (t->wlen > t->wi) { + if ((status & ACPI_EC_FLAG_IBF) == 0) + acpi_ec_write_data(ec, t->wdata[t->wi++]); + else + goto err; + } else if (t->rlen > t->ri) { + if ((status & ACPI_EC_FLAG_OBF) == 1) { + t->rdata[t->ri++] = acpi_ec_read_data(ec); + if (t->rlen == t->ri) { + t->flags |= ACPI_EC_COMMAND_COMPLETE; + if (t->command == ACPI_EC_COMMAND_QUERY) + pr_debug("hardware QR_EC completion\n"); + wakeup = true; + } + } else + goto err; + } else if (t->wlen == t->wi && + (status & ACPI_EC_FLAG_IBF) == 0) { + t->flags |= ACPI_EC_COMMAND_COMPLETE; + wakeup = true; + } + return wakeup; + } else { + if (EC_FLAGS_QUERY_HANDSHAKE && + !(status & ACPI_EC_FLAG_SCI) 
&& + (t->command == ACPI_EC_COMMAND_QUERY)) { + t->flags |= ACPI_EC_COMMAND_POLL; + t->rdata[t->ri++] = 0x00; + t->flags |= ACPI_EC_COMMAND_COMPLETE; + pr_debug("software QR_EC completion\n"); + wakeup = true; + } else if ((status & ACPI_EC_FLAG_IBF) == 0) { + acpi_ec_write_cmd(ec, t->command); + t->flags |= ACPI_EC_COMMAND_POLL; } else goto err; - } else if (t->wlen == t->wi && - (status & ACPI_EC_FLAG_IBF) == 0) - t->done = true; - goto unlock; + return wakeup; + } err: /* * If SCI bit is set, then don't think it's a false IRQ * otherwise will take a not handled IRQ as a false one. */ - if (in_interrupt() && !(status & ACPI_EC_FLAG_SCI)) - ++t->irq_count; + if (!(status & ACPI_EC_FLAG_SCI)) { + if (in_interrupt() && t) + ++t->irq_count; + } + return wakeup; +} -unlock: - spin_unlock_irqrestore(&ec->lock, flags); +static void start_transaction(struct acpi_ec *ec) +{ + ec->curr->irq_count = ec->curr->wi = ec->curr->ri = 0; + ec->curr->flags = 0; + (void)advance_transaction(ec); } static int acpi_ec_sync_query(struct acpi_ec *ec, u8 *data); @@ -235,15 +263,17 @@ static int ec_poll(struct acpi_ec *ec) /* don't sleep with disabled interrupts */ if (EC_FLAGS_MSI || irqs_disabled()) { udelay(ACPI_EC_MSI_UDELAY); - if (ec_transaction_done(ec)) + if (ec_transaction_completed(ec)) return 0; } else { if (wait_event_timeout(ec->wait, - ec_transaction_done(ec), + ec_transaction_completed(ec), msecs_to_jiffies(1))) return 0; } - advance_transaction(ec, acpi_ec_read_status(ec)); + spin_lock_irqsave(&ec->lock, flags); + (void)advance_transaction(ec); + spin_unlock_irqrestore(&ec->lock, flags); } while (time_before(jiffies, delay)); pr_debug(PREFIX "controller reset, restart transaction\n"); spin_lock_irqsave(&ec->lock, flags); @@ -275,23 +305,6 @@ static int acpi_ec_transaction_unlocked(struct acpi_ec *ec, return ret; } -static int ec_check_ibf0(struct acpi_ec *ec) -{ - u8 status = acpi_ec_read_status(ec); - return (status & ACPI_EC_FLAG_IBF) == 0; -} - -static int 
ec_wait_ibf0(struct acpi_ec *ec) -{ - unsigned long delay = jiffies + msecs_to_jiffies(ec_delay); - /* interrupt wait manually if GPE mode is not active */ - while (time_before(jiffies, delay)) - if (wait_event_timeout(ec->wait, ec_check_ibf0(ec), - msecs_to_jiffies(1))) - return 0; - return -ETIME; -} - static int acpi_ec_transaction(struct acpi_ec *ec, struct transaction *t) { int status; @@ -312,12 +325,6 @@ static int acpi_ec_transaction(struct acpi_ec *ec, struct transaction *t) goto unlock; } } - if (ec_wait_ibf0(ec)) { - pr_err(PREFIX "input buffer is not empty, " - "aborting transaction\n"); - status = -ETIME; - goto end; - } pr_debug(PREFIX "transaction start (cmd=0x%02x, addr=0x%02x)\n", t->command, t->wdata ? t->wdata[0] : 0); /* disable GPE during transaction if storm is detected */ @@ -341,7 +348,6 @@ static int acpi_ec_transaction(struct acpi_ec *ec, struct transaction *t) set_bit(EC_FLAGS_GPE_STORM, &ec->flags); } pr_debug(PREFIX "transaction end\n"); -end: if (ec->global_lock) acpi_release_global_lock(glk); unlock: @@ -661,17 +667,14 @@ static int ec_check_sci(struct acpi_ec *ec, u8 state) static u32 acpi_ec_gpe_handler(acpi_handle gpe_device, u32 gpe_number, void *data) { + unsigned long flags; struct acpi_ec *ec = data; - u8 status = acpi_ec_read_status(ec); - - pr_debug(PREFIX "~~~> interrupt, status:0x%02x\n", status); - advance_transaction(ec, status); - if (ec_transaction_done(ec) && - (acpi_ec_read_status(ec) & ACPI_EC_FLAG_IBF) == 0) { + spin_lock_irqsave(&ec->lock, flags); + if (advance_transaction(ec)) wake_up(&ec->wait); - ec_check_sci(ec, acpi_ec_read_status(ec)); - } + spin_unlock_irqrestore(&ec->lock, flags); + ec_check_sci(ec, acpi_ec_read_status(ec)); return ACPI_INTERRUPT_HANDLED | ACPI_REENABLE_GPE; } @@ -990,6 +993,18 @@ static int ec_enlarge_storm_threshold(const struct dmi_system_id *id) } /* + * Acer EC firmware refuses to respond QR_EC when SCI_EVT is not set, for + * which case, we complete the QR_EC without issuing it to the 
firmware. + * https://bugzilla.kernel.org/show_bug.cgi?id=86211 + */ +static int ec_flag_query_handshake(const struct dmi_system_id *id) +{ + pr_debug("Detected the EC firmware requiring QR_EC issued when SCI_EVT set\n"); + EC_FLAGS_QUERY_HANDSHAKE = 1; + return 0; +} + +/* * On some hardware it is necessary to clear events accumulated by the EC during * sleep. These ECs stop reporting GPEs until they are manually polled, if too * many events are accumulated. (e.g. Samsung Series 5/9 notebooks) @@ -1059,6 +1074,9 @@ static struct dmi_system_id ec_dmi_table[] __initdata = { { ec_clear_on_resume, "Samsung hardware", { DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD.")}, NULL}, + { + ec_flag_query_handshake, "Acer hardware", { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), }, NULL}, {}, }; diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index e5f416c..d73f852 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -421,7 +421,7 @@ static void acpi_os_drop_map_ref(struct acpi_ioremap *map) static void acpi_os_map_cleanup(struct acpi_ioremap *map) { if (!map->refcount) { - synchronize_rcu(); + synchronize_rcu_expedited(); acpi_unmap(map->phys, map->virt); kfree(map); } diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index c7414a5..2a4ae32 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -1099,9 +1099,9 @@ int acpi_processor_cst_has_changed(struct acpi_processor *pr) if (pr->id == 0 && cpuidle_get_driver() == &acpi_idle_driver) { - cpuidle_pause_and_lock(); /* Protect against cpu-hotplug */ get_online_cpus(); + cpuidle_pause_and_lock(); /* Disable all cpuidle devices */ for_each_online_cpu(cpu) { @@ -1128,8 +1128,8 @@ int acpi_processor_cst_has_changed(struct acpi_processor *pr) cpuidle_enable_device(dev); } } - put_online_cpus(); cpuidle_resume_and_unlock(); + put_online_cpus(); } return 0; diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 0bdacc5..2ba8f02 100644 --- 
a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -77,7 +77,7 @@ bool acpi_dev_resource_memory(struct acpi_resource *ares, struct resource *res) switch (ares->type) { case ACPI_RESOURCE_TYPE_MEMORY24: memory24 = &ares->data.memory24; - if (!memory24->address_length) + if (!memory24->minimum && !memory24->address_length) return false; acpi_dev_get_memresource(res, memory24->minimum, memory24->address_length, @@ -85,7 +85,7 @@ bool acpi_dev_resource_memory(struct acpi_resource *ares, struct resource *res) break; case ACPI_RESOURCE_TYPE_MEMORY32: memory32 = &ares->data.memory32; - if (!memory32->address_length) + if (!memory32->minimum && !memory32->address_length) return false; acpi_dev_get_memresource(res, memory32->minimum, memory32->address_length, @@ -93,7 +93,7 @@ bool acpi_dev_resource_memory(struct acpi_resource *ares, struct resource *res) break; case ACPI_RESOURCE_TYPE_FIXED_MEMORY32: fixed_memory32 = &ares->data.fixed_memory32; - if (!fixed_memory32->address_length) + if (!fixed_memory32->address && !fixed_memory32->address_length) return false; acpi_dev_get_memresource(res, fixed_memory32->address, fixed_memory32->address_length, @@ -150,7 +150,7 @@ bool acpi_dev_resource_io(struct acpi_resource *ares, struct resource *res) switch (ares->type) { case ACPI_RESOURCE_TYPE_IO: io = &ares->data.io; - if (!io->address_length) + if (!io->minimum && !io->address_length) return false; acpi_dev_get_ioresource(res, io->minimum, io->address_length, @@ -158,7 +158,7 @@ bool acpi_dev_resource_io(struct acpi_resource *ares, struct resource *res) break; case ACPI_RESOURCE_TYPE_FIXED_IO: fixed_io = &ares->data.fixed_io; - if (!fixed_io->address_length) + if (!fixed_io->address && !fixed_io->address_length) return false; acpi_dev_get_ioresource(res, fixed_io->address, fixed_io->address_length, diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index 5708e44..47e4deb 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -81,13 +81,6 @@ 
module_param(brightness_switch_enabled, bool, 0644); static bool allow_duplicates; module_param(allow_duplicates, bool, 0644); -/* - * Some BIOSes claim they use minimum backlight at boot, - * and this may bring dimming screen after boot - */ -static bool use_bios_initial_backlight = 1; -module_param(use_bios_initial_backlight, bool, 0644); - static int register_count; static int acpi_video_bus_add(struct acpi_device *device); static int acpi_video_bus_remove(struct acpi_device *device); @@ -388,12 +381,6 @@ static int __init video_set_bqc_offset(const struct dmi_system_id *d) return 0; } -static int video_ignore_initial_backlight(const struct dmi_system_id *d) -{ - use_bios_initial_backlight = 0; - return 0; -} - static struct dmi_system_id video_dmi_table[] __initdata = { /* * Broken _BQC workaround http://bugzilla.kernel.org/show_bug.cgi?id=13121 @@ -438,54 +425,6 @@ static struct dmi_system_id video_dmi_table[] __initdata = { DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 7720"), }, }, - { - .callback = video_ignore_initial_backlight, - .ident = "HP Folio 13-2000", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP Folio 13 - 2000 Notebook PC"), - }, - }, - { - .callback = video_ignore_initial_backlight, - .ident = "Fujitsu E753", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "FUJITSU"), - DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E753"), - }, - }, - { - .callback = video_ignore_initial_backlight, - .ident = "HP Pavilion dm4", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion dm4 Notebook PC"), - }, - }, - { - .callback = video_ignore_initial_backlight, - .ident = "HP Pavilion g6 Notebook PC", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion g6 Notebook PC"), - }, - }, - { - .callback = video_ignore_initial_backlight, - .ident = "HP 1000 Notebook PC", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), 
- DMI_MATCH(DMI_PRODUCT_NAME, "HP 1000 Notebook PC"), - }, - }, - { - .callback = video_ignore_initial_backlight, - .ident = "HP Pavilion m4", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion m4 Notebook PC"), - }, - }, {} }; @@ -827,20 +766,18 @@ acpi_video_init_brightness(struct acpi_video_device *device) if (!device->cap._BQC) goto set_level; - if (use_bios_initial_backlight) { - level = acpi_video_bqc_value_to_level(device, level_old); - /* - * On some buggy laptops, _BQC returns an uninitialized - * value when invoked for the first time, i.e. - * level_old is invalid (no matter whether it's a level - * or an index). Set the backlight to max_level in this case. - */ - for (i = 2; i < br->count; i++) - if (level_old == br->levels[i]) - break; - if (i == br->count || !level) - level = max_level; - } + level = acpi_video_bqc_value_to_level(device, level_old); + /* + * On some buggy laptops, _BQC returns an uninitialized + * value when invoked for the first time, i.e. + * level_old is invalid (no matter whether it's a level + * or an index). Set the backlight to max_level in this case. 
+ */ + for (i = 2; i < br->count; i++) + if (level == br->levels[i]) + break; + if (i == br->count || !level) + level = max_level; set_level: result = acpi_video_device_lcd_set_level(device, level); diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index dc11b7a..53111fd 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -61,6 +61,7 @@ enum board_ids { /* board IDs by feature in alphabetical order */ board_ahci, board_ahci_ign_iferr, + board_ahci_nomsi, board_ahci_noncq, board_ahci_nosntf, board_ahci_yes_fbs, @@ -120,6 +121,13 @@ static const struct ata_port_info ahci_port_info[] = { .udma_mask = ATA_UDMA6, .port_ops = &ahci_ops, }, + [board_ahci_nomsi] = { + AHCI_HFLAGS (AHCI_HFLAG_NO_MSI), + .flags = AHCI_FLAG_COMMON, + .pio_mask = ATA_PIO4, + .udma_mask = ATA_UDMA6, + .port_ops = &ahci_ops, + }, [board_ahci_noncq] = { AHCI_HFLAGS (AHCI_HFLAG_NO_NCQ), .flags = AHCI_FLAG_COMMON, @@ -304,6 +312,22 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x9c85), board_ahci }, /* Wildcat Point-LP RAID */ { PCI_VDEVICE(INTEL, 0x9c87), board_ahci }, /* Wildcat Point-LP RAID */ { PCI_VDEVICE(INTEL, 0x9c8f), board_ahci }, /* Wildcat Point-LP RAID */ + { PCI_VDEVICE(INTEL, 0x8c82), board_ahci }, /* 9 Series AHCI */ + { PCI_VDEVICE(INTEL, 0x8c83), board_ahci }, /* 9 Series AHCI */ + { PCI_VDEVICE(INTEL, 0x8c84), board_ahci }, /* 9 Series RAID */ + { PCI_VDEVICE(INTEL, 0x8c85), board_ahci }, /* 9 Series RAID */ + { PCI_VDEVICE(INTEL, 0x8c86), board_ahci }, /* 9 Series RAID */ + { PCI_VDEVICE(INTEL, 0x8c87), board_ahci }, /* 9 Series RAID */ + { PCI_VDEVICE(INTEL, 0x8c8e), board_ahci }, /* 9 Series RAID */ + { PCI_VDEVICE(INTEL, 0x8c8f), board_ahci }, /* 9 Series RAID */ + { PCI_VDEVICE(INTEL, 0x9d03), board_ahci }, /* Sunrise Point-LP AHCI */ + { PCI_VDEVICE(INTEL, 0x9d05), board_ahci }, /* Sunrise Point-LP RAID */ + { PCI_VDEVICE(INTEL, 0x9d07), board_ahci }, /* Sunrise Point-LP RAID */ + { PCI_VDEVICE(INTEL, 0xa103), board_ahci }, /* 
Sunrise Point-H AHCI */ + { PCI_VDEVICE(INTEL, 0xa103), board_ahci }, /* Sunrise Point-H RAID */ + { PCI_VDEVICE(INTEL, 0xa105), board_ahci }, /* Sunrise Point-H RAID */ + { PCI_VDEVICE(INTEL, 0xa107), board_ahci }, /* Sunrise Point-H RAID */ + { PCI_VDEVICE(INTEL, 0xa10f), board_ahci }, /* Sunrise Point-H RAID */ /* JMicron 360/1/3/5/6, match class to avoid IDE function */ { PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, @@ -441,16 +465,23 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_MARVELL_EXT, 0x917a), .driver_data = board_ahci_yes_fbs }, /* 88se9172 */ { PCI_DEVICE(PCI_VENDOR_ID_MARVELL_EXT, 0x9172), + .driver_data = board_ahci_yes_fbs }, /* 88se9182 */ + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL_EXT, 0x9182), .driver_data = board_ahci_yes_fbs }, /* 88se9172 */ { PCI_DEVICE(PCI_VENDOR_ID_MARVELL_EXT, 0x9192), .driver_data = board_ahci_yes_fbs }, /* 88se9172 on some Gigabyte */ + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL_EXT, 0x91a0), + .driver_data = board_ahci_yes_fbs }, { PCI_DEVICE(PCI_VENDOR_ID_MARVELL_EXT, 0x91a3), .driver_data = board_ahci_yes_fbs }, { PCI_DEVICE(PCI_VENDOR_ID_MARVELL_EXT, 0x9230), .driver_data = board_ahci_yes_fbs }, + { PCI_DEVICE(PCI_VENDOR_ID_TTI, 0x0642), + .driver_data = board_ahci_yes_fbs }, /* Promise */ { PCI_VDEVICE(PROMISE, 0x3f20), board_ahci }, /* PDC42819 */ + { PCI_VDEVICE(PROMISE, 0x3781), board_ahci }, /* FastTrak TX8660 ahci-mode */ /* Asmedia */ { PCI_VDEVICE(ASMEDIA, 0x0601), board_ahci }, /* ASM1060 */ @@ -459,10 +490,11 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(ASMEDIA, 0x0612), board_ahci }, /* ASM1062 */ /* - * Samsung SSDs found on some macbooks. NCQ times out. - * https://bugzilla.kernel.org/show_bug.cgi?id=60731 + * Samsung SSDs found on some macbooks. NCQ times out if MSI is + * enabled. 
https://bugzilla.kernel.org/show_bug.cgi?id=60731 */ - { PCI_VDEVICE(SAMSUNG, 0x1600), board_ahci_noncq }, + { PCI_VDEVICE(SAMSUNG, 0x1600), board_ahci_nomsi }, + { PCI_VDEVICE(SAMSUNG, 0xa800), board_ahci_nomsi }, /* Enmotus */ { PCI_DEVICE(0x1c44, 0x8000), board_ahci }, @@ -1169,18 +1201,18 @@ int ahci_host_activate(struct ata_host *host, int irq, unsigned int n_msis) return rc; for (i = 0; i < host->n_ports; i++) { - const char* desc; struct ahci_port_priv *pp = host->ports[i]->private_data; - /* pp is NULL for dummy ports */ - if (pp) - desc = pp->irq_desc; - else - desc = dev_driver_string(host->dev); + /* Do not receive interrupts sent by dummy ports */ + if (!pp) { + disable_irq(irq + i); + continue; + } - rc = devm_request_threaded_irq(host->dev, - irq + i, ahci_hw_interrupt, ahci_thread_fn, IRQF_SHARED, - desc, host->ports[i]); + rc = devm_request_threaded_irq(host->dev, irq + i, + ahci_hw_interrupt, + ahci_thread_fn, IRQF_SHARED, + pp->irq_desc, host->ports[i]); if (rc) goto out_free_irqs; } diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c index 513ad7e..d2b5cf3 100644 --- a/drivers/ata/ata_piix.c +++ b/drivers/ata/ata_piix.c @@ -340,6 +340,14 @@ static const struct pci_device_id piix_pci_tbl[] = { { 0x8086, 0x0F21, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata_byt }, /* SATA Controller IDE (Coleto Creek) */ { 0x8086, 0x23a6, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata }, + /* SATA Controller IDE (9 Series) */ + { 0x8086, 0x8c88, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata_snb }, + /* SATA Controller IDE (9 Series) */ + { 0x8086, 0x8c89, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata_snb }, + /* SATA Controller IDE (9 Series) */ + { 0x8086, 0x8c80, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_snb }, + /* SATA Controller IDE (9 Series) */ + { 0x8086, 0x8c81, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_snb }, { } /* terminate list */ }; diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 40c6ef3..4cc5246 100644 --- 
a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4241,6 +4241,8 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { /* devices that don't properly handle queued TRIM commands */ { "Micron_M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM, }, { "Crucial_CT???M500SSD*", NULL, ATA_HORKAGE_NO_NCQ_TRIM, }, + { "Micron_M550*", NULL, ATA_HORKAGE_NO_NCQ_TRIM, }, + { "Crucial_CT*M550SSD*", NULL, ATA_HORKAGE_NO_NCQ_TRIM, }, /* * Some WD SATA-I drives spin up and down erratically when the link @@ -4800,6 +4802,10 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words) * ata_qc_new - Request an available ATA command, for queueing * @ap: target port * + * Some ATA host controllers may implement a queue depth which is less + * than ATA_MAX_QUEUE. So we shouldn't allocate a tag which is beyond + * the hardware limitation. + * * LOCKING: * None. */ @@ -4807,21 +4813,27 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words) static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap) { struct ata_queued_cmd *qc = NULL; - unsigned int i; + unsigned int max_queue = ap->host->n_tags; + unsigned int i, tag; /* no command while frozen */ if (unlikely(ap->pflags & ATA_PFLAG_FROZEN)) return NULL; - /* the last tag is reserved for internal command. */ - for (i = 0; i < ATA_MAX_QUEUE - 1; i++) - if (!test_and_set_bit(i, &ap->qc_allocated)) { - qc = __ata_qc_from_tag(ap, i); + for (i = 0, tag = ap->last_tag + 1; i < max_queue; i++, tag++) { + tag = tag < max_queue ? tag : 0; + + /* the last tag is reserved for internal command. 
*/ + if (tag == ATA_TAG_INTERNAL) + continue; + + if (!test_and_set_bit(tag, &ap->qc_allocated)) { + qc = __ata_qc_from_tag(ap, tag); + qc->tag = tag; + ap->last_tag = tag; break; } - - if (qc) - qc->tag = i; + } return qc; } @@ -6111,6 +6123,7 @@ void ata_host_init(struct ata_host *host, struct device *dev, { spin_lock_init(&host->lock); mutex_init(&host->eh_mutex); + host->n_tags = ATA_MAX_QUEUE - 1; host->dev = dev; host->ops = ops; } @@ -6192,6 +6205,8 @@ int ata_host_register(struct ata_host *host, struct scsi_host_template *sht) { int i, rc; + host->n_tags = clamp(sht->can_queue, 1, ATA_MAX_QUEUE - 1); + /* host must have been started */ if (!(host->flags & ATA_HOST_STARTED)) { dev_err(host->dev, "BUG: trying to register unstarted host\n"); @@ -6337,6 +6352,8 @@ int ata_host_activate(struct ata_host *host, int irq, static void ata_port_detach(struct ata_port *ap) { unsigned long flags; + struct ata_link *link; + struct ata_device *dev; if (!ap->ops->error_handler) goto skip_eh; @@ -6356,6 +6373,13 @@ static void ata_port_detach(struct ata_port *ap) cancel_delayed_work_sync(&ap->hotplug_task); skip_eh: + /* clean up zpodd on port removal */ + ata_for_each_link(link, ap, HOST_FIRST) { + ata_for_each_dev(dev, link, ALL) { + if (zpodd_dev_enabled(dev)) + zpodd_exit(dev); + } + } if (ap->pmp_link) { int i; for (i = 0; i < SATA_PMP_MAX_PORTS; i++) diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 2afbd46..0f9b9a7 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -1333,7 +1333,19 @@ void ata_sff_flush_pio_task(struct ata_port *ap) DPRINTK("ENTER\n"); cancel_delayed_work_sync(&ap->sff_pio_task); + + /* + * We wanna reset the HSM state to IDLE. If we do so without + * grabbing the port lock, critical sections protected by it which + * expect the HSM state to stay stable may get surprised. 
For + * example, we may set IDLE in between the time + * __ata_sff_port_intr() checks for HSM_ST_IDLE and before it calls + * ata_sff_hsm_move() causing ata_sff_hsm_move() to BUG(). + */ + spin_lock_irq(ap->lock); ap->hsm_task_state = HSM_ST_IDLE; + spin_unlock_irq(ap->lock); + ap->sff_pio_task_link = NULL; if (ata_msg_ctl(ap)) @@ -2008,13 +2020,15 @@ static int ata_bus_softreset(struct ata_port *ap, unsigned int devmask, DPRINTK("ata%u: bus reset via SRST\n", ap->print_id); - /* software reset. causes dev0 to be selected */ - iowrite8(ap->ctl, ioaddr->ctl_addr); - udelay(20); /* FIXME: flush */ - iowrite8(ap->ctl | ATA_SRST, ioaddr->ctl_addr); - udelay(20); /* FIXME: flush */ - iowrite8(ap->ctl, ioaddr->ctl_addr); - ap->last_ctl = ap->ctl; + if (ap->ioaddr.ctl_addr) { + /* software reset. causes dev0 to be selected */ + iowrite8(ap->ctl, ioaddr->ctl_addr); + udelay(20); /* FIXME: flush */ + iowrite8(ap->ctl | ATA_SRST, ioaddr->ctl_addr); + udelay(20); /* FIXME: flush */ + iowrite8(ap->ctl, ioaddr->ctl_addr); + ap->last_ctl = ap->ctl; + } /* wait the port to become ready */ return ata_sff_wait_after_reset(&ap->link, devmask, deadline); @@ -2215,10 +2229,6 @@ void ata_sff_error_handler(struct ata_port *ap) spin_unlock_irqrestore(ap->lock, flags); - /* ignore ata_sff_softreset if ctl isn't accessible */ - if (softreset == ata_sff_softreset && !ap->ioaddr.ctl_addr) - softreset = NULL; - /* ignore built-in hardresets if SCR access is not available */ if ((hardreset == sata_std_hardreset || hardreset == sata_sff_hardreset) && !sata_scr_valid(&ap->link)) diff --git a/drivers/ata/pata_at91.c b/drivers/ata/pata_at91.c index d63ee8f..e3a49df 100644 --- a/drivers/ata/pata_at91.c +++ b/drivers/ata/pata_at91.c @@ -408,12 +408,13 @@ static int pata_at91_probe(struct platform_device *pdev) host->private_data = info; - return ata_host_activate(host, gpio_is_valid(irq) ? gpio_to_irq(irq) : 0, - gpio_is_valid(irq) ? 
ata_sff_interrupt : NULL, - irq_flags, &pata_at91_sht); + ret = ata_host_activate(host, gpio_is_valid(irq) ? gpio_to_irq(irq) : 0, + gpio_is_valid(irq) ? ata_sff_interrupt : NULL, + irq_flags, &pata_at91_sht); + if (ret) + goto err_put; - if (!ret) - return 0; + return 0; err_put: clk_put(info->mck); diff --git a/drivers/ata/pata_scc.c b/drivers/ata/pata_scc.c index f35f15f..f7badaa 100644 --- a/drivers/ata/pata_scc.c +++ b/drivers/ata/pata_scc.c @@ -586,7 +586,7 @@ static int scc_wait_after_reset(struct ata_link *link, unsigned int devmask, * Note: Original code is ata_bus_softreset(). */ -static unsigned int scc_bus_softreset(struct ata_port *ap, unsigned int devmask, +static int scc_bus_softreset(struct ata_port *ap, unsigned int devmask, unsigned long deadline) { struct ata_ioports *ioaddr = &ap->ioaddr; @@ -600,9 +600,7 @@ static unsigned int scc_bus_softreset(struct ata_port *ap, unsigned int devmask, udelay(20); out_be32(ioaddr->ctl_addr, ap->ctl); - scc_wait_after_reset(&ap->link, devmask, deadline); - - return 0; + return scc_wait_after_reset(&ap->link, devmask, deadline); } /** @@ -619,7 +617,8 @@ static int scc_softreset(struct ata_link *link, unsigned int *classes, { struct ata_port *ap = link->ap; unsigned int slave_possible = ap->flags & ATA_FLAG_SLAVE_POSS; - unsigned int devmask = 0, err_mask; + unsigned int devmask = 0; + int rc; u8 err; DPRINTK("ENTER\n"); @@ -635,9 +634,9 @@ static int scc_softreset(struct ata_link *link, unsigned int *classes, /* issue bus reset */ DPRINTK("about to softreset, devmask=%x\n", devmask); - err_mask = scc_bus_softreset(ap, devmask, deadline); - if (err_mask) { - ata_port_err(ap, "SRST failed (err_mask=0x%x)\n", err_mask); + rc = scc_bus_softreset(ap, devmask, deadline); + if (rc) { + ata_port_err(ap, "SRST failed (err_mask=0x%x)\n", rc); return -EIO; } diff --git a/drivers/ata/pata_serverworks.c b/drivers/ata/pata_serverworks.c index 96c6a79..79dedba 100644 --- a/drivers/ata/pata_serverworks.c +++ 
b/drivers/ata/pata_serverworks.c @@ -252,12 +252,18 @@ static void serverworks_set_dmamode(struct ata_port *ap, struct ata_device *adev pci_write_config_byte(pdev, 0x54, ultra_cfg); } -static struct scsi_host_template serverworks_sht = { +static struct scsi_host_template serverworks_osb4_sht = { + ATA_BMDMA_SHT(DRV_NAME), + .sg_tablesize = LIBATA_DUMB_MAX_PRD, +}; + +static struct scsi_host_template serverworks_csb_sht = { ATA_BMDMA_SHT(DRV_NAME), }; static struct ata_port_operations serverworks_osb4_port_ops = { .inherits = &ata_bmdma_port_ops, + .qc_prep = ata_bmdma_dumb_qc_prep, .cable_detect = serverworks_cable_detect, .mode_filter = serverworks_osb4_filter, .set_piomode = serverworks_set_piomode, @@ -266,6 +272,7 @@ static struct ata_port_operations serverworks_osb4_port_ops = { static struct ata_port_operations serverworks_csb_port_ops = { .inherits = &serverworks_osb4_port_ops, + .qc_prep = ata_bmdma_qc_prep, .mode_filter = serverworks_csb_filter, }; @@ -405,6 +412,7 @@ static int serverworks_init_one(struct pci_dev *pdev, const struct pci_device_id } }; const struct ata_port_info *ppi[] = { &info[id->driver_data], NULL }; + struct scsi_host_template *sht = &serverworks_csb_sht; int rc; rc = pcim_enable_device(pdev); @@ -418,6 +426,7 @@ static int serverworks_init_one(struct pci_dev *pdev, const struct pci_device_id /* Select non UDMA capable OSB4 if we can't do fixups */ if (rc < 0) ppi[0] = &info[1]; + sht = &serverworks_osb4_sht; } /* setup CSB5/CSB6 : South Bridge and IDE option RAID */ else if ((pdev->device == PCI_DEVICE_ID_SERVERWORKS_CSB5IDE) || @@ -434,7 +443,7 @@ static int serverworks_init_one(struct pci_dev *pdev, const struct pci_device_id ppi[1] = &ata_dummy_port_info; } - return ata_pci_bmdma_init_one(pdev, ppi, &serverworks_sht, NULL, 0); + return ata_pci_bmdma_init_one(pdev, ppi, sht, NULL, 0); } #ifdef CONFIG_PM diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c index 2e39173..776b59f 100644 --- 
a/drivers/ata/sata_dwc_460ex.c +++ b/drivers/ata/sata_dwc_460ex.c @@ -797,7 +797,7 @@ static int dma_dwc_init(struct sata_dwc_device *hsdev, int irq) if (err) { dev_err(host_pvt.dwc_dev, "%s: dma_request_interrupts returns" " %d\n", __func__, err); - goto error_out; + return err; } /* Enabe DMA */ @@ -808,11 +808,6 @@ static int dma_dwc_init(struct sata_dwc_device *hsdev, int irq) sata_dma_regs); return 0; - -error_out: - dma_dwc_exit(hsdev); - - return err; } static int sata_dwc_scr_read(struct ata_link *link, unsigned int scr, u32 *val) @@ -1662,7 +1657,7 @@ static int sata_dwc_probe(struct platform_device *ofdev) char *ver = (char *)&versionr; u8 *base = NULL; int err = 0; - int irq, rc; + int irq; struct ata_host *host; struct ata_port_info pi = sata_dwc_port_info[0]; const struct ata_port_info *ppi[] = { &pi, NULL }; @@ -1725,7 +1720,7 @@ static int sata_dwc_probe(struct platform_device *ofdev) if (irq == NO_IRQ) { dev_err(&ofdev->dev, "no SATA DMA irq\n"); err = -ENODEV; - goto error_out; + goto error_iomap; } /* Get physical SATA DMA register base address */ @@ -1734,14 +1729,16 @@ static int sata_dwc_probe(struct platform_device *ofdev) dev_err(&ofdev->dev, "ioremap failed for AHBDMA register" " address\n"); err = -ENODEV; - goto error_out; + goto error_iomap; } /* Save dev for later use in dev_xxx() routines */ host_pvt.dwc_dev = &ofdev->dev; /* Initialize AHB DMAC */ - dma_dwc_init(hsdev, irq); + err = dma_dwc_init(hsdev, irq); + if (err) + goto error_dma_iomap; /* Enable SATA Interrupts */ sata_dwc_enable_interrupts(hsdev); @@ -1759,9 +1756,8 @@ static int sata_dwc_probe(struct platform_device *ofdev) * device discovery process, invoking our port_start() handler & * error_handler() to execute a dummy Softreset EH session */ - rc = ata_host_activate(host, irq, sata_dwc_isr, 0, &sata_dwc_sht); - - if (rc != 0) + err = ata_host_activate(host, irq, sata_dwc_isr, 0, &sata_dwc_sht); + if (err) dev_err(&ofdev->dev, "failed to activate host"); 
dev_set_drvdata(&ofdev->dev, host); @@ -1770,7 +1766,8 @@ static int sata_dwc_probe(struct platform_device *ofdev) error_out: /* Free SATA DMA resources */ dma_dwc_exit(hsdev); - +error_dma_iomap: + iounmap((void __iomem *)host_pvt.sata_dma_regs); error_iomap: iounmap(base); error_kmalloc: @@ -1791,6 +1788,7 @@ static int sata_dwc_remove(struct platform_device *ofdev) /* Free SATA DMA resources */ dma_dwc_exit(hsdev); + iounmap((void __iomem *)host_pvt.sata_dma_regs); iounmap(hsdev->reg_base); kfree(hsdev); kfree(host); diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index 1d6d690..6f80159 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -1501,7 +1501,7 @@ static int sata_fsl_probe(struct platform_device *ofdev) host_priv->csr_base = csr_base; irq = irq_of_parse_and_map(ofdev->dev.of_node, 0); - if (irq < 0) { + if (!irq) { dev_err(&ofdev->dev, "invalid irq from platform\n"); goto error_exit_with_cleanup; } diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 4c289ab..aed92e4 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -243,13 +243,15 @@ static ssize_t store_drivers_probe(struct bus_type *bus, const char *buf, size_t count) { struct device *dev; + int err = -EINVAL; dev = bus_find_device_by_name(bus, NULL, buf); if (!dev) return -ENODEV; - if (bus_rescan_devices_helper(dev, NULL) != 0) - return -EINVAL; - return count; + if (bus_rescan_devices_helper(dev, NULL) == 0) + err = count; + put_device(dev); + return err; } static struct device *next_device(struct klist_iter *i) diff --git a/drivers/base/core.c b/drivers/base/core.c index 34abf4d..944fecd 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -812,12 +812,12 @@ class_dir_create_and_add(struct class *class, struct kobject *parent_kobj) return &dir->kobj; } +static DEFINE_MUTEX(gdp_mutex); static struct kobject *get_device_parent(struct device *dev, struct device *parent) { if (dev->class) { - static DEFINE_MUTEX(gdp_mutex); struct kobject *kobj = 
NULL; struct kobject *parent_kobj; struct kobject *k; @@ -881,7 +881,9 @@ static void cleanup_glue_dir(struct device *dev, struct kobject *glue_dir) glue_dir->kset != &dev->class->p->glue_dirs) return; + mutex_lock(&gdp_mutex); kobject_put(glue_dir); + mutex_unlock(&gdp_mutex); } static void cleanup_device_parent(struct device *dev) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 0605176..8a8d611 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -52,6 +52,7 @@ static DEFINE_MUTEX(deferred_probe_mutex); static LIST_HEAD(deferred_probe_pending_list); static LIST_HEAD(deferred_probe_active_list); static struct workqueue_struct *deferred_wq; +static atomic_t deferred_trigger_count = ATOMIC_INIT(0); /** * deferred_probe_work_func() - Retry probing devices in the active list. @@ -135,6 +136,17 @@ static bool driver_deferred_probe_enable = false; * This functions moves all devices from the pending list to the active * list and schedules the deferred probe workqueue to process them. It * should be called anytime a driver is successfully bound to a device. + * + * Note, there is a race condition in multi-threaded probe. In the case where + * more than one device is probing at the same time, it is possible for one + * probe to complete successfully while another is about to defer. If the second + * depends on the first, then it will get put on the pending list after the + * trigger event has already occured and will be stuck there. + * + * The atomic 'deferred_trigger_count' is used to determine if a successful + * trigger has occurred in the midst of probing a driver. If the trigger count + * changes in the midst of a probe, then deferred processing should be triggered + * again. 
*/ static void driver_deferred_probe_trigger(void) { @@ -147,6 +159,7 @@ static void driver_deferred_probe_trigger(void) * into the active list so they can be retried by the workqueue */ mutex_lock(&deferred_probe_mutex); + atomic_inc(&deferred_trigger_count); list_splice_tail_init(&deferred_probe_pending_list, &deferred_probe_active_list); mutex_unlock(&deferred_probe_mutex); @@ -265,6 +278,7 @@ static DECLARE_WAIT_QUEUE_HEAD(probe_waitqueue); static int really_probe(struct device *dev, struct device_driver *drv) { int ret = 0; + int local_trigger_count = atomic_read(&deferred_trigger_count); atomic_inc(&probe_count); pr_debug("bus: '%s': %s: probing driver %s with device %s\n", @@ -310,6 +324,9 @@ probe_failed: /* Driver requested deferred probing */ dev_info(dev, "Driver %s requests probe deferral\n", drv->name); driver_deferred_probe_add(dev); + /* Did a trigger occur while probing? Need to re-trigger if yes */ + if (local_trigger_count != atomic_read(&deferred_trigger_count)) + driver_deferred_probe_trigger(); } else if (ret != -ENODEV && ret != -ENXIO) { /* driver matched but the probe failed */ printk(KERN_WARNING diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c index 99802d6f..e057744 100644 --- a/drivers/base/dma-contiguous.c +++ b/drivers/base/dma-contiguous.c @@ -155,13 +155,23 @@ static int __init cma_activate_area(struct cma *cma) base_pfn = pfn; for (j = pageblock_nr_pages; j; --j, pfn++) { WARN_ON_ONCE(!pfn_valid(pfn)); + /* + * alloc_contig_range requires the pfn range + * specified to be in the same zone. Make this + * simple by forcing the entire CMA resv range + * to be in the same zone. 
+ */ if (page_zone(pfn_to_page(pfn)) != zone) - return -EINVAL; + goto err; } init_cma_reserved_pageblock(pfn_to_page(base_pfn)); } while (--i); return 0; + +err: + kfree(cma->bitmap); + return -EINVAL; } static struct cma cma_areas[MAX_CMA_AREAS]; diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 701212b..ec85b81 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -1063,6 +1063,9 @@ _request_firmware(const struct firmware **firmware_p, const char *name, if (!firmware_p) return -EINVAL; + if (!name || name[0] == '\0') + return -EINVAL; + ret = _request_firmware_prepare(&fw, name, device); if (ret <= 0) /* error or already assigned */ goto out; diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c index d6c2d69..8560dca 100644 --- a/drivers/base/regmap/regcache.c +++ b/drivers/base/regmap/regcache.c @@ -690,7 +690,7 @@ int regcache_sync_block(struct regmap *map, void *block, unsigned int block_base, unsigned int start, unsigned int end) { - if (regmap_can_raw_write(map)) + if (regmap_can_raw_write(map) && !map->use_single_rw) return regcache_sync_block_raw(map, block, cache_present, block_base, start, end); else diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c index de11eca..b18c7da 100644 --- a/drivers/base/regmap/regmap-debugfs.c +++ b/drivers/base/regmap/regmap-debugfs.c @@ -464,16 +464,20 @@ void regmap_debugfs_init(struct regmap *map, const char *name) { struct rb_node *next; struct regmap_range_node *range_node; + const char *devname = "dummy"; INIT_LIST_HEAD(&map->debugfs_off_cache); mutex_init(&map->cache_lock); + if (map->dev) + devname = dev_name(map->dev); + if (name) { map->debugfs_name = kasprintf(GFP_KERNEL, "%s-%s", - dev_name(map->dev), name); + devname, name); name = map->debugfs_name; } else { - name = dev_name(map->dev); + name = devname; } map->debugfs = debugfs_create_dir(name, regmap_debugfs_root); diff --git 
a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index b4116f2..3007336 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -115,7 +115,7 @@ bool regmap_readable(struct regmap *map, unsigned int reg) bool regmap_volatile(struct regmap *map, unsigned int reg) { - if (!regmap_readable(map, reg)) + if (!map->format.format_write && !regmap_readable(map, reg)) return false; if (map->volatile_reg) @@ -1390,7 +1390,7 @@ int _regmap_write(struct regmap *map, unsigned int reg, } #ifdef LOG_DEVICE - if (strcmp(dev_name(map->dev), LOG_DEVICE) == 0) + if (map->dev && strcmp(dev_name(map->dev), LOG_DEVICE) == 0) dev_info(map->dev, "%x <= %x\n", reg, val); #endif @@ -1512,6 +1512,11 @@ int regmap_bulk_write(struct regmap *map, unsigned int reg, const void *val, if (val_bytes == 1) { wval = (void *)val; } else { + if (!val_count) { + ret = -EINVAL; + goto out; + } + wval = kmemdup(val, val_count * val_bytes, GFP_KERNEL); if (!wval) { ret = -ENOMEM; @@ -1666,7 +1671,7 @@ static int _regmap_read(struct regmap *map, unsigned int reg, ret = map->reg_read(context, reg, val); if (ret == 0) { #ifdef LOG_DEVICE - if (strcmp(dev_name(map->dev), LOG_DEVICE) == 0) + if (map->dev && strcmp(dev_name(map->dev), LOG_DEVICE) == 0) dev_info(map->dev, "%x => %x\n", reg, *val); #endif diff --git a/drivers/block/drbd/drbd_interval.c b/drivers/block/drbd/drbd_interval.c index 89c497c..04a14e0 100644 --- a/drivers/block/drbd/drbd_interval.c +++ b/drivers/block/drbd/drbd_interval.c @@ -79,6 +79,7 @@ bool drbd_insert_interval(struct rb_root *root, struct drbd_interval *this) { struct rb_node **new = &root->rb_node, *parent = NULL; + sector_t this_end = this->sector + (this->size >> 9); BUG_ON(!IS_ALIGNED(this->size, 512)); @@ -87,6 +88,8 @@ drbd_insert_interval(struct rb_root *root, struct drbd_interval *this) rb_entry(*new, struct drbd_interval, rb); parent = *new; + if (here->end < this_end) + here->end = this_end; if (this->sector < here->sector) new = 
&(*new)->rb_left; else if (this->sector > here->sector) @@ -99,6 +102,7 @@ drbd_insert_interval(struct rb_root *root, struct drbd_interval *this) return false; } + this->end = this_end; rb_link_node(&this->rb, parent, new); rb_insert_augmented(&this->rb, root, &augment_callbacks); return true; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 8cc1e64..5369baf 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -525,6 +525,12 @@ void conn_try_outdate_peer_async(struct drbd_tconn *tconn) struct task_struct *opa; kref_get(&tconn->kref); + /* We may just have force_sig()'ed this thread + * to get it out of some blocking network function. + * Clear signals; otherwise kthread_run(), which internally uses + * wait_on_completion_killable(), will mistake our pending signal + * for a new fatal signal and fail. */ + flush_signals(current); opa = kthread_run(_try_outdate_peer_async, tconn, "drbd_async_h"); if (IS_ERR(opa)) { conn_err(tconn, "out of mem, failed to invoke fence-peer helper\n"); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index c24379f..b2ae184 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1309,6 +1309,7 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; if (b->merge_bvec_fn) { + bvm->bi_bdev = mdev->ldev->backing_bdev; backing_limit = b->merge_bvec_fn(b, bvm, bvec); limit = min(limit, backing_limit); } diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 690011d..f0bbdec 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3053,7 +3053,10 @@ static int raw_cmd_copyout(int cmd, void __user *param, int ret; while (ptr) { - ret = copy_to_user(param, ptr, sizeof(*ptr)); + struct floppy_raw_cmd cmd = *ptr; + cmd.next = NULL; + cmd.kernel_data = NULL; + ret = copy_to_user(param, &cmd, sizeof(cmd)); if (ret) 
return -EFAULT; param += sizeof(struct floppy_raw_cmd); @@ -3107,10 +3110,11 @@ loop: return -ENOMEM; *rcmd = ptr; ret = copy_from_user(ptr, param, sizeof(*ptr)); - if (ret) - return -EFAULT; ptr->next = NULL; ptr->buffer_length = 0; + ptr->kernel_data = NULL; + if (ret) + return -EFAULT; param += sizeof(struct floppy_raw_cmd); if (ptr->cmd_count > 33) /* the command may now also take up the space @@ -3126,7 +3130,6 @@ loop: for (i = 0; i < 16; i++) ptr->reply[i] = 0; ptr->resultcode = 0; - ptr->kernel_data = NULL; if (ptr->flags & (FD_RAW_READ | FD_RAW_WRITE)) { if (ptr->length <= 0) @@ -3795,7 +3798,7 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive) bio.bi_size = size; bio.bi_bdev = bdev; bio.bi_sector = 0; - bio.bi_flags = (1 << BIO_QUIET); + bio.bi_flags |= (1 << BIO_QUIET); bio.bi_private = &cbdata; bio.bi_end_io = floppy_rb0_cb; diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 952dbfe..560227b 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -621,6 +621,12 @@ static void mtip_timeout_function(unsigned long int data) */ writel(1 << bit, port->completed[group]); + /* Unmap the DMA scatter list entries */ + dma_unmap_sg(&port->dd->pdev->dev, + command->sg, + command->scatter_ents, + command->direction); + /* Call the async completion callback. */ if (likely(command->async_callback)) command->async_callback(command->async_data, @@ -628,12 +634,6 @@ static void mtip_timeout_function(unsigned long int data) command->async_callback = NULL; command->comp_func = NULL; - /* Unmap the DMA scatter list entries */ - dma_unmap_sg(&port->dd->pdev->dev, - command->sg, - command->scatter_ents, - command->direction); - /* * Clear the allocated bit and active tag for the * command. 
@@ -711,6 +711,12 @@ static void mtip_async_complete(struct mtip_port *port, "Command tag %d failed due to TFE\n", tag); } + /* Unmap the DMA scatter list entries */ + dma_unmap_sg(&dd->pdev->dev, + command->sg, + command->scatter_ents, + command->direction); + /* Upper layer callback */ if (likely(command->async_callback)) command->async_callback(command->async_data, cb_status); @@ -718,12 +724,6 @@ static void mtip_async_complete(struct mtip_port *port, command->async_callback = NULL; command->comp_func = NULL; - /* Unmap the DMA scatter list entries */ - dma_unmap_sg(&dd->pdev->dev, - command->sg, - command->scatter_ents, - command->direction); - /* Clear the allocated and active bits for the command */ atomic_set(&port->commands[tag].active, 0); release_slot(port, tag); @@ -1493,6 +1493,37 @@ static inline void ata_swap_string(u16 *buf, unsigned int len) be16_to_cpus(&buf[i]); } +static void mtip_set_timeout(struct driver_data *dd, + struct host_to_dev_fis *fis, + unsigned int *timeout, u8 erasemode) +{ + switch (fis->command) { + case ATA_CMD_DOWNLOAD_MICRO: + *timeout = 120000; /* 2 minutes */ + break; + case ATA_CMD_SEC_ERASE_UNIT: + case 0xFC: + if (erasemode) + *timeout = ((*(dd->port->identify + 90) * 2) * 60000); + else + *timeout = ((*(dd->port->identify + 89) * 2) * 60000); + break; + case ATA_CMD_STANDBYNOW1: + *timeout = 120000; /* 2 minutes */ + break; + case 0xF7: + case 0xFA: + *timeout = 60000; /* 60 seconds */ + break; + case ATA_CMD_SMART: + *timeout = 15000; /* 15 seconds */ + break; + default: + *timeout = MTIP_IOCTL_COMMAND_TIMEOUT_MS; + break; + } +} + /* * Request the device identity information. * @@ -1602,6 +1633,7 @@ static int mtip_standby_immediate(struct mtip_port *port) int rv; struct host_to_dev_fis fis; unsigned long start; + unsigned int timeout; /* Build the FIS. 
*/ memset(&fis, 0, sizeof(struct host_to_dev_fis)); @@ -1609,6 +1641,8 @@ static int mtip_standby_immediate(struct mtip_port *port) fis.opts = 1 << 7; fis.command = ATA_CMD_STANDBYNOW1; + mtip_set_timeout(port->dd, &fis, &timeout, 0); + start = jiffies; rv = mtip_exec_internal_command(port, &fis, @@ -1617,7 +1651,7 @@ static int mtip_standby_immediate(struct mtip_port *port) 0, 0, GFP_ATOMIC, - 15000); + timeout); dbg_printk(MTIP_DRV_NAME "Time taken to complete standby cmd: %d ms\n", jiffies_to_msecs(jiffies - start)); if (rv) @@ -2156,36 +2190,6 @@ static unsigned int implicit_sector(unsigned char command, } return rv; } -static void mtip_set_timeout(struct driver_data *dd, - struct host_to_dev_fis *fis, - unsigned int *timeout, u8 erasemode) -{ - switch (fis->command) { - case ATA_CMD_DOWNLOAD_MICRO: - *timeout = 120000; /* 2 minutes */ - break; - case ATA_CMD_SEC_ERASE_UNIT: - case 0xFC: - if (erasemode) - *timeout = ((*(dd->port->identify + 90) * 2) * 60000); - else - *timeout = ((*(dd->port->identify + 89) * 2) * 60000); - break; - case ATA_CMD_STANDBYNOW1: - *timeout = 120000; /* 2 minutes */ - break; - case 0xF7: - case 0xFA: - *timeout = 60000; /* 60 seconds */ - break; - case ATA_CMD_SMART: - *timeout = 15000; /* 15 seconds */ - break; - default: - *timeout = MTIP_IOCTL_COMMAND_TIMEOUT_MS; - break; - } -} /* * Executes a taskfile @@ -4040,6 +4044,7 @@ skip_create_disk: blk_queue_max_hw_sectors(dd->queue, 0xffff); blk_queue_max_segment_size(dd->queue, 0x400000); blk_queue_io_min(dd->queue, 4096); + blk_queue_bounce_limit(dd->queue, dd->pdev->dma_mask); /* * write back cache is not supported in the device. 
FUA depends on @@ -4284,6 +4289,57 @@ static DEFINE_HANDLER(5); static DEFINE_HANDLER(6); static DEFINE_HANDLER(7); +static void mtip_disable_link_opts(struct driver_data *dd, struct pci_dev *pdev) +{ + int pos; + unsigned short pcie_dev_ctrl; + + pos = pci_find_capability(pdev, PCI_CAP_ID_EXP); + if (pos) { + pci_read_config_word(pdev, + pos + PCI_EXP_DEVCTL, + &pcie_dev_ctrl); + if (pcie_dev_ctrl & (1 << 11) || + pcie_dev_ctrl & (1 << 4)) { + dev_info(&dd->pdev->dev, + "Disabling ERO/No-Snoop on bridge device %04x:%04x\n", + pdev->vendor, pdev->device); + pcie_dev_ctrl &= ~(PCI_EXP_DEVCTL_NOSNOOP_EN | + PCI_EXP_DEVCTL_RELAX_EN); + pci_write_config_word(pdev, + pos + PCI_EXP_DEVCTL, + pcie_dev_ctrl); + } + } +} + +static void mtip_fix_ero_nosnoop(struct driver_data *dd, struct pci_dev *pdev) +{ + /* + * This workaround is specific to AMD/ATI chipset with a PCI upstream + * device with device id 0x5aXX + */ + if (pdev->bus && pdev->bus->self) { + if (pdev->bus->self->vendor == PCI_VENDOR_ID_ATI && + ((pdev->bus->self->device & 0xff00) == 0x5a00)) { + mtip_disable_link_opts(dd, pdev->bus->self); + } else { + /* Check further up the topology */ + struct pci_dev *parent_dev = pdev->bus->self; + if (parent_dev->bus && + parent_dev->bus->parent && + parent_dev->bus->parent->self && + parent_dev->bus->parent->self->vendor == + PCI_VENDOR_ID_ATI && + (parent_dev->bus->parent->self->device & + 0xff00) == 0x5a00) { + mtip_disable_link_opts(dd, + parent_dev->bus->parent->self); + } + } + } +} + /* * Called for each supported PCI device detected. * @@ -4435,6 +4491,8 @@ static int mtip_pci_probe(struct pci_dev *pdev, goto block_initialize_err; } + mtip_fix_ero_nosnoop(dd, pdev); + /* Initialize the block layer. */ rv = mtip_block_initialize(dd); if (rv < 0) { @@ -4727,13 +4785,13 @@ static int __init mtip_init(void) */ static void __exit mtip_exit(void) { - debugfs_remove_recursive(dfs_parent); - /* Release the allocated major block device number. 
*/ unregister_blkdev(mtip_major, MTIP_DRV_NAME); /* Unregister the PCI driver. */ pci_unregister_driver(&mtip_pci_driver); + + debugfs_remove_recursive(dfs_parent); } MODULE_AUTHOR("Micron Technology, Inc"); diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index cb1db29..a868418 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1379,6 +1379,14 @@ static bool obj_request_exists_test(struct rbd_obj_request *obj_request) return test_bit(OBJ_REQ_EXISTS, &obj_request->flags) != 0; } +static bool obj_request_overlaps_parent(struct rbd_obj_request *obj_request) +{ + struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev; + + return obj_request->img_offset < + round_up(rbd_dev->parent_overlap, rbd_obj_bytes(&rbd_dev->header)); +} + static void rbd_obj_request_get(struct rbd_obj_request *obj_request) { dout("%s: obj %p (was %d)\n", __func__, obj_request, @@ -1395,6 +1403,13 @@ static void rbd_obj_request_put(struct rbd_obj_request *obj_request) kref_put(&obj_request->kref, rbd_obj_request_destroy); } +static void rbd_img_request_get(struct rbd_img_request *img_request) +{ + dout("%s: img %p (was %d)\n", __func__, img_request, + atomic_read(&img_request->kref.refcount)); + kref_get(&img_request->kref); +} + static bool img_request_child_test(struct rbd_img_request *img_request); static void rbd_parent_request_destroy(struct kref *kref); static void rbd_img_request_destroy(struct kref *kref); @@ -2148,6 +2163,7 @@ static void rbd_img_obj_callback(struct rbd_obj_request *obj_request) img_request->next_completion = which; out: spin_unlock_irq(&img_request->completion_lock); + rbd_img_request_put(img_request); if (!more) rbd_img_request_complete(img_request); @@ -2244,6 +2260,7 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request, goto out_partial; obj_request->osd_req = osd_req; obj_request->callback = rbd_img_obj_callback; + rbd_img_request_get(img_request); osd_req_op_extent_init(osd_req, 0, opcode, offset, length, 0, 0); @@ 
-2272,7 +2289,7 @@ out_partial: rbd_obj_request_put(obj_request); out_unwind: for_each_obj_request_safe(img_request, obj_request, next_obj_request) - rbd_obj_request_put(obj_request); + rbd_img_obj_request_del(img_request, obj_request); return -ENOMEM; } @@ -2666,7 +2683,7 @@ static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request) */ if (!img_request_write_test(img_request) || !img_request_layered_test(img_request) || - rbd_dev->parent_overlap <= obj_request->img_offset || + !obj_request_overlaps_parent(obj_request) || ((known = obj_request_known_test(obj_request)) && obj_request_exists_test(obj_request))) { @@ -3203,7 +3220,7 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev, page_count = (u32) calc_pages_for(offset, length); pages = ceph_alloc_page_vector(page_count, GFP_KERNEL); if (IS_ERR(pages)) - ret = PTR_ERR(pages); + return PTR_ERR(pages); ret = -ENOMEM; obj_request = rbd_obj_request_create(object_name, offset, length, diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 5814deb..0ebadf9 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -9,6 +9,7 @@ #include <linux/blkdev.h> #include <linux/hdreg.h> #include <linux/genhd.h> +#include <linux/cdrom.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> @@ -22,8 +23,8 @@ #define DRV_MODULE_NAME "sunvdc" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "1.0" -#define DRV_MODULE_RELDATE "June 25, 2007" +#define DRV_MODULE_VERSION "1.1" +#define DRV_MODULE_RELDATE "February 13, 2013" static char version[] = DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; @@ -32,7 +33,7 @@ MODULE_DESCRIPTION("Sun LDOM virtual disk client driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_MODULE_VERSION); -#define VDC_TX_RING_SIZE 256 +#define VDC_TX_RING_SIZE 512 #define WAITING_FOR_LINK_UP 0x01 #define WAITING_FOR_TX_SPACE 0x02 @@ -65,11 +66,9 @@ struct vdc_port { u64 operations; u32 vdisk_size; u8 
vdisk_type; + u8 vdisk_mtype; char disk_name[32]; - - struct vio_disk_geom geom; - struct vio_disk_vtoc label; }; static inline struct vdc_port *to_vdc_port(struct vio_driver_state *vio) @@ -79,9 +78,16 @@ static inline struct vdc_port *to_vdc_port(struct vio_driver_state *vio) /* Ordered from largest major to lowest */ static struct vio_version vdc_versions[] = { + { .major = 1, .minor = 1 }, { .major = 1, .minor = 0 }, }; +static inline int vdc_version_supported(struct vdc_port *port, + u16 major, u16 minor) +{ + return port->vio.ver.major == major && port->vio.ver.minor >= minor; +} + #define VDCBLK_NAME "vdisk" static int vdc_major; #define PARTITION_SHIFT 3 @@ -94,18 +100,54 @@ static inline u32 vdc_tx_dring_avail(struct vio_dring_state *dr) static int vdc_getgeo(struct block_device *bdev, struct hd_geometry *geo) { struct gendisk *disk = bdev->bd_disk; - struct vdc_port *port = disk->private_data; + sector_t nsect = get_capacity(disk); + sector_t cylinders = nsect; - geo->heads = (u8) port->geom.num_hd; - geo->sectors = (u8) port->geom.num_sec; - geo->cylinders = port->geom.num_cyl; + geo->heads = 0xff; + geo->sectors = 0x3f; + sector_div(cylinders, geo->heads * geo->sectors); + geo->cylinders = cylinders; + if ((sector_t)(geo->cylinders + 1) * geo->heads * geo->sectors < nsect) + geo->cylinders = 0xffff; return 0; } +/* Add ioctl/CDROM_GET_CAPABILITY to support cdrom_id in udev + * when vdisk_mtype is VD_MEDIA_TYPE_CD or VD_MEDIA_TYPE_DVD. + * Needed to be able to install inside an ldom from an iso image. 
+ */ +static int vdc_ioctl(struct block_device *bdev, fmode_t mode, + unsigned command, unsigned long argument) +{ + int i; + struct gendisk *disk; + + switch (command) { + case CDROMMULTISESSION: + pr_debug(PFX "Multisession CDs not supported\n"); + for (i = 0; i < sizeof(struct cdrom_multisession); i++) + if (put_user(0, (char __user *)(argument + i))) + return -EFAULT; + return 0; + + case CDROM_GET_CAPABILITY: + disk = bdev->bd_disk; + + if (bdev->bd_disk && (disk->flags & GENHD_FL_CD)) + return 0; + return -EINVAL; + + default: + pr_debug(PFX "ioctl %08x not supported\n", command); + return -EINVAL; + } +} + static const struct block_device_operations vdc_fops = { .owner = THIS_MODULE, .getgeo = vdc_getgeo, + .ioctl = vdc_ioctl, }; static void vdc_finish(struct vio_driver_state *vio, int err, int waiting_for) @@ -165,9 +207,9 @@ static int vdc_handle_attr(struct vio_driver_state *vio, void *arg) struct vio_disk_attr_info *pkt = arg; viodbg(HS, "GOT ATTR stype[0x%x] ops[%llx] disk_size[%llu] disk_type[%x] " - "xfer_mode[0x%x] blksz[%u] max_xfer[%llu]\n", + "mtype[0x%x] xfer_mode[0x%x] blksz[%u] max_xfer[%llu]\n", pkt->tag.stype, pkt->operations, - pkt->vdisk_size, pkt->vdisk_type, + pkt->vdisk_size, pkt->vdisk_type, pkt->vdisk_mtype, pkt->xfer_mode, pkt->vdisk_block_size, pkt->max_xfer_size); @@ -192,8 +234,11 @@ static int vdc_handle_attr(struct vio_driver_state *vio, void *arg) } port->operations = pkt->operations; - port->vdisk_size = pkt->vdisk_size; port->vdisk_type = pkt->vdisk_type; + if (vdc_version_supported(port, 1, 1)) { + port->vdisk_size = pkt->vdisk_size; + port->vdisk_mtype = pkt->vdisk_mtype; + } if (pkt->max_xfer_size < port->max_xfer_size) port->max_xfer_size = pkt->max_xfer_size; port->vdisk_block_size = pkt->vdisk_block_size; @@ -236,7 +281,9 @@ static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr, __blk_end_request(req, (desc->status ? 
-EIO : 0), desc->size); - if (blk_queue_stopped(port->disk->queue)) + /* restart blk queue when ring is half emptied */ + if (blk_queue_stopped(port->disk->queue) && + vdc_tx_dring_avail(dr) * 100 / VDC_TX_RING_SIZE >= 50) blk_start_queue(port->disk->queue); } @@ -388,12 +435,6 @@ static int __send_request(struct request *req) for (i = 0; i < nsg; i++) len += sg[i].length; - if (unlikely(vdc_tx_dring_avail(dr) < 1)) { - blk_stop_queue(port->disk->queue); - err = -ENOMEM; - goto out; - } - desc = vio_dring_cur(dr); err = ldc_map_sg(port->vio.lp, sg, nsg, @@ -433,21 +474,32 @@ static int __send_request(struct request *req) port->req_id++; dr->prod = (dr->prod + 1) & (VDC_TX_RING_SIZE - 1); } -out: return err; } -static void do_vdc_request(struct request_queue *q) +static void do_vdc_request(struct request_queue *rq) { - while (1) { - struct request *req = blk_fetch_request(q); + struct request *req; - if (!req) - break; + while ((req = blk_peek_request(rq)) != NULL) { + struct vdc_port *port; + struct vio_dring_state *dr; - if (__send_request(req) < 0) - __blk_end_request_all(req, -EIO); + port = req->rq_disk->private_data; + dr = &port->vio.drings[VIO_DRIVER_TX_RING]; + if (unlikely(vdc_tx_dring_avail(dr) < 1)) + goto wait; + + blk_start_request(req); + + if (__send_request(req) < 0) { + blk_requeue_request(rq, req); +wait: + /* Avoid pointless unplugs. 
*/ + blk_stop_queue(rq); + break; + } } } @@ -656,25 +708,27 @@ static int probe_disk(struct vdc_port *port) if (comp.err) return comp.err; - err = generic_request(port, VD_OP_GET_VTOC, - &port->label, sizeof(port->label)); - if (err < 0) { - printk(KERN_ERR PFX "VD_OP_GET_VTOC returns error %d\n", err); - return err; - } - - err = generic_request(port, VD_OP_GET_DISKGEOM, - &port->geom, sizeof(port->geom)); - if (err < 0) { - printk(KERN_ERR PFX "VD_OP_GET_DISKGEOM returns " - "error %d\n", err); - return err; + if (vdc_version_supported(port, 1, 1)) { + /* vdisk_size should be set during the handshake, if it wasn't + * then the underlying disk is reserved by another system + */ + if (port->vdisk_size == -1) + return -ENODEV; + } else { + struct vio_disk_geom geom; + + err = generic_request(port, VD_OP_GET_DISKGEOM, + &geom, sizeof(geom)); + if (err < 0) { + printk(KERN_ERR PFX "VD_OP_GET_DISKGEOM returns " + "error %d\n", err); + return err; + } + port->vdisk_size = ((u64)geom.num_cyl * + (u64)geom.num_hd * + (u64)geom.num_sec); } - port->vdisk_size = ((u64)port->geom.num_cyl * - (u64)port->geom.num_hd * - (u64)port->geom.num_sec); - q = blk_init_queue(do_vdc_request, &port->vio.lock); if (!q) { printk(KERN_ERR PFX "%s: Could not allocate queue.\n", @@ -691,6 +745,10 @@ static int probe_disk(struct vdc_port *port) port->disk = g; + /* Each segment in a request is up to an aligned page in size. 
*/ + blk_queue_segment_boundary(q, PAGE_SIZE - 1); + blk_queue_max_segment_size(q, PAGE_SIZE); + blk_queue_max_segments(q, port->ring_cookies); blk_queue_max_hw_sectors(q, port->max_xfer_size); g->major = vdc_major; @@ -704,9 +762,32 @@ static int probe_disk(struct vdc_port *port) set_capacity(g, port->vdisk_size); - printk(KERN_INFO PFX "%s: %u sectors (%u MB)\n", + if (vdc_version_supported(port, 1, 1)) { + switch (port->vdisk_mtype) { + case VD_MEDIA_TYPE_CD: + pr_info(PFX "Virtual CDROM %s\n", port->disk_name); + g->flags |= GENHD_FL_CD; + g->flags |= GENHD_FL_REMOVABLE; + set_disk_ro(g, 1); + break; + + case VD_MEDIA_TYPE_DVD: + pr_info(PFX "Virtual DVD %s\n", port->disk_name); + g->flags |= GENHD_FL_CD; + g->flags |= GENHD_FL_REMOVABLE; + set_disk_ro(g, 1); + break; + + case VD_MEDIA_TYPE_FIXED: + pr_info(PFX "Virtual Hard disk %s\n", port->disk_name); + break; + } + } + + pr_info(PFX "%s: %u sectors (%u MB) protocol %d.%d\n", g->disk_name, - port->vdisk_size, (port->vdisk_size >> (20 - 9))); + port->vdisk_size, (port->vdisk_size >> (20 - 9)), + port->vio.ver.major, port->vio.ver.minor); add_disk(g); @@ -765,6 +846,7 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) else snprintf(port->disk_name, sizeof(port->disk_name), VDCBLK_NAME "%c", 'a' + ((int)vdev->dev_no % 26)); + port->vdisk_size = -1; err = vio_driver_init(&port->vio, vdev, VDEV_DISK, vdc_versions, ARRAY_SIZE(vdc_versions), diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 6620b73..6beaaf8 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -755,6 +755,7 @@ again: BUG_ON(new_map_idx >= segs_to_map); if (unlikely(map[new_map_idx].status != 0)) { pr_debug(DRV_PFX "invalid buffer -- could not remap it\n"); + put_free_pages(blkif, &pages[seg_idx]->page, 1); pages[seg_idx]->handle = BLKBACK_INVALID_HANDLE; ret |= 1; goto next; diff --git a/drivers/bluetooth/ath3k.c 
b/drivers/bluetooth/ath3k.c index 0a327f4..1685b3c 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -27,6 +27,7 @@ #include <linux/device.h> #include <linux/firmware.h> #include <linux/usb.h> +#include <asm/unaligned.h> #include <net/bluetooth/bluetooth.h> #define VERSION "1.0" @@ -50,59 +51,68 @@ #define ATH3K_NAME_LEN 0xFF struct ath3k_version { - unsigned int rom_version; - unsigned int build_version; - unsigned int ram_version; - unsigned char ref_clock; - unsigned char reserved[0x07]; -}; + __le32 rom_version; + __le32 build_version; + __le32 ram_version; + __u8 ref_clock; + __u8 reserved[7]; +} __packed; static struct usb_device_id ath3k_table[] = { /* Atheros AR3011 */ { USB_DEVICE(0x0CF3, 0x3000) }, /* Atheros AR3011 with sflash firmware*/ + { USB_DEVICE(0x0489, 0xE027) }, + { USB_DEVICE(0x0489, 0xE03D) }, + { USB_DEVICE(0x0930, 0x0215) }, { USB_DEVICE(0x0CF3, 0x3002) }, { USB_DEVICE(0x0CF3, 0xE019) }, { USB_DEVICE(0x13d3, 0x3304) }, - { USB_DEVICE(0x0930, 0x0215) }, - { USB_DEVICE(0x0489, 0xE03D) }, - { USB_DEVICE(0x0489, 0xE027) }, /* Atheros AR9285 Malbec with sflash firmware */ { USB_DEVICE(0x03F0, 0x311D) }, /* Atheros AR3012 with sflash firmware*/ + { USB_DEVICE(0x0489, 0xe04d) }, + { USB_DEVICE(0x0489, 0xe04e) }, + { USB_DEVICE(0x0489, 0xe057) }, + { USB_DEVICE(0x0489, 0xe056) }, + { USB_DEVICE(0x0489, 0xe05f) }, + { USB_DEVICE(0x04c5, 0x1330) }, + { USB_DEVICE(0x04CA, 0x3004) }, + { USB_DEVICE(0x04CA, 0x3005) }, + { USB_DEVICE(0x04CA, 0x3006) }, + { USB_DEVICE(0x04CA, 0x3007) }, + { USB_DEVICE(0x04CA, 0x3008) }, + { USB_DEVICE(0x04CA, 0x300b) }, + { USB_DEVICE(0x0930, 0x0219) }, + { USB_DEVICE(0x0930, 0x0220) }, + { USB_DEVICE(0x0930, 0x0227) }, + { USB_DEVICE(0x0b05, 0x17d0) }, { USB_DEVICE(0x0CF3, 0x0036) }, { USB_DEVICE(0x0CF3, 0x3004) }, { USB_DEVICE(0x0CF3, 0x3008) }, { USB_DEVICE(0x0CF3, 0x311D) }, + { USB_DEVICE(0x0CF3, 0x311E) }, + { USB_DEVICE(0x0CF3, 0x311F) }, + { USB_DEVICE(0x0cf3, 0x3121) }, { USB_DEVICE(0x0CF3, 
0x817a) }, - { USB_DEVICE(0x13d3, 0x3375) }, - { USB_DEVICE(0x04CA, 0x3004) }, - { USB_DEVICE(0x04CA, 0x3005) }, - { USB_DEVICE(0x04CA, 0x3006) }, - { USB_DEVICE(0x04CA, 0x3008) }, - { USB_DEVICE(0x13d3, 0x3362) }, + { USB_DEVICE(0x0cf3, 0xe003) }, { USB_DEVICE(0x0CF3, 0xE004) }, { USB_DEVICE(0x0CF3, 0xE005) }, - { USB_DEVICE(0x0930, 0x0219) }, - { USB_DEVICE(0x0489, 0xe057) }, + { USB_DEVICE(0x13d3, 0x3362) }, + { USB_DEVICE(0x13d3, 0x3375) }, { USB_DEVICE(0x13d3, 0x3393) }, - { USB_DEVICE(0x0489, 0xe04e) }, - { USB_DEVICE(0x0489, 0xe056) }, - { USB_DEVICE(0x0489, 0xe04d) }, - { USB_DEVICE(0x04c5, 0x1330) }, { USB_DEVICE(0x13d3, 0x3402) }, - { USB_DEVICE(0x0cf3, 0x3121) }, - { USB_DEVICE(0x0cf3, 0xe003) }, + { USB_DEVICE(0x13d3, 0x3432) }, /* Atheros AR5BBU12 with sflash firmware */ { USB_DEVICE(0x0489, 0xE02C) }, /* Atheros AR5BBU22 with sflash firmware */ - { USB_DEVICE(0x0489, 0xE03C) }, { USB_DEVICE(0x0489, 0xE036) }, + { USB_DEVICE(0x0489, 0xE03C) }, { } /* Terminating entry */ }; @@ -115,33 +125,42 @@ MODULE_DEVICE_TABLE(usb, ath3k_table); static struct usb_device_id ath3k_blist_tbl[] = { /* Atheros AR3012 with sflash firmware*/ + { USB_DEVICE(0x0489, 0xe04e), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0489, 0xe04d), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0489, 0xe056), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0489, 0xe057), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0489, 0xe05f), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x04c5, 0x1330), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x04ca, 0x3004), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x04ca, 0x3005), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x04ca, 0x3006), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x04ca, 0x3008), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x04ca, 0x300b), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 }, + { 
USB_DEVICE(0x0930, 0x0227), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0b05, 0x17d0), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0CF3, 0x0036), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x04ca, 0x3007), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3008), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311D), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0cf3, 0x311E), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0cf3, 0x311F), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0cf3, 0x3121), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0CF3, 0x817a), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x13d3, 0x3375), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x04ca, 0x3004), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x04ca, 0x3005), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x04ca, 0x3006), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x04ca, 0x3008), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x13d3, 0x3362), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe005), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x0489, 0xe057), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0cf3, 0xe003), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x13d3, 0x3362), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x13d3, 0x3375), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3393), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x0489, 0xe04e), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x0489, 0xe056), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x0489, 0xe04d), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x04c5, 0x1330), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3402), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x0cf3, 0x3121), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x0cf3, 0xe003), 
.driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x13d3, 0x3432), .driver_info = BTUSB_ATH3012 }, /* Atheros AR5BBU22 with sflash firmware */ - { USB_DEVICE(0x0489, 0xE03C), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xE036), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0489, 0xE03C), .driver_info = BTUSB_ATH3012 }, { } /* Terminating entry */ }; @@ -333,7 +352,8 @@ static int ath3k_load_patch(struct usb_device *udev) unsigned char fw_state; char filename[ATH3K_NAME_LEN] = {0}; const struct firmware *firmware; - struct ath3k_version fw_version, pt_version; + struct ath3k_version fw_version; + __u32 pt_rom_version, pt_build_version; int ret; ret = ath3k_get_state(udev, &fw_state); @@ -354,7 +374,7 @@ static int ath3k_load_patch(struct usb_device *udev) } snprintf(filename, ATH3K_NAME_LEN, "ar3k/AthrBT_0x%08x.dfu", - fw_version.rom_version); + le32_to_cpu(fw_version.rom_version)); ret = request_firmware(&firmware, filename, &udev->dev); if (ret < 0) { @@ -362,12 +382,13 @@ static int ath3k_load_patch(struct usb_device *udev) return ret; } - pt_version.rom_version = *(int *)(firmware->data + firmware->size - 8); - pt_version.build_version = *(int *) - (firmware->data + firmware->size - 4); + pt_rom_version = get_unaligned_le32(firmware->data + + firmware->size - 8); + pt_build_version = get_unaligned_le32(firmware->data + + firmware->size - 4); - if ((pt_version.rom_version != fw_version.rom_version) || - (pt_version.build_version <= fw_version.build_version)) { + if (pt_rom_version != le32_to_cpu(fw_version.rom_version) || + pt_build_version <= le32_to_cpu(fw_version.build_version)) { BT_ERR("Patch file version did not match with firmware"); release_firmware(firmware); return -EINVAL; @@ -416,7 +437,7 @@ static int ath3k_load_syscfg(struct usb_device *udev) } snprintf(filename, ATH3K_NAME_LEN, "ar3k/ramps_0x%08x_%d%s", - fw_version.rom_version, clk_value, ".dfu"); + le32_to_cpu(fw_version.rom_version), clk_value, ".dfu"); ret = request_firmware(&firmware, 
filename, &udev->dev); if (ret < 0) { diff --git a/drivers/bluetooth/btmrvl_main.c b/drivers/bluetooth/btmrvl_main.c index 9a9f518..5592b71 100644 --- a/drivers/bluetooth/btmrvl_main.c +++ b/drivers/bluetooth/btmrvl_main.c @@ -628,12 +628,17 @@ struct btmrvl_private *btmrvl_add_card(void *card) init_waitqueue_head(&priv->main_thread.wait_q); priv->main_thread.task = kthread_run(btmrvl_service_main_thread, &priv->main_thread, "btmrvl_main_service"); + if (IS_ERR(priv->main_thread.task)) + goto err_thread; priv->btmrvl_dev.card = card; priv->btmrvl_dev.tx_dnld_rdy = true; return priv; +err_thread: + btmrvl_free_adapter(priv); + err_adapter: kfree(priv); diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 6e30356..64f1915 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -49,6 +49,7 @@ static struct usb_driver btusb_driver; #define BTUSB_WRONG_SCO_MTU 0x40 #define BTUSB_ATH3012 0x80 #define BTUSB_INTEL 0x100 +#define BTUSB_INTEL_BOOT 0x200 static struct usb_device_id btusb_table[] = { /* Generic Bluetooth USB device */ @@ -101,21 +102,31 @@ static struct usb_device_id btusb_table[] = { { USB_DEVICE(0x0c10, 0x0000) }, /* Broadcom BCM20702A0 */ + { USB_DEVICE(0x0489, 0xe042) }, + { USB_DEVICE(0x04ca, 0x2003) }, { USB_DEVICE(0x0b05, 0x17b5) }, { USB_DEVICE(0x0b05, 0x17cb) }, - { USB_DEVICE(0x04ca, 0x2003) }, - { USB_DEVICE(0x0489, 0xe042) }, { USB_DEVICE(0x413c, 0x8197) }, /* Foxconn - Hon Hai */ { USB_VENDOR_AND_INTERFACE_INFO(0x0489, 0xff, 0x01, 0x01) }, - /*Broadcom devices with vendor specific id */ + /* Broadcom devices with vendor specific id */ { USB_VENDOR_AND_INTERFACE_INFO(0x0a5c, 0xff, 0x01, 0x01) }, + /* ASUSTek Computer - Broadcom based */ + { USB_VENDOR_AND_INTERFACE_INFO(0x0b05, 0xff, 0x01, 0x01) }, + /* Belkin F8065bf - Broadcom based */ { USB_VENDOR_AND_INTERFACE_INFO(0x050d, 0xff, 0x01, 0x01) }, + /* IMC Networks - Broadcom based */ + { USB_VENDOR_AND_INTERFACE_INFO(0x13d3, 0xff, 0x01, 0x01) }, + + /* Intel 
Bluetooth USB Bootloader (RAM module) */ + { USB_DEVICE(0x8087, 0x0a5a), + .driver_info = BTUSB_INTEL_BOOT | BTUSB_BROKEN_ISOC }, + { } /* Terminating entry */ }; @@ -129,52 +140,61 @@ static struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0a5c, 0x2033), .driver_info = BTUSB_IGNORE }, /* Atheros 3011 with sflash firmware */ + { USB_DEVICE(0x0489, 0xe027), .driver_info = BTUSB_IGNORE }, + { USB_DEVICE(0x0489, 0xe03d), .driver_info = BTUSB_IGNORE }, + { USB_DEVICE(0x0930, 0x0215), .driver_info = BTUSB_IGNORE }, { USB_DEVICE(0x0cf3, 0x3002), .driver_info = BTUSB_IGNORE }, { USB_DEVICE(0x0cf3, 0xe019), .driver_info = BTUSB_IGNORE }, { USB_DEVICE(0x13d3, 0x3304), .driver_info = BTUSB_IGNORE }, - { USB_DEVICE(0x0930, 0x0215), .driver_info = BTUSB_IGNORE }, - { USB_DEVICE(0x0489, 0xe03d), .driver_info = BTUSB_IGNORE }, - { USB_DEVICE(0x0489, 0xe027), .driver_info = BTUSB_IGNORE }, /* Atheros AR9285 Malbec with sflash firmware */ { USB_DEVICE(0x03f0, 0x311d), .driver_info = BTUSB_IGNORE }, /* Atheros 3012 with sflash firmware */ + { USB_DEVICE(0x0489, 0xe04d), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0489, 0xe04e), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0489, 0xe056), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0489, 0xe057), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0489, 0xe05f), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x04c5, 0x1330), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x04ca, 0x3004), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x04ca, 0x3005), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x04ca, 0x3006), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x04ca, 0x3007), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x04ca, 0x3008), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x04ca, 0x300b), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0930, 0x0227), .driver_info = 
BTUSB_ATH3012 }, + { USB_DEVICE(0x0b05, 0x17d0), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x0036), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3008), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311d), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0cf3, 0x311e), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0cf3, 0x311f), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0cf3, 0x3121), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x817a), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x13d3, 0x3375), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x04ca, 0x3004), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x04ca, 0x3005), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x04ca, 0x3006), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x04ca, 0x3008), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x13d3, 0x3362), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0cf3, 0xe003), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe005), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x0489, 0xe057), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x13d3, 0x3362), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x13d3, 0x3375), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3393), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x0489, 0xe04e), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x0489, 0xe056), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x0489, 0xe04d), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x04c5, 0x1330), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x13d3, 0x3402), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x0cf3, 0x3121), .driver_info = BTUSB_ATH3012 }, - { USB_DEVICE(0x0cf3, 0xe003), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x13d3, 0x3432), .driver_info = BTUSB_ATH3012 }, /* Atheros 
AR5BBU12 with sflash firmware */ { USB_DEVICE(0x0489, 0xe02c), .driver_info = BTUSB_IGNORE }, /* Atheros AR5BBU12 with sflash firmware */ - { USB_DEVICE(0x0489, 0xe03c), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0489, 0xe036), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0489, 0xe03c), .driver_info = BTUSB_ATH3012 }, /* Broadcom BCM2035 */ - { USB_DEVICE(0x0a5c, 0x2035), .driver_info = BTUSB_WRONG_SCO_MTU }, - { USB_DEVICE(0x0a5c, 0x200a), .driver_info = BTUSB_WRONG_SCO_MTU }, { USB_DEVICE(0x0a5c, 0x2009), .driver_info = BTUSB_BCM92035 }, + { USB_DEVICE(0x0a5c, 0x200a), .driver_info = BTUSB_WRONG_SCO_MTU }, + { USB_DEVICE(0x0a5c, 0x2035), .driver_info = BTUSB_WRONG_SCO_MTU }, /* Broadcom BCM2045 */ { USB_DEVICE(0x0a5c, 0x2039), .driver_info = BTUSB_WRONG_SCO_MTU }, @@ -305,6 +325,9 @@ static void btusb_intr_complete(struct urb *urb) BT_ERR("%s corrupted event packet", hdev->name); hdev->stat.err_rx++; } + } else if (urb->status == -ENOENT) { + /* Avoid suspend failed when usb_kill_urb */ + return; } if (!test_bit(BTUSB_INTR_RUNNING, &data->flags)) @@ -393,6 +416,9 @@ static void btusb_bulk_complete(struct urb *urb) BT_ERR("%s corrupted ACL packet", hdev->name); hdev->stat.err_rx++; } + } else if (urb->status == -ENOENT) { + /* Avoid suspend failed when usb_kill_urb */ + return; } if (!test_bit(BTUSB_BULK_RUNNING, &data->flags)) @@ -487,6 +513,9 @@ static void btusb_isoc_complete(struct urb *urb) hdev->stat.err_rx++; } } + } else if (urb->status == -ENOENT) { + /* Avoid suspend failed when usb_kill_urb */ + return; } if (!test_bit(BTUSB_ISOC_RUNNING, &data->flags)) @@ -1438,6 +1467,9 @@ static int btusb_probe(struct usb_interface *intf, if (id->driver_info & BTUSB_INTEL) hdev->setup = btusb_setup_intel; + if (id->driver_info & BTUSB_INTEL_BOOT) + set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks); + /* Interface numbers are hardcoded in the specification */ data->isoc = usb_ifnum_to_if(data->udev, 1); @@ -1475,6 +1507,18 @@ static int btusb_probe(struct 
usb_interface *intf, data->isoc = NULL; } + if (id->driver_info & BTUSB_INTEL_BOOT) { + /* A bug in the bootloader causes that interrupt interface is + * only enabled after receiving SetInterface(0, AltSetting=0). + */ + err = usb_set_interface(data->udev, 0, 0); + if (err < 0) { + BT_ERR("failed to set interface 0, alt 0 %d", err); + hci_free_dev(hdev); + return err; + } + } + if (data->isoc) { err = usb_driver_claim_interface(&btusb_driver, data->isoc, data); diff --git a/drivers/bluetooth/hci_h5.c b/drivers/bluetooth/hci_h5.c index b6154d5..db35c54 100644 --- a/drivers/bluetooth/hci_h5.c +++ b/drivers/bluetooth/hci_h5.c @@ -237,7 +237,7 @@ static void h5_pkt_cull(struct h5 *h5) break; to_remove--; - seq = (seq - 1) % 8; + seq = (seq - 1) & 0x07; } if (seq != h5->rx_ack) @@ -406,6 +406,7 @@ static int h5_rx_3wire_hdr(struct hci_uart *hu, unsigned char c) H5_HDR_PKT_TYPE(hdr) != HCI_3WIRE_LINK_PKT) { BT_ERR("Non-link packet received in non-active state"); h5_reset_rx(h5); + return 0; } h5->rx_func = h5_rx_payload; diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index bc68a44..c4d2f0e 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -118,10 +118,6 @@ static inline struct sk_buff *hci_uart_dequeue(struct hci_uart *hu) int hci_uart_tx_wakeup(struct hci_uart *hu) { - struct tty_struct *tty = hu->tty; - struct hci_dev *hdev = hu->hdev; - struct sk_buff *skb; - if (test_and_set_bit(HCI_UART_SENDING, &hu->tx_state)) { set_bit(HCI_UART_TX_WAKEUP, &hu->tx_state); return 0; @@ -129,6 +125,22 @@ int hci_uart_tx_wakeup(struct hci_uart *hu) BT_DBG(""); + schedule_work(&hu->write_work); + + return 0; +} + +static void hci_uart_write_work(struct work_struct *work) +{ + struct hci_uart *hu = container_of(work, struct hci_uart, write_work); + struct tty_struct *tty = hu->tty; + struct hci_dev *hdev = hu->hdev; + struct sk_buff *skb; + + /* REVISIT: should we cope with bad skbs or ->write() returning + * and error value ? 
+ */ + restart: clear_bit(HCI_UART_TX_WAKEUP, &hu->tx_state); @@ -153,7 +165,6 @@ restart: goto restart; clear_bit(HCI_UART_SENDING, &hu->tx_state); - return 0; } static void hci_uart_init_work(struct work_struct *work) @@ -289,6 +300,7 @@ static int hci_uart_tty_open(struct tty_struct *tty) tty->receive_room = 65536; INIT_WORK(&hu->init_ready, hci_uart_init_work); + INIT_WORK(&hu->write_work, hci_uart_write_work); spin_lock_init(&hu->rx_lock); @@ -326,6 +338,8 @@ static void hci_uart_tty_close(struct tty_struct *tty) if (hdev) hci_uart_close(hdev); + cancel_work_sync(&hu->write_work); + if (test_and_clear_bit(HCI_UART_PROTO_SET, &hu->flags)) { if (hdev) { if (test_bit(HCI_UART_REGISTERED, &hu->flags)) diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h index fffa61f..12df101 100644 --- a/drivers/bluetooth/hci_uart.h +++ b/drivers/bluetooth/hci_uart.h @@ -68,6 +68,7 @@ struct hci_uart { unsigned long hdev_flags; struct work_struct init_ready; + struct work_struct write_work; struct hci_uart_proto *proto; void *priv; diff --git a/drivers/bus/mvebu-mbus.c b/drivers/bus/mvebu-mbus.c index 2394e97..b50c5e3 100644 --- a/drivers/bus/mvebu-mbus.c +++ b/drivers/bus/mvebu-mbus.c @@ -181,12 +181,25 @@ static void mvebu_mbus_disable_window(struct mvebu_mbus_state *mbus, } /* Checks whether the given window number is available */ + +/* On Armada XP, 375 and 38x the MBus window 13 has the remap + * capability, like windows 0 to 7. However, the mvebu-mbus driver + * isn't currently taking into account this special case, which means + * that when window 13 is actually used, the remap registers are left + * to 0, making the device using this MBus window unavailable. The + * quick fix for stable is to not use window 13. A follow up patch + * will correctly handle this window. 
+*/ static int mvebu_mbus_window_is_free(struct mvebu_mbus_state *mbus, const int win) { void __iomem *addr = mbus->mbuswins_base + mbus->soc->win_cfg_offset(win); u32 ctrl = readl(addr + WIN_CTRL_OFF); + + if (win == 13) + return false; + return !(ctrl & WIN_CTRL_ENABLE); } @@ -222,12 +235,6 @@ static int mvebu_mbus_window_conflicts(struct mvebu_mbus_state *mbus, */ if ((u64)base < wend && end > wbase) return 0; - - /* - * Check if target/attribute conflicts - */ - if (target == wtarget && attr == wattr) - return 0; } return 1; diff --git a/drivers/char/applicom.c b/drivers/char/applicom.c index 974321a..1479030 100644 --- a/drivers/char/applicom.c +++ b/drivers/char/applicom.c @@ -345,7 +345,6 @@ out: free_irq(apbs[i].irq, &dummy); iounmap(apbs[i].RamIO); } - pci_disable_device(dev); return ret; } diff --git a/drivers/char/hw_random/pseries-rng.c b/drivers/char/hw_random/pseries-rng.c index 5f11979..ab11c16 100644 --- a/drivers/char/hw_random/pseries-rng.c +++ b/drivers/char/hw_random/pseries-rng.c @@ -17,19 +17,30 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> #include <linux/module.h> #include <linux/hw_random.h> #include <asm/vio.h> #define MODULE_NAME "pseries-rng" -static int pseries_rng_data_read(struct hwrng *rng, u32 *data) +static int pseries_rng_read(struct hwrng *rng, void *data, size_t max, bool wait) { - if (plpar_hcall(H_RANDOM, (unsigned long *)data) != H_SUCCESS) { - printk(KERN_ERR "pseries rng hcall error\n"); - return 0; + u64 buffer[PLPAR_HCALL_BUFSIZE]; + size_t size = max < 8 ? 
max : 8; + int rc; + + rc = plpar_hcall(H_RANDOM, (unsigned long *)buffer); + if (rc != H_SUCCESS) { + pr_err_ratelimited("H_RANDOM call failed %d\n", rc); + return -EIO; } - return 8; + memcpy(data, buffer, size); + + /* The hypervisor interface returns 64 bits */ + return size; } /** @@ -48,7 +59,7 @@ static unsigned long pseries_rng_get_desired_dma(struct vio_dev *vdev) static struct hwrng pseries_rng = { .name = MODULE_NAME, - .data_read = pseries_rng_data_read, + .read = pseries_rng_read, }; static int __init pseries_rng_probe(struct vio_dev *dev, diff --git a/drivers/char/ipmi/ipmi_kcs_sm.c b/drivers/char/ipmi/ipmi_kcs_sm.c index e53fc24..e1ddcf9 100644 --- a/drivers/char/ipmi/ipmi_kcs_sm.c +++ b/drivers/char/ipmi/ipmi_kcs_sm.c @@ -251,8 +251,9 @@ static inline int check_obf(struct si_sm_data *kcs, unsigned char status, if (!GET_STATUS_OBF(status)) { kcs->obf_timeout -= time; if (kcs->obf_timeout < 0) { - start_error_recovery(kcs, "OBF not ready in time"); - return 1; + kcs->obf_timeout = OBF_RETRY_TIMEOUT; + start_error_recovery(kcs, "OBF not ready in time"); + return 1; } return 0; } diff --git a/drivers/char/random.c b/drivers/char/random.c index ddcbcad..8f1489c 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -933,8 +933,8 @@ static void extract_buf(struct entropy_store *r, __u8 *out) * pool while mixing, and hash one final time. 
*/ sha_transform(hash.w, extract, workspace); - memset(extract, 0, sizeof(extract)); - memset(workspace, 0, sizeof(workspace)); + memzero_explicit(extract, sizeof(extract)); + memzero_explicit(workspace, sizeof(workspace)); /* * In case the hash function has some recognizable output @@ -957,7 +957,7 @@ static void extract_buf(struct entropy_store *r, __u8 *out) } memcpy(out, &hash, EXTRACT_SIZE); - memset(&hash, 0, sizeof(hash)); + memzero_explicit(&hash, sizeof(hash)); } static ssize_t extract_entropy(struct entropy_store *r, void *buf, @@ -1005,7 +1005,7 @@ static ssize_t extract_entropy(struct entropy_store *r, void *buf, } /* Wipe data just returned from memory */ - memset(tmp, 0, sizeof(tmp)); + memzero_explicit(tmp, sizeof(tmp)); return ret; } @@ -1043,7 +1043,7 @@ static ssize_t extract_entropy_user(struct entropy_store *r, void __user *buf, } /* Wipe data just returned from memory */ - memset(tmp, 0, sizeof(tmp)); + memzero_explicit(tmp, sizeof(tmp)); return ret; } diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c index e3c974a..48138b3 100644 --- a/drivers/char/tpm/tpm.c +++ b/drivers/char/tpm/tpm.c @@ -533,11 +533,10 @@ static int tpm_startup(struct tpm_chip *chip, __be16 startup_type) int tpm_get_timeouts(struct tpm_chip *chip) { struct tpm_cmd_t tpm_cmd; - struct timeout_t *timeout_cap; + unsigned long new_timeout[4]; + unsigned long old_timeout[4]; struct duration_t *duration_cap; ssize_t rc; - u32 timeout; - unsigned int scale = 1; tpm_cmd.header.in = tpm_getcap_header; tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP; @@ -571,25 +570,46 @@ int tpm_get_timeouts(struct tpm_chip *chip) != sizeof(tpm_cmd.header.out) + sizeof(u32) + 4 * sizeof(u32)) return -EINVAL; - timeout_cap = &tpm_cmd.params.getcap_out.cap.timeout; - /* Don't overwrite default if value is 0 */ - timeout = be32_to_cpu(timeout_cap->a); - if (timeout && timeout < 1000) { - /* timeouts in msec rather usec */ - scale = 1000; - chip->vendor.timeout_adjusted = true; + old_timeout[0] = 
be32_to_cpu(tpm_cmd.params.getcap_out.cap.timeout.a); + old_timeout[1] = be32_to_cpu(tpm_cmd.params.getcap_out.cap.timeout.b); + old_timeout[2] = be32_to_cpu(tpm_cmd.params.getcap_out.cap.timeout.c); + old_timeout[3] = be32_to_cpu(tpm_cmd.params.getcap_out.cap.timeout.d); + memcpy(new_timeout, old_timeout, sizeof(new_timeout)); + + /* + * Provide ability for vendor overrides of timeout values in case + * of misreporting. + */ + if (chip->vendor.update_timeouts != NULL) + chip->vendor.timeout_adjusted = + chip->vendor.update_timeouts(chip, new_timeout); + + if (!chip->vendor.timeout_adjusted) { + /* Don't overwrite default if value is 0 */ + if (new_timeout[0] != 0 && new_timeout[0] < 1000) { + int i; + + /* timeouts in msec rather usec */ + for (i = 0; i != ARRAY_SIZE(new_timeout); i++) + new_timeout[i] *= 1000; + chip->vendor.timeout_adjusted = true; + } } - if (timeout) - chip->vendor.timeout_a = usecs_to_jiffies(timeout * scale); - timeout = be32_to_cpu(timeout_cap->b); - if (timeout) - chip->vendor.timeout_b = usecs_to_jiffies(timeout * scale); - timeout = be32_to_cpu(timeout_cap->c); - if (timeout) - chip->vendor.timeout_c = usecs_to_jiffies(timeout * scale); - timeout = be32_to_cpu(timeout_cap->d); - if (timeout) - chip->vendor.timeout_d = usecs_to_jiffies(timeout * scale); + + /* Report adjusted timeouts */ + if (chip->vendor.timeout_adjusted) { + dev_info(chip->dev, + HW_ERR "Adjusting reported timeouts: A %lu->%luus B %lu->%luus C %lu->%luus D %lu->%luus\n", + old_timeout[0], new_timeout[0], + old_timeout[1], new_timeout[1], + old_timeout[2], new_timeout[2], + old_timeout[3], new_timeout[3]); + } + + chip->vendor.timeout_a = usecs_to_jiffies(new_timeout[0]); + chip->vendor.timeout_b = usecs_to_jiffies(new_timeout[1]); + chip->vendor.timeout_c = usecs_to_jiffies(new_timeout[2]); + chip->vendor.timeout_d = usecs_to_jiffies(new_timeout[3]); duration: tpm_cmd.header.in = tpm_getcap_header; @@ -1423,13 +1443,13 @@ int tpm_get_random(u32 chip_num, u8 *out, 
size_t max) int err, total = 0, retries = 5; u8 *dest = out; + if (!out || !num_bytes || max > TPM_MAX_RNG_DATA) + return -EINVAL; + chip = tpm_chip_find_get(chip_num); if (chip == NULL) return -ENODEV; - if (!out || !num_bytes || max > TPM_MAX_RNG_DATA) - return -EINVAL; - do { tpm_cmd.header.in = tpm_getrandom_header; tpm_cmd.params.getrandom_in.num_bytes = cpu_to_be32(num_bytes); @@ -1448,6 +1468,7 @@ int tpm_get_random(u32 chip_num, u8 *out, size_t max) num_bytes -= recd; } while (retries-- && total < max); + tpm_chip_put(chip); return total ? total : -EIO; } EXPORT_SYMBOL_GPL(tpm_get_random); diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index a7bfc17..b911d79 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -95,6 +95,9 @@ struct tpm_vendor_specific { int (*send) (struct tpm_chip *, u8 *, size_t); void (*cancel) (struct tpm_chip *); u8 (*status) (struct tpm_chip *); + bool (*update_timeouts)(struct tpm_chip *chip, + unsigned long *timeout_cap); + void (*release) (struct device *); struct miscdevice miscdev; struct attribute_group *attr_group; diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.c b/drivers/char/tpm/tpm_i2c_stm_st33.c index 156bd3c..06af39c 100644 --- a/drivers/char/tpm/tpm_i2c_stm_st33.c +++ b/drivers/char/tpm/tpm_i2c_stm_st33.c @@ -750,6 +750,7 @@ tpm_st33_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) } tpm_get_timeouts(chip); + tpm_do_selftest(chip); i2c_set_clientdata(client, chip); diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index 5796d01..e7b1a0a 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -373,6 +373,36 @@ out_err: return rc; } +struct tis_vendor_timeout_override { + u32 did_vid; + unsigned long timeout_us[4]; +}; + +static const struct tis_vendor_timeout_override vendor_timeout_overrides[] = { + /* Atmel 3204 */ + { 0x32041114, { (TIS_SHORT_TIMEOUT*1000), (TIS_LONG_TIMEOUT*1000), + (TIS_SHORT_TIMEOUT*1000), 
(TIS_SHORT_TIMEOUT*1000) } }, +}; + +static bool tpm_tis_update_timeouts(struct tpm_chip *chip, + unsigned long *timeout_cap) +{ + int i; + u32 did_vid; + + did_vid = ioread32(chip->vendor.iobase + TPM_DID_VID(0)); + + for (i = 0; i != ARRAY_SIZE(vendor_timeout_overrides); i++) { + if (vendor_timeout_overrides[i].did_vid != did_vid) + continue; + memcpy(timeout_cap, vendor_timeout_overrides[i].timeout_us, + sizeof(vendor_timeout_overrides[i].timeout_us)); + return true; + } + + return false; +} + /* * Early probing for iTPM with STS_DATA_EXPECT flaw. * Try sending command without itpm flag set and if that @@ -475,6 +505,7 @@ static struct tpm_vendor_specific tpm_tis = { .recv = tpm_tis_recv, .send = tpm_tis_send, .cancel = tpm_tis_ready, + .update_timeouts = tpm_tis_update_timeouts, .req_complete_mask = TPM_STS_DATA_AVAIL | TPM_STS_VALID, .req_complete_val = TPM_STS_DATA_AVAIL | TPM_STS_VALID, .req_canceled = tpm_tis_req_canceled, diff --git a/drivers/clk/clk-s2mps11.c b/drivers/clk/clk-s2mps11.c index 7be41e6..bea5922 100644 --- a/drivers/clk/clk-s2mps11.c +++ b/drivers/clk/clk-s2mps11.c @@ -130,7 +130,7 @@ static struct device_node *s2mps11_clk_parse_dt(struct platform_device *pdev) int i; if (!iodev->dev->of_node) - return NULL; + return ERR_PTR(-EINVAL); clk_np = of_find_node_by_name(iodev->dev->of_node, "clocks"); if (!clk_np) { @@ -190,16 +190,13 @@ static int s2mps11_clk_probe(struct platform_device *pdev) goto err_reg; } - s2mps11_clk->lookup = devm_kzalloc(&pdev->dev, - sizeof(struct clk_lookup), GFP_KERNEL); + s2mps11_clk->lookup = clkdev_alloc(s2mps11_clk->clk, + s2mps11_name(s2mps11_clk), NULL); if (!s2mps11_clk->lookup) { ret = -ENOMEM; goto err_lup; } - s2mps11_clk->lookup->con_id = s2mps11_name(s2mps11_clk); - s2mps11_clk->lookup->clk = s2mps11_clk->clk; - clkdev_add(s2mps11_clk->lookup); } diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index a004769..f596641 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -1368,6 +1368,7 @@ static 
struct clk *clk_propagate_rate_change(struct clk *clk, unsigned long even static void clk_change_rate(struct clk *clk) { struct clk *child; + struct hlist_node *tmp; unsigned long old_rate; unsigned long best_parent_rate = 0; @@ -1391,7 +1392,11 @@ static void clk_change_rate(struct clk *clk) if (clk->notifier_count && old_rate != clk->rate) __clk_notify(clk, POST_RATE_CHANGE, old_rate, clk->rate); - hlist_for_each_entry(child, &clk->children, child_node) { + /* + * Use safe iteration, as change_rate can actually swap parents + * for certain clock types. + */ + hlist_for_each_entry_safe(child, tmp, &clk->children, child_node) { /* Skip children who will be reparented to another clock */ if (child->new_parent && child->new_parent != clk) continue; diff --git a/drivers/clk/spear/spear3xx_clock.c b/drivers/clk/spear/spear3xx_clock.c index c2d2043..125eba8 100644 --- a/drivers/clk/spear/spear3xx_clock.c +++ b/drivers/clk/spear/spear3xx_clock.c @@ -211,7 +211,7 @@ static inline void spear310_clk_init(void) { } /* array of all spear 320 clock lookups */ #ifdef CONFIG_MACH_SPEAR320 -#define SPEAR320_CONTROL_REG (soc_config_base + 0x0000) +#define SPEAR320_CONTROL_REG (soc_config_base + 0x0010) #define SPEAR320_EXT_CTRL_REG (soc_config_base + 0x0018) #define SPEAR320_UARTX_PCLK_MASK 0x1 diff --git a/drivers/clk/tegra/clk-pll.c b/drivers/clk/tegra/clk-pll.c index 197074a..4c1d9bb 100644 --- a/drivers/clk/tegra/clk-pll.c +++ b/drivers/clk/tegra/clk-pll.c @@ -1587,12 +1587,14 @@ struct clk *tegra_clk_register_plle_tegra114(const char *name, val_aux = pll_readl(pll_params->aux_reg, pll); if (val & PLL_BASE_ENABLE) { - if (!(val_aux & PLLE_AUX_PLLRE_SEL)) + if ((val_aux & PLLE_AUX_PLLRE_SEL) || + (val_aux & PLLE_AUX_PLLP_SEL)) WARN(1, "pll_e enabled with unsupported parent %s\n", - (val & PLLE_AUX_PLLP_SEL) ? "pllp_out0" : "pll_ref"); + (val_aux & PLLE_AUX_PLLP_SEL) ? 
"pllp_out0" : + "pll_re_vco"); } else { - val_aux |= PLLE_AUX_PLLRE_SEL; - pll_writel(val, pll_params->aux_reg, pll); + val_aux &= ~(PLLE_AUX_PLLRE_SEL | PLLE_AUX_PLLP_SEL); + pll_writel(val_aux, pll_params->aux_reg, pll); } clk = _tegra_clk_register_pll(pll, name, parent_name, flags, diff --git a/drivers/clk/tegra/clk-tegra114.c b/drivers/clk/tegra/clk-tegra114.c index 9467da7..406929d 100644 --- a/drivers/clk/tegra/clk-tegra114.c +++ b/drivers/clk/tegra/clk-tegra114.c @@ -673,6 +673,7 @@ static struct tegra_clk_pll_freq_table pll_e_freq_table[] = { /* PLLE special case: use cpcon field to store cml divider value */ {336000000, 100000000, 100, 21, 16, 11}, {312000000, 100000000, 200, 26, 24, 13}, + {12000000, 100000000, 200, 1, 24, 13}, {0, 0, 0, 0, 0, 0}, }; @@ -1501,7 +1502,7 @@ static void __init tegra114_pll_init(void __iomem *clk_base, clks[pll_re_out] = clk; /* PLLE */ - clk = tegra_clk_register_plle_tegra114("pll_e_out0", "pll_re_vco", + clk = tegra_clk_register_plle_tegra114("pll_e_out0", "pll_ref", clk_base, 0, 100000000, &pll_e_params, pll_e_freq_table, NULL); clk_register_clkdev(clk, "pll_e_out0", NULL); diff --git a/drivers/clk/versatile/clk-vexpress-osc.c b/drivers/clk/versatile/clk-vexpress-osc.c index 2dc8b41..a535c7b 100644 --- a/drivers/clk/versatile/clk-vexpress-osc.c +++ b/drivers/clk/versatile/clk-vexpress-osc.c @@ -102,7 +102,7 @@ void __init vexpress_osc_of_setup(struct device_node *node) osc = kzalloc(sizeof(*osc), GFP_KERNEL); if (!osc) - goto error; + return; osc->func = vexpress_config_func_get_by_node(node); if (!osc->func) { diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c index 62b0de6..12fbec7 100644 --- a/drivers/clocksource/exynos_mct.c +++ b/drivers/clocksource/exynos_mct.c @@ -94,8 +94,8 @@ static void exynos4_mct_write(unsigned int value, unsigned long offset) __raw_writel(value, reg_base + offset); if (likely(offset >= EXYNOS4_MCT_L_BASE(0))) { - stat_addr = (offset & ~EXYNOS4_MCT_L_MASK) + 
MCT_L_WSTAT_OFFSET; - switch (offset & EXYNOS4_MCT_L_MASK) { + stat_addr = (offset & EXYNOS4_MCT_L_MASK) + MCT_L_WSTAT_OFFSET; + switch (offset & ~EXYNOS4_MCT_L_MASK) { case MCT_L_TCON_OFFSET: mask = 1 << 3; /* L_TCON write status */ break; @@ -414,8 +414,6 @@ static int exynos4_local_timer_setup(struct clock_event_device *evt) evt->set_mode = exynos4_tick_set_mode; evt->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT; evt->rating = 450; - clockevents_config_and_register(evt, clk_rate / (TICK_BASE_CNT + 1), - 0xf, 0x7fffffff); exynos4_mct_write(TICK_BASE_CNT, mevt->base + MCT_L_TCNTB_OFFSET); @@ -428,9 +426,12 @@ static int exynos4_local_timer_setup(struct clock_event_device *evt) evt->irq); return -EIO; } + irq_force_affinity(mct_irqs[MCT_L0_IRQ + cpu], cpumask_of(cpu)); } else { enable_percpu_irq(mct_irqs[MCT_L0_IRQ], 0); } + clockevents_config_and_register(evt, clk_rate / (TICK_BASE_CNT + 1), + 0xf, 0x7fffffff); return 0; } @@ -448,7 +449,6 @@ static int exynos4_mct_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { struct mct_clock_event_device *mevt; - unsigned int cpu; /* * Grab cpu pointer in each case to avoid spurious @@ -459,12 +459,6 @@ static int exynos4_mct_cpu_notify(struct notifier_block *self, mevt = this_cpu_ptr(&percpu_mct_tick); exynos4_local_timer_setup(&mevt->evt); break; - case CPU_ONLINE: - cpu = (unsigned long)hcpu; - if (mct_int_type == MCT_INT_SPI) - irq_set_affinity(mct_irqs[MCT_L0_IRQ + cpu], - cpumask_of(cpu)); - break; case CPU_DYING: mevt = this_cpu_ptr(&percpu_mct_tick); exynos4_local_timer_stop(&mevt->evt); diff --git a/drivers/clocksource/sun4i_timer.c b/drivers/clocksource/sun4i_timer.c index 8ead025..4fe6ac8 100644 --- a/drivers/clocksource/sun4i_timer.c +++ b/drivers/clocksource/sun4i_timer.c @@ -177,6 +177,11 @@ static void __init sun4i_timer_init(struct device_node *node) writel(TIMER_CTL_CLK_SRC(TIMER_CTL_CLK_SRC_OSC24M), timer_base + TIMER_CTL_REG(0)); + sun4i_clockevent.cpumask = 
cpumask_of(0); + + clockevents_config_and_register(&sun4i_clockevent, rate, 0x1, + 0xffffffff); + ret = setup_irq(irq, &sun4i_timer_irq); if (ret) pr_warn("failed to setup irq %d\n", irq); @@ -184,11 +189,6 @@ static void __init sun4i_timer_init(struct device_node *node) /* Enable timer0 interrupt */ val = readl(timer_base + TIMER_IRQ_EN_REG); writel(val | TIMER_IRQ_EN(0), timer_base + TIMER_IRQ_EN_REG); - - sun4i_clockevent.cpumask = cpumask_of(0); - - clockevents_config_and_register(&sun4i_clockevent, rate, 0x1, - 0xffffffff); } CLOCKSOURCE_OF_DECLARE(sun4i, "allwinner,sun4i-timer", sun4i_timer_init); diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index 18c5b9b..3165811 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -369,7 +369,7 @@ static void cn_proc_mcast_ctl(struct cn_msg *msg, return; /* Can only change if privileged. */ - if (!capable(CAP_NET_ADMIN)) { + if (!__netlink_ns_capable(nsp, &init_user_ns, CAP_NET_ADMIN)) { err = EPERM; goto out; } diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 708f794..d689247 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -50,7 +50,7 @@ obj-$(CONFIG_ARM_BIG_LITTLE_CPUFREQ) += arm_big_little.o # LITTLE drivers, so that it is probed last. 
obj-$(CONFIG_ARM_DT_BL_CPUFREQ) += arm_big_little_dt.o -obj-$(CONFIG_ARCH_DAVINCI_DA850) += davinci-cpufreq.o +obj-$(CONFIG_ARCH_DAVINCI) += davinci-cpufreq.o obj-$(CONFIG_UX500_SOC_DB8500) += dbx500-cpufreq.o obj-$(CONFIG_ARM_EXYNOS_CPUFREQ) += exynos-cpufreq.o obj-$(CONFIG_ARM_EXYNOS4210_CPUFREQ) += exynos4210-cpufreq.o diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 04548f7..d155908 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -412,7 +412,18 @@ show_one(cpuinfo_max_freq, cpuinfo.max_freq); show_one(cpuinfo_transition_latency, cpuinfo.transition_latency); show_one(scaling_min_freq, min); show_one(scaling_max_freq, max); -show_one(scaling_cur_freq, cur); + +static ssize_t show_scaling_cur_freq( + struct cpufreq_policy *policy, char *buf) +{ + ssize_t ret; + + if (cpufreq_driver && cpufreq_driver->setpolicy && cpufreq_driver->get) + ret = sprintf(buf, "%u\n", cpufreq_driver->get(policy->cpu)); + else + ret = sprintf(buf, "%u\n", policy->cur); + return ret; +} static int __cpufreq_set_policy(struct cpufreq_policy *policy, struct cpufreq_policy *new_policy); @@ -815,11 +826,11 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy, if (ret) goto err_out_kobj_put; } - if (cpufreq_driver->target) { - ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr); - if (ret) - goto err_out_kobj_put; - } + + ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr); + if (ret) + goto err_out_kobj_put; + if (cpufreq_driver->bios_limit) { ret = sysfs_create_file(&policy->kobj, &bios_limit.attr); if (ret) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 0806c31..d10d625 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -366,6 +366,11 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy, break; case CPUFREQ_GOV_LIMITS: + mutex_lock(&dbs_data->mutex); + if (!cpu_cdbs->cur_policy) { + mutex_unlock(&dbs_data->mutex); 
+ break; + } mutex_lock(&cpu_cdbs->timer_mutex); if (policy->max < cpu_cdbs->cur_policy->cur) __cpufreq_driver_target(cpu_cdbs->cur_policy, @@ -375,6 +380,7 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy, policy->min, CPUFREQ_RELATION_L); dbs_check_cpu(dbs_data, cpu); mutex_unlock(&cpu_cdbs->timer_mutex); + mutex_unlock(&dbs_data->mutex); break; } return 0; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index d5dc567..132a913 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -550,6 +550,7 @@ static int intel_pstate_init_cpu(unsigned int cpunum) cpu = all_cpu_data[cpunum]; + cpu->cpu = cpunum; intel_pstate_get_cpu_pstates(cpu); if (!cpu->pstate.current_pstate) { all_cpu_data[cpunum] = NULL; @@ -557,7 +558,6 @@ static int intel_pstate_init_cpu(unsigned int cpunum) return -ENODATA; } - cpu->cpu = cpunum; cpu->pstate_policy = (struct pstate_adjust_policy *)id->driver_data; init_timer_deferrable(&cpu->timer); @@ -600,6 +600,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) { limits.min_perf_pct = 100; limits.min_perf = int_tofp(1); + limits.max_policy_pct = 100; limits.max_perf_pct = 100; limits.max_perf = int_tofp(1); limits.no_turbo = 0; diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index 633ba94..c178ed8 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -563,4 +563,4 @@ MODULE_DESCRIPTION("VIA PadLock AES algorithm support"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Michal Ludvig"); -MODULE_ALIAS("aes"); +MODULE_ALIAS_CRYPTO("aes"); diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c index 9266c0e..93d7753 100644 --- a/drivers/crypto/padlock-sha.c +++ b/drivers/crypto/padlock-sha.c @@ -593,7 +593,7 @@ MODULE_DESCRIPTION("VIA PadLock SHA1/SHA256 algorithms support."); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Michal Ludvig"); -MODULE_ALIAS("sha1-all"); 
-MODULE_ALIAS("sha256-all"); -MODULE_ALIAS("sha1-padlock"); -MODULE_ALIAS("sha256-padlock"); +MODULE_ALIAS_CRYPTO("sha1-all"); +MODULE_ALIAS_CRYPTO("sha256-all"); +MODULE_ALIAS_CRYPTO("sha1-padlock"); +MODULE_ALIAS_CRYPTO("sha256-padlock"); diff --git a/drivers/crypto/ux500/cryp/cryp_core.c b/drivers/crypto/ux500/cryp/cryp_core.c index a999f53..e4cea7c 100644 --- a/drivers/crypto/ux500/cryp/cryp_core.c +++ b/drivers/crypto/ux500/cryp/cryp_core.c @@ -190,7 +190,7 @@ static void add_session_id(struct cryp_ctx *ctx) static irqreturn_t cryp_interrupt_handler(int irq, void *param) { struct cryp_ctx *ctx; - int i; + int count; struct cryp_device_data *device_data; if (param == NULL) { @@ -215,12 +215,11 @@ static irqreturn_t cryp_interrupt_handler(int irq, void *param) if (cryp_pending_irq_src(device_data, CRYP_IRQ_SRC_OUTPUT_FIFO)) { if (ctx->outlen / ctx->blocksize > 0) { - for (i = 0; i < ctx->blocksize / 4; i++) { - *(ctx->outdata) = readl_relaxed( - &device_data->base->dout); - ctx->outdata += 4; - ctx->outlen -= 4; - } + count = ctx->blocksize / 4; + + readsl(&device_data->base->dout, ctx->outdata, count); + ctx->outdata += count; + ctx->outlen -= count; if (ctx->outlen == 0) { cryp_disable_irq_src(device_data, @@ -230,12 +229,12 @@ static irqreturn_t cryp_interrupt_handler(int irq, void *param) } else if (cryp_pending_irq_src(device_data, CRYP_IRQ_SRC_INPUT_FIFO)) { if (ctx->datalen / ctx->blocksize > 0) { - for (i = 0 ; i < ctx->blocksize / 4; i++) { - writel_relaxed(ctx->indata, - &device_data->base->din); - ctx->indata += 4; - ctx->datalen -= 4; - } + count = ctx->blocksize / 4; + + writesl(&device_data->base->din, ctx->indata, count); + + ctx->indata += count; + ctx->datalen -= count; if (ctx->datalen == 0) cryp_disable_irq_src(device_data, @@ -1811,7 +1810,7 @@ module_exit(ux500_cryp_mod_fini); module_param(cryp_mode, int, 0); MODULE_DESCRIPTION("Driver for ST-Ericsson UX500 CRYP crypto engine."); -MODULE_ALIAS("aes-all"); -MODULE_ALIAS("des-all"); 
+MODULE_ALIAS_CRYPTO("aes-all"); +MODULE_ALIAS_CRYPTO("des-all"); MODULE_LICENSE("GPL"); diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c index 1c73f4f..8e5e018 100644 --- a/drivers/crypto/ux500/hash/hash_core.c +++ b/drivers/crypto/ux500/hash/hash_core.c @@ -1995,7 +1995,7 @@ module_exit(ux500_hash_mod_fini); MODULE_DESCRIPTION("Driver for ST-Ericsson UX500 HASH engine."); MODULE_LICENSE("GPL"); -MODULE_ALIAS("sha1-all"); -MODULE_ALIAS("sha256-all"); -MODULE_ALIAS("hmac-sha1-all"); -MODULE_ALIAS("hmac-sha256-all"); +MODULE_ALIAS_CRYPTO("sha1-all"); +MODULE_ALIAS_CRYPTO("sha256-all"); +MODULE_ALIAS_CRYPTO("hmac-sha1-all"); +MODULE_ALIAS_CRYPTO("hmac-sha256-all"); diff --git a/drivers/dma/TODO b/drivers/dma/TODO index 734ed02..b8045cd 100644 --- a/drivers/dma/TODO +++ b/drivers/dma/TODO @@ -7,7 +7,6 @@ TODO for slave dma - imx-dma - imx-sdma - mxs-dma.c - - dw_dmac - intel_mid_dma 4. Check other subsystems for dma drivers and merge/move to dmaengine 5. Remove dma_slave_config's dma direction. diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index 7516be4..ef7f0f7 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -278,6 +278,15 @@ static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first) channel_set_bit(dw, CH_EN, dwc->mask); } +static void dwc_dostart_first_queued(struct dw_dma_chan *dwc) +{ + if (list_empty(&dwc->queue)) + return; + + list_move(dwc->queue.next, &dwc->active_list); + dwc_dostart(dwc, dwc_first_active(dwc)); +} + /*----------------------------------------------------------------------*/ static void @@ -334,10 +343,7 @@ static void dwc_complete_all(struct dw_dma *dw, struct dw_dma_chan *dwc) * the completed ones. 
*/ list_splice_init(&dwc->active_list, &list); - if (!list_empty(&dwc->queue)) { - list_move(dwc->queue.next, &dwc->active_list); - dwc_dostart(dwc, dwc_first_active(dwc)); - } + dwc_dostart_first_queued(dwc); spin_unlock_irqrestore(&dwc->lock, flags); @@ -467,10 +473,7 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) /* Try to continue after resetting the channel... */ dwc_chan_disable(dw, dwc); - if (!list_empty(&dwc->queue)) { - list_move(dwc->queue.next, &dwc->active_list); - dwc_dostart(dwc, dwc_first_active(dwc)); - } + dwc_dostart_first_queued(dwc); spin_unlock_irqrestore(&dwc->lock, flags); } @@ -677,17 +680,9 @@ static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx) * possible, perhaps even appending to those already submitted * for DMA. But this is hard to do in a race-free manner. */ - if (list_empty(&dwc->active_list)) { - dev_vdbg(chan2dev(tx->chan), "%s: started %u\n", __func__, - desc->txd.cookie); - list_add_tail(&desc->desc_node, &dwc->active_list); - dwc_dostart(dwc, dwc_first_active(dwc)); - } else { - dev_vdbg(chan2dev(tx->chan), "%s: queued %u\n", __func__, - desc->txd.cookie); - list_add_tail(&desc->desc_node, &dwc->queue); - } + dev_vdbg(chan2dev(tx->chan), "%s: queued %u\n", __func__, desc->txd.cookie); + list_add_tail(&desc->desc_node, &dwc->queue); spin_unlock_irqrestore(&dwc->lock, flags); @@ -1093,9 +1088,12 @@ dwc_tx_status(struct dma_chan *chan, static void dwc_issue_pending(struct dma_chan *chan) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + unsigned long flags; - if (!list_empty(&dwc->queue)) - dwc_scan_descriptors(to_dw_dma(chan->device), dwc); + spin_lock_irqsave(&dwc->lock, flags); + if (list_empty(&dwc->active_list)) + dwc_dostart_first_queued(dwc); + spin_unlock_irqrestore(&dwc->lock, flags); } static int dwc_alloc_chan_resources(struct dma_chan *chan) @@ -1546,11 +1544,6 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) /* Disable BLOCK 
interrupts as well */ channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask); - err = devm_request_irq(chip->dev, chip->irq, dw_dma_interrupt, - IRQF_SHARED, "dw_dmac", dw); - if (err) - return err; - /* Create a pool of consistent memory blocks for hardware descriptors */ dw->desc_pool = dmam_pool_create("dw_dmac_desc_pool", chip->dev, sizeof(struct dw_desc), 4, 0); @@ -1561,6 +1554,11 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) tasklet_init(&dw->tasklet, dw_dma_tasklet, (unsigned long)dw); + err = request_irq(chip->irq, dw_dma_interrupt, IRQF_SHARED, + "dw_dmac", dw); + if (err) + return err; + INIT_LIST_HEAD(&dw->dma.channels); for (i = 0; i < nr_channels; i++) { struct dw_dma_chan *dwc = &dw->chan[i]; @@ -1663,6 +1661,7 @@ int dw_dma_remove(struct dw_dma_chip *chip) dw_dma_off(dw); dma_async_device_unregister(&dw->dma); + free_irq(chip->irq, dw); tasklet_kill(&dw->tasklet); list_for_each_entry_safe(dwc, _dwc, &dw->dma.channels, diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index 2539ea0..037443c 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -182,11 +182,13 @@ static void edma_execute(struct edma_chan *echan) echan->ecc->dummy_slot); } - edma_resume(echan->ch_num); - if (edesc->processed <= MAX_NR_SG) { dev_dbg(dev, "first transfer starting %d\n", echan->ch_num); edma_start(echan->ch_num); + } else { + dev_dbg(dev, "chan: %d: completed %d elements, resuming\n", + echan->ch_num, edesc->processed); + edma_resume(echan->ch_num); } /* diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index 7807f0e..6729308 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -197,12 +197,10 @@ static void mv_set_mode(struct mv_xor_chan *chan, static void mv_chan_activate(struct mv_xor_chan *chan) { - u32 activation; - dev_dbg(mv_chan_to_devp(chan), " activate chan.\n"); - activation = readl_relaxed(XOR_ACTIVATION(chan)); - activation |= 0x1; - writel_relaxed(activation, XOR_ACTIVATION(chan)); + + /* writel 
ensures all descriptors are flushed before activation */ + writel(BIT(0), XOR_ACTIVATION(chan)); } static char mv_chan_is_busy(struct mv_xor_chan *chan) diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c index df6575f..682288c 100644 --- a/drivers/edac/cpc925_edac.c +++ b/drivers/edac/cpc925_edac.c @@ -562,7 +562,7 @@ static void cpc925_mc_check(struct mem_ctl_info *mci) if (apiexcp & UECC_EXCP_DETECTED) { cpc925_mc_printk(mci, KERN_INFO, "DRAM UECC Fault\n"); - edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, pfn, offset, 0, csrow, -1, -1, mci->ctl_name, ""); diff --git a/drivers/edac/e7xxx_edac.c b/drivers/edac/e7xxx_edac.c index 1c4056a..2697dea 100644 --- a/drivers/edac/e7xxx_edac.c +++ b/drivers/edac/e7xxx_edac.c @@ -226,7 +226,7 @@ static void process_ce(struct mem_ctl_info *mci, struct e7xxx_error_info *info) static void process_ce_no_info(struct mem_ctl_info *mci) { edac_dbg(3, "\n"); - edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, -1, -1, -1, + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, 0, -1, -1, -1, "e7xxx CE log register overflow", ""); } diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c index be10a74..7d5b369 100644 --- a/drivers/edac/i3200_edac.c +++ b/drivers/edac/i3200_edac.c @@ -242,11 +242,11 @@ static void i3200_process_error_info(struct mem_ctl_info *mci, -1, -1, "i3000 UE", ""); } else if (log & I3200_ECCERRLOG_CE) { - edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, eccerrlog_syndrome(log), eccerrlog_row(channel, log), -1, -1, - "i3000 UE", ""); + "i3000 CE", ""); } } } diff --git a/drivers/edac/i82860_edac.c b/drivers/edac/i82860_edac.c index 3e3e431..b93b0d0 100644 --- a/drivers/edac/i82860_edac.c +++ b/drivers/edac/i82860_edac.c @@ -124,7 +124,7 @@ static int i82860_process_error_info(struct mem_ctl_info *mci, dimm->location[0], dimm->location[1], 
-1, "i82860 UE", ""); else - edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, info->eap, 0, info->derrsyn, dimm->location[0], dimm->location[1], -1, "i82860 CE", ""); diff --git a/drivers/extcon/extcon-max77693.c b/drivers/extcon/extcon-max77693.c index b56bdaa..9966fc0 100644 --- a/drivers/extcon/extcon-max77693.c +++ b/drivers/extcon/extcon-max77693.c @@ -1180,7 +1180,7 @@ static int max77693_muic_probe(struct platform_device *pdev) /* Initialize MUIC register by using platform data or default data */ - if (pdata->muic_data) { + if (pdata && pdata->muic_data) { init_data = pdata->muic_data->init_data; num_init_data = pdata->muic_data->num_init_data; } else { @@ -1213,7 +1213,7 @@ static int max77693_muic_probe(struct platform_device *pdev) = init_data[i].data; } - if (pdata->muic_data) { + if (pdata && pdata->muic_data) { struct max77693_muic_platform_data *muic_pdata = pdata->muic_data; /* diff --git a/drivers/extcon/extcon-max8997.c b/drivers/extcon/extcon-max8997.c index 67d6738..09f4a93 100644 --- a/drivers/extcon/extcon-max8997.c +++ b/drivers/extcon/extcon-max8997.c @@ -712,7 +712,7 @@ static int max8997_muic_probe(struct platform_device *pdev) goto err_irq; } - if (pdata->muic_pdata) { + if (pdata && pdata->muic_pdata) { struct max8997_muic_platform_data *muic_pdata = pdata->muic_pdata; diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index d7d5c8a..6d44568 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -1637,8 +1637,7 @@ static int dispatch_ioctl(struct client *client, _IOC_SIZE(cmd) > sizeof(buffer)) return -ENOTTY; - if (_IOC_DIR(cmd) == _IOC_READ) - memset(&buffer, 0, _IOC_SIZE(cmd)); + memset(&buffer, 0, sizeof(buffer)); if (_IOC_DIR(cmd) & _IOC_WRITE) if (copy_from_user(&buffer, arg, _IOC_SIZE(cmd))) diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c index 743fd42..b1b82e1 100644 --- 
a/drivers/firmware/efi/efi-pstore.c +++ b/drivers/firmware/efi/efi-pstore.c @@ -40,7 +40,7 @@ struct pstore_read_data { static inline u64 generic_id(unsigned long timestamp, unsigned int part, int count) { - return (timestamp * 100 + part) * 1000 + count; + return ((u64) timestamp * 100 + part) * 1000 + count; } static int efi_pstore_read_func(struct efivar_entry *entry, void *data) diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index b22659c..e612552 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -481,7 +481,7 @@ EXPORT_SYMBOL_GPL(efivar_entry_remove); */ static void efivar_entry_list_del_unlock(struct efivar_entry *entry) { - WARN_ON(!spin_is_locked(&__efivars->lock)); + lockdep_assert_held(&__efivars->lock); list_del(&entry->list); spin_unlock_irq(&__efivars->lock); @@ -507,7 +507,7 @@ int __efivar_entry_delete(struct efivar_entry *entry) const struct efivar_operations *ops = __efivars->ops; efi_status_t status; - WARN_ON(!spin_is_locked(&__efivars->lock)); + lockdep_assert_held(&__efivars->lock); status = ops->set_variable(entry->var.VariableName, &entry->var.VendorGuid, @@ -667,7 +667,7 @@ struct efivar_entry *efivar_entry_find(efi_char16_t *name, efi_guid_t guid, int strsize1, strsize2; bool found = false; - WARN_ON(!spin_is_locked(&__efivars->lock)); + lockdep_assert_held(&__efivars->lock); list_for_each_entry_safe(entry, n, head, list) { strsize1 = ucs2_strsize(name, 1024); @@ -739,7 +739,7 @@ int __efivar_entry_get(struct efivar_entry *entry, u32 *attributes, const struct efivar_operations *ops = __efivars->ops; efi_status_t status; - WARN_ON(!spin_is_locked(&__efivars->lock)); + lockdep_assert_held(&__efivars->lock); status = ops->get_variable(entry->var.VariableName, &entry->var.VendorGuid, diff --git a/drivers/gpio/gpio-mcp23s08.c b/drivers/gpio/gpio-mcp23s08.c index 2deb0c5..380fedb 100644 --- a/drivers/gpio/gpio-mcp23s08.c +++ b/drivers/gpio/gpio-mcp23s08.c @@ -657,8 +657,11 @@ static int 
mcp23s08_probe(struct spi_device *spi) return -ENODEV; } - for (addr = 0; addr < ARRAY_SIZE(pdata->chip); addr++) + for (addr = 0; addr < ARRAY_SIZE(pdata->chip); addr++) { pullups[addr] = 0; + if (spi_present_mask & (1 << addr)) + chips++; + } } else { type = spi_get_device_id(spi)->driver_data; pdata = dev_get_platdata(&spi->dev); @@ -681,12 +684,12 @@ static int mcp23s08_probe(struct spi_device *spi) pullups[addr] = pdata->chip[addr].pullups; } - if (!chips) - return -ENODEV; - base = pdata->base; } + if (!chips) + return -ENODEV; + data = kzalloc(sizeof *data + chips * sizeof(struct mcp23s08), GFP_KERNEL); if (!data) diff --git a/drivers/gpio/gpio-mxs.c b/drivers/gpio/gpio-mxs.c index f8e6af2..d599fc4 100644 --- a/drivers/gpio/gpio-mxs.c +++ b/drivers/gpio/gpio-mxs.c @@ -214,7 +214,8 @@ static void __init mxs_gpio_init_gc(struct mxs_gpio_port *port, int irq_base) ct->regs.ack = PINCTRL_IRQSTAT(port) + MXS_CLR; ct->regs.mask = PINCTRL_IRQEN(port); - irq_setup_generic_chip(gc, IRQ_MSK(32), 0, IRQ_NOREQUEST, 0); + irq_setup_generic_chip(gc, IRQ_MSK(32), IRQ_GC_INIT_NESTED_LOCK, + IRQ_NOREQUEST, 0); } static int mxs_gpio_to_irq(struct gpio_chip *gc, unsigned offset) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 0dfaf20..63e7fad 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -12,6 +12,7 @@ */ #include <linux/device.h> +#include <linux/err.h> #include <linux/errno.h> #include <linux/module.h> #include <linux/io.h> @@ -42,8 +43,14 @@ static int of_gpiochip_find_and_xlate(struct gpio_chip *gc, void *data) return false; ret = gc->of_xlate(gc, &gg_data->gpiospec, gg_data->flags); - if (ret < 0) - return false; + if (ret < 0) { + /* We've found the gpio chip, but the translation failed. 
+ * Return true to stop looking and return the translation + * error via out_gpio + */ + gg_data->out_gpio = ERR_PTR(ret); + return true; + } gg_data->out_gpio = ret + gc->base; return true; diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 0dee0e0..e66d1cdb 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -362,7 +362,7 @@ static ssize_t gpio_value_store(struct device *dev, return status; } -static const DEVICE_ATTR(value, 0644, +static DEVICE_ATTR(value, 0644, gpio_value_show, gpio_value_store); static irqreturn_t gpio_sysfs_irq(int irq, void *priv) @@ -580,18 +580,16 @@ static ssize_t gpio_active_low_store(struct device *dev, return status ? : size; } -static const DEVICE_ATTR(active_low, 0644, +static DEVICE_ATTR(active_low, 0644, gpio_active_low_show, gpio_active_low_store); -static const struct attribute *gpio_attrs[] = { +static struct attribute *gpio_attrs[] = { &dev_attr_value.attr, &dev_attr_active_low.attr, NULL, }; -static const struct attribute_group gpio_attr_group = { - .attrs = (struct attribute **) gpio_attrs, -}; +ATTRIBUTE_GROUPS(gpio); /* * /sys/class/gpio/gpiochipN/ @@ -627,16 +625,13 @@ static ssize_t chip_ngpio_show(struct device *dev, } static DEVICE_ATTR(ngpio, 0444, chip_ngpio_show, NULL); -static const struct attribute *gpiochip_attrs[] = { +static struct attribute *gpiochip_attrs[] = { &dev_attr_base.attr, &dev_attr_label.attr, &dev_attr_ngpio.attr, NULL, }; - -static const struct attribute_group gpiochip_attr_group = { - .attrs = (struct attribute **) gpiochip_attrs, -}; +ATTRIBUTE_GROUPS(gpiochip); /* * /sys/class/gpio/export ... write-only @@ -791,18 +786,15 @@ static int gpiod_export(struct gpio_desc *desc, bool direction_may_change) if (desc->chip->names && desc->chip->names[offset]) ioname = desc->chip->names[offset]; - dev = device_create(&gpio_class, desc->chip->dev, MKDEV(0, 0), - desc, ioname ? 
ioname : "gpio%u", - desc_to_gpio(desc)); + dev = device_create_with_groups(&gpio_class, desc->chip->dev, + MKDEV(0, 0), desc, gpio_groups, + ioname ? ioname : "gpio%u", + desc_to_gpio(desc)); if (IS_ERR(dev)) { status = PTR_ERR(dev); goto fail_unlock; } - status = sysfs_create_group(&dev->kobj, &gpio_attr_group); - if (status) - goto fail_unregister_device; - if (direction_may_change) { status = device_create_file(dev, &dev_attr_direction); if (status) @@ -813,13 +805,15 @@ static int gpiod_export(struct gpio_desc *desc, bool direction_may_change) !test_bit(FLAG_IS_OUT, &desc->flags))) { status = device_create_file(dev, &dev_attr_edge); if (status) - goto fail_unregister_device; + goto fail_remove_attr_direction; } set_bit(FLAG_EXPORT, &desc->flags); mutex_unlock(&sysfs_lock); return 0; +fail_remove_attr_direction: + device_remove_file(dev, &dev_attr_direction); fail_unregister_device: device_unregister(dev); fail_unlock: @@ -971,6 +965,8 @@ static void gpiod_unexport(struct gpio_desc *desc) mutex_unlock(&sysfs_lock); if (dev) { + device_remove_file(dev, &dev_attr_edge); + device_remove_file(dev, &dev_attr_direction); device_unregister(dev); put_device(dev); } @@ -1001,13 +997,13 @@ static int gpiochip_export(struct gpio_chip *chip) /* use chip->base for the ID; it's already known to be unique */ mutex_lock(&sysfs_lock); - dev = device_create(&gpio_class, chip->dev, MKDEV(0, 0), chip, - "gpiochip%d", chip->base); - if (!IS_ERR(dev)) { - status = sysfs_create_group(&dev->kobj, - &gpiochip_attr_group); - } else + dev = device_create_with_groups(&gpio_class, chip->dev, MKDEV(0, 0), + chip, gpiochip_groups, + "gpiochip%d", chip->base); + if (IS_ERR(dev)) status = PTR_ERR(dev); + else + status = 0; chip->exported = (status == 0); mutex_unlock(&sysfs_lock); @@ -1216,18 +1212,20 @@ int gpiochip_add(struct gpio_chip *chip) spin_unlock_irqrestore(&gpio_lock, flags); + if (status) + goto fail; + #ifdef CONFIG_PINCTRL INIT_LIST_HEAD(&chip->pin_ranges); #endif 
of_gpiochip_add(chip); - if (status) - goto fail; - status = gpiochip_export(chip); - if (status) + if (status) { + of_gpiochip_remove(chip); goto fail; + } pr_debug("gpiochip_add: registered GPIOs %d to %d on device: %s\n", chip->base, chip->base + chip->ngpio - 1, diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c index 7f6152d..d57a38d 100644 --- a/drivers/gpu/drm/ast/ast_main.c +++ b/drivers/gpu/drm/ast/ast_main.c @@ -100,7 +100,7 @@ static int ast_detect_chip(struct drm_device *dev) } ast->vga2_clone = false; } else { - ast->chip = 2000; + ast->chip = AST2000; DRM_INFO("AST 2000 detected\n"); } } diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index 7fc9f72..e8f6418 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -1012,8 +1012,8 @@ static u32 copy_cursor_image(u8 *src, u8 *dst, int width, int height) srcdata32[1].ul = *((u32 *)(srcxor + 4)) & 0xf0f0f0f0; data32.b[0] = srcdata32[0].b[1] | (srcdata32[0].b[0] >> 4); data32.b[1] = srcdata32[0].b[3] | (srcdata32[0].b[2] >> 4); - data32.b[2] = srcdata32[0].b[1] | (srcdata32[1].b[0] >> 4); - data32.b[3] = srcdata32[0].b[3] | (srcdata32[1].b[2] >> 4); + data32.b[2] = srcdata32[1].b[1] | (srcdata32[1].b[0] >> 4); + data32.b[3] = srcdata32[1].b[3] | (srcdata32[1].b[2] >> 4); writel(data32.ul, dstxor); csum += data32.ul; diff --git a/drivers/gpu/drm/cirrus/cirrus_drv.c b/drivers/gpu/drm/cirrus/cirrus_drv.c index 138364d..73fed35 100644 --- a/drivers/gpu/drm/cirrus/cirrus_drv.c +++ b/drivers/gpu/drm/cirrus/cirrus_drv.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/console.h> #include <drm/drmP.h> +#include <drm/drm_crtc_helper.h> #include "cirrus_drv.h" @@ -31,6 +32,8 @@ static struct drm_driver driver; static DEFINE_PCI_DEVICE_TABLE(pciidlist) = { { PCI_VENDOR_ID_CIRRUS, PCI_DEVICE_ID_CIRRUS_5446, 0x1af4, 0x1100, 0, 0, 0 }, + { PCI_VENDOR_ID_CIRRUS, PCI_DEVICE_ID_CIRRUS_5446, PCI_VENDOR_ID_XEN, + 0x0001, 0, 0, 0 }, 
{0,} }; @@ -75,6 +78,41 @@ static void cirrus_pci_remove(struct pci_dev *pdev) drm_put_dev(dev); } +static int cirrus_pm_suspend(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = pci_get_drvdata(pdev); + struct cirrus_device *cdev = drm_dev->dev_private; + + drm_kms_helper_poll_disable(drm_dev); + + if (cdev->mode_info.gfbdev) { + console_lock(); + fb_set_suspend(cdev->mode_info.gfbdev->helper.fbdev, 1); + console_unlock(); + } + + return 0; +} + +static int cirrus_pm_resume(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = pci_get_drvdata(pdev); + struct cirrus_device *cdev = drm_dev->dev_private; + + drm_helper_resume_force_mode(drm_dev); + + if (cdev->mode_info.gfbdev) { + console_lock(); + fb_set_suspend(cdev->mode_info.gfbdev->helper.fbdev, 0); + console_unlock(); + } + + drm_kms_helper_poll_enable(drm_dev); + return 0; +} + static const struct file_operations cirrus_driver_fops = { .owner = THIS_MODULE, .open = drm_open, @@ -104,11 +142,17 @@ static struct drm_driver driver = { .dumb_destroy = drm_gem_dumb_destroy, }; +static const struct dev_pm_ops cirrus_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(cirrus_pm_suspend, + cirrus_pm_resume) +}; + static struct pci_driver cirrus_pci_driver = { .name = DRIVER_NAME, .id_table = pciidlist, .probe = cirrus_pci_probe, .remove = cirrus_pci_remove, + .driver.pm = &cirrus_pm_ops, }; static int __init cirrus_init(void) diff --git a/drivers/gpu/drm/cirrus/cirrus_mode.c b/drivers/gpu/drm/cirrus/cirrus_mode.c index 3592616..c6ec012 100644 --- a/drivers/gpu/drm/cirrus/cirrus_mode.c +++ b/drivers/gpu/drm/cirrus/cirrus_mode.c @@ -308,6 +308,9 @@ static int cirrus_crtc_mode_set(struct drm_crtc *crtc, WREG_HDR(hdr); cirrus_crtc_do_set_base(crtc, old_fb, x, y, 0); + + /* Unblank (needed on S3 resume, vgabios doesn't do it then) */ + outb(0x20, 0x3c0); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 
aaeac32..07a77d7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1386,10 +1386,13 @@ unlock: out: switch (ret) { case -EIO: - /* If this -EIO is due to a gpu hang, give the reset code a - * chance to clean up the mess. Otherwise return the proper - * SIGBUS. */ - if (i915_terminally_wedged(&dev_priv->gpu_error)) + /* + * We eat errors when the gpu is terminally wedged to avoid + * userspace unduly crashing (gl has no provisions for mmaps to + * fail). But any other -EIO isn't ours (e.g. swap in failure) + * and so needs to be reported. + */ + if (!i915_terminally_wedged(&dev_priv->gpu_error)) return VM_FAULT_SIGBUS; case -EAGAIN: /* @@ -3419,7 +3422,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, { struct drm_device *dev = obj->base.dev; drm_i915_private_t *dev_priv = dev->dev_private; - struct i915_vma *vma; + struct i915_vma *vma, *next; int ret; if (obj->cache_level == cache_level) @@ -3430,7 +3433,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, return -EBUSY; } - list_for_each_entry(vma, &obj->vma_list, vma_link) { + list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { if (!i915_gem_valid_gtt_space(dev, &vma->node, cache_level)) { ret = i915_vma_unbind(vma); if (ret) @@ -4792,7 +4795,7 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) if (!mutex_is_locked(mutex)) return false; -#if (defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)) && !defined(CONFIG_PREEMPT_RT_BASE) +#if defined(CONFIG_SMP) && !defined(CONFIG_DEBUG_MUTEXES) && !defined(CONFIG_PREEMPT_RT_BASE) return mutex->owner == task; #else /* Since UP may be pre-empted, we cannot assume that we own the lock */ diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 979a6ea..d5f6bee 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -674,9 +674,9 @@ 
i915_gem_execbuffer_relocate_slow(struct drm_device *dev, * relocations were valid. */ for (j = 0; j < exec[i].relocation_count; j++) { - if (copy_to_user(&user_relocs[j].presumed_offset, - &invalid_offset, - sizeof(invalid_offset))) { + if (__copy_to_user(&user_relocs[j].presumed_offset, + &invalid_offset, + sizeof(invalid_offset))) { ret = -EFAULT; mutex_lock(&dev->struct_mutex); goto err; @@ -1213,18 +1213,21 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list, &dev_priv->gtt.base); if (!ret) { + struct drm_i915_gem_exec_object __user *user_exec_list = + to_user_ptr(args->buffers_ptr); + /* Copy the new buffer offsets back to the user's exec list. */ - for (i = 0; i < args->buffer_count; i++) - exec_list[i].offset = exec2_list[i].offset; - /* ... and back out to userspace */ - ret = copy_to_user(to_user_ptr(args->buffers_ptr), - exec_list, - sizeof(*exec_list) * args->buffer_count); - if (ret) { - ret = -EFAULT; - DRM_DEBUG("failed to copy %d exec entries " - "back to user (%d)\n", - args->buffer_count, ret); + for (i = 0; i < args->buffer_count; i++) { + ret = __copy_to_user(&user_exec_list[i].offset, + &exec2_list[i].offset, + sizeof(user_exec_list[i].offset)); + if (ret) { + ret = -EFAULT; + DRM_DEBUG("failed to copy %d exec entries " + "back to user (%d)\n", + args->buffer_count, ret); + break; + } } } @@ -1272,14 +1275,21 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, &dev_priv->gtt.base); if (!ret) { /* Copy the new buffer offsets back to the user's exec list. 
*/ - ret = copy_to_user(to_user_ptr(args->buffers_ptr), - exec2_list, - sizeof(*exec2_list) * args->buffer_count); - if (ret) { - ret = -EFAULT; - DRM_DEBUG("failed to copy %d exec entries " - "back to user (%d)\n", - args->buffer_count, ret); + struct drm_i915_gem_exec_object2 *user_exec_list = + to_user_ptr(args->buffers_ptr); + int i; + + for (i = 0; i < args->buffer_count; i++) { + ret = __copy_to_user(&user_exec_list[i].offset, + &exec2_list[i].offset, + sizeof(user_exec_list[i].offset)); + if (ret) { + ret = -EFAULT; + DRM_DEBUG("failed to copy %d exec entries " + "back to user\n", + args->buffer_count); + break; + } } } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index c7fa2e4..4ac33d2 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -521,6 +521,16 @@ void i915_check_and_clear_faults(struct drm_device *dev) POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS])); } +static void i915_ggtt_flush(struct drm_i915_private *dev_priv) +{ + if (INTEL_INFO(dev_priv->dev)->gen < 6) { + intel_gtt_chipset_flush(); + } else { + I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); + POSTING_READ(GFX_FLSH_CNTL_GEN6); + } +} + void i915_gem_suspend_gtt_mappings(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -537,6 +547,8 @@ void i915_gem_suspend_gtt_mappings(struct drm_device *dev) dev_priv->gtt.base.start / PAGE_SIZE, dev_priv->gtt.base.total / PAGE_SIZE, true); + + i915_ggtt_flush(dev_priv); } void i915_gem_restore_gtt_mappings(struct drm_device *dev) @@ -557,7 +569,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) i915_gem_gtt_bind_object(obj, obj->cache_level); } - i915_gem_chipset_flush(dev); + i915_ggtt_flush(dev_priv); } int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index c077df0..c052e1e 100644 --- 
a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -74,6 +74,50 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev) if (base == 0) return 0; + /* make sure we don't clobber the GTT if it's within stolen memory */ + if (INTEL_INFO(dev)->gen <= 4 && !IS_G33(dev) && !IS_G4X(dev)) { + struct { + u32 start, end; + } stolen[2] = { + { .start = base, .end = base + dev_priv->gtt.stolen_size, }, + { .start = base, .end = base + dev_priv->gtt.stolen_size, }, + }; + u64 gtt_start, gtt_end; + + gtt_start = I915_READ(PGTBL_CTL); + if (IS_GEN4(dev)) + gtt_start = (gtt_start & PGTBL_ADDRESS_LO_MASK) | + (gtt_start & PGTBL_ADDRESS_HI_MASK) << 28; + else + gtt_start &= PGTBL_ADDRESS_LO_MASK; + gtt_end = gtt_start + gtt_total_entries(dev_priv->gtt) * 4; + + if (gtt_start >= stolen[0].start && gtt_start < stolen[0].end) + stolen[0].end = gtt_start; + if (gtt_end > stolen[1].start && gtt_end <= stolen[1].end) + stolen[1].start = gtt_end; + + /* pick the larger of the two chunks */ + if (stolen[0].end - stolen[0].start > + stolen[1].end - stolen[1].start) { + base = stolen[0].start; + dev_priv->gtt.stolen_size = stolen[0].end - stolen[0].start; + } else { + base = stolen[1].start; + dev_priv->gtt.stolen_size = stolen[1].end - stolen[1].start; + } + + if (stolen[0].start != stolen[1].start || + stolen[0].end != stolen[1].end) { + DRM_DEBUG_KMS("GTT within stolen memory at 0x%llx-0x%llx\n", + (unsigned long long) gtt_start, + (unsigned long long) gtt_end - 1); + DRM_DEBUG_KMS("Stolen memory adjusted to 0x%x-0x%x\n", + base, base + (u32) dev_priv->gtt.stolen_size - 1); + } + } + + /* Verify that nothing else uses this physical address. Stolen * memory should be reserved by the BIOS and hidden from the * kernel. 
So if the region is already marked as busy, something @@ -82,9 +126,26 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev) r = devm_request_mem_region(dev->dev, base, dev_priv->gtt.stolen_size, "Graphics Stolen Memory"); if (r == NULL) { - DRM_ERROR("conflict detected with stolen region: [0x%08x - 0x%08x]\n", - base, base + (uint32_t)dev_priv->gtt.stolen_size); - base = 0; + /* + * One more attempt but this time requesting region from + * base + 1, as we have seen that this resolves the region + * conflict with the PCI Bus. + * This is a BIOS w/a: Some BIOS wrap stolen in the root + * PCI bus, but have an off-by-one error. Hence retry the + * reservation starting from 1 instead of 0. + */ + r = devm_request_mem_region(dev->dev, base + 1, + dev_priv->gtt.stolen_size - 1, + "Graphics Stolen Memory"); + /* + * GEN3 firmware likes to smash pci bridges into the stolen + * range. Apparently this works. + */ + if (r == NULL && !IS_GEN3(dev)) { + DRM_ERROR("conflict detected with stolen region: [0x%08x - 0x%08x]\n", + base, base + (uint32_t)dev_priv->gtt.stolen_size); + base = 0; + } } return base; @@ -202,7 +263,7 @@ int i915_gem_init_stolen(struct drm_device *dev) int bios_reserved = 0; #ifdef CONFIG_INTEL_IOMMU - if (intel_iommu_gfx_mapped) { + if (intel_iommu_gfx_mapped && INTEL_INFO(dev)->gen < 8) { DRM_INFO("DMAR active, disabling use of stolen memory\n"); return 0; } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 375abe7..9d344da 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -309,6 +309,7 @@ #define PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) /* gen7+ */ #define PIPE_CONTROL_CS_STALL (1<<20) #define PIPE_CONTROL_TLB_INVALIDATE (1<<18) +#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1<<16) #define PIPE_CONTROL_QW_WRITE (1<<14) #define PIPE_CONTROL_DEPTH_STALL (1<<13) #define PIPE_CONTROL_WRITE_FLUSH (1<<12) @@ -570,6 +571,9 @@ /* * Instruction and interrupt control regs */ +#define 
PGTBL_CTL 0x02020 +#define PGTBL_ADDRESS_LO_MASK 0xfffff000 /* bits [31:12] */ +#define PGTBL_ADDRESS_HI_MASK 0x000000f0 /* bits [35:32] (gen4) */ #define PGTBL_ER 0x02024 #define RENDER_RING_BASE 0x02000 #define BSD_RING_BASE 0x04000 diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 53f2bed..16ca7f6 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -657,7 +657,7 @@ init_vbt_defaults(struct drm_i915_private *dev_priv) DRM_DEBUG_KMS("Set default to SSC at %dMHz\n", dev_priv->vbt.lvds_ssc_freq); } -static int __init intel_no_opregion_vbt_callback(const struct dmi_system_id *id) +static int intel_no_opregion_vbt_callback(const struct dmi_system_id *id) { DRM_DEBUG_KMS("Falling back to manually reading VBT from " "VBIOS ROM for %s\n", diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 10d1de5..3c25af4 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -743,7 +743,7 @@ static const struct drm_encoder_funcs intel_crt_enc_funcs = { .destroy = intel_encoder_destroy, }; -static int __init intel_no_crt_dmi_callback(const struct dmi_system_id *id) +static int intel_no_crt_dmi_callback(const struct dmi_system_id *id) { DRM_INFO("Skipping CRT initialization for %s\n", id->ident); return 1; @@ -758,6 +758,14 @@ static const struct dmi_system_id intel_no_crt[] = { DMI_MATCH(DMI_PRODUCT_NAME, "ZGB"), }, }, + { + .callback = intel_no_crt_dmi_callback, + .ident = "DELL XPS 8700", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "XPS 8700"), + }, + }, { } }; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 5aa836e..37a9d3c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3537,7 +3537,6 @@ static void ironlake_crtc_disable(struct drm_crtc *crtc) ironlake_fdi_disable(crtc); 
ironlake_disable_pch_transcoder(dev_priv, pipe); - intel_set_pch_fifo_underrun_reporting(dev, pipe, true); if (HAS_PCH_CPT(dev)) { /* disable TRANS_DP_CTL */ @@ -3613,7 +3612,6 @@ static void haswell_crtc_disable(struct drm_crtc *crtc) if (intel_crtc->config.has_pch_encoder) { lpt_disable_pch_transcoder(dev_priv); - intel_set_pch_fifo_underrun_reporting(dev, TRANSCODER_A, true); intel_ddi_fdi_disable(crtc); } @@ -8688,11 +8686,22 @@ intel_pipe_config_compare(struct drm_device *dev, PIPE_CONF_CHECK_I(requested_mode.hdisplay); PIPE_CONF_CHECK_I(requested_mode.vdisplay); - PIPE_CONF_CHECK_I(gmch_pfit.control); - /* pfit ratios are autocomputed by the hw on gen4+ */ - if (INTEL_INFO(dev)->gen < 4) - PIPE_CONF_CHECK_I(gmch_pfit.pgm_ratios); - PIPE_CONF_CHECK_I(gmch_pfit.lvds_border_bits); + /* + * FIXME: BIOS likes to set up a cloned config with lvds+external + * screen. Since we don't yet re-compute the pipe config when moving + * just the lvds port away to another pipe the sw tracking won't match. + * + * Proper atomic modesets with recomputed global state will fix this. + * Until then just don't check gmch state for inherited modes. 
+ */ + if (!PIPE_CONF_QUIRK(PIPE_CONFIG_QUIRK_INHERITED_MODE)) { + PIPE_CONF_CHECK_I(gmch_pfit.control); + /* pfit ratios are autocomputed by the hw on gen4+ */ + if (INTEL_INFO(dev)->gen < 4) + PIPE_CONF_CHECK_I(gmch_pfit.pgm_ratios); + PIPE_CONF_CHECK_I(gmch_pfit.lvds_border_bits); + } + PIPE_CONF_CHECK_I(pch_pfit.enabled); if (current_config->pch_pfit.enabled) { PIPE_CONF_CHECK_I(pch_pfit.pos); @@ -10097,6 +10106,9 @@ static struct intel_quirk intel_quirks[] = { /* Acer Aspire 4736Z */ { 0x2a42, 0x1025, 0x0260, quirk_invert_brightness }, + /* Acer Aspire 5336 */ + { 0x2a42, 0x1025, 0x048a, quirk_invert_brightness }, + /* Dell XPS13 HD Sandy Bridge */ { 0x0116, 0x1028, 0x052e, quirk_no_pcm_pwm_enable }, /* Dell XPS13 HD and XPS13 FHD Ivy Bridge */ @@ -10228,15 +10240,6 @@ void intel_modeset_init(struct drm_device *dev) intel_disable_fbc(dev); } -static void -intel_connector_break_all_links(struct intel_connector *connector) -{ - connector->base.dpms = DRM_MODE_DPMS_OFF; - connector->base.encoder = NULL; - connector->encoder->connectors_active = false; - connector->encoder->base.crtc = NULL; -} - static void intel_enable_pipe_a(struct drm_device *dev) { struct intel_connector *connector; @@ -10318,8 +10321,17 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc) if (connector->encoder->base.crtc != &crtc->base) continue; - intel_connector_break_all_links(connector); + connector->base.dpms = DRM_MODE_DPMS_OFF; + connector->base.encoder = NULL; } + /* multiple connectors may have the same encoder: + * handle them and break crtc link separately */ + list_for_each_entry(connector, &dev->mode_config.connector_list, + base.head) + if (connector->encoder->base.crtc == &crtc->base) { + connector->encoder->base.crtc = NULL; + connector->encoder->connectors_active = false; + } WARN_ON(crtc->active); crtc->base.enabled = false; @@ -10390,6 +10402,8 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder) drm_get_encoder_name(&encoder->base)); 
encoder->disable(encoder); } + encoder->base.crtc = NULL; + encoder->connectors_active = false; /* Inconsistent output/port/pipe state happens presumably due to * a bug in one of the get_hw_state functions. Or someplace else @@ -10400,8 +10414,8 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder) base.head) { if (connector->encoder != encoder) continue; - - intel_connector_break_all_links(connector); + connector->base.dpms = DRM_MODE_DPMS_OFF; + connector->base.encoder = NULL; } } /* Enabled encoders without active connectors will be fixed in @@ -10443,6 +10457,8 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) base.head) { memset(&crtc->config, 0, sizeof(crtc->config)); + crtc->config.quirks |= PIPE_CONFIG_QUIRK_INHERITED_MODE; + crtc->active = dev_priv->display.get_pipe_config(crtc, &crtc->config); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 7f2b384..569c0c5 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -204,7 +204,8 @@ struct intel_crtc_config { * tracked with quirk flags so that fastboot and state checker can act * accordingly. 
*/ -#define PIPE_CONFIG_QUIRK_MODE_SYNC_FLAGS (1<<0) /* unreliable sync mode.flags */ +#define PIPE_CONFIG_QUIRK_MODE_SYNC_FLAGS (1<<0) /* unreliable sync mode.flags */ +#define PIPE_CONFIG_QUIRK_INHERITED_MODE (1<<1) /* mode inherited from firmware */ unsigned long quirks; struct drm_display_mode requested_mode; diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 4d302f3d..f5c4366 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -830,11 +830,11 @@ static void intel_disable_hdmi(struct intel_encoder *encoder) } } -static int hdmi_portclock_limit(struct intel_hdmi *hdmi) +static int hdmi_portclock_limit(struct intel_hdmi *hdmi, bool respect_dvi_limit) { struct drm_device *dev = intel_hdmi_to_dev(hdmi); - if (!hdmi->has_hdmi_sink || IS_G4X(dev)) + if ((respect_dvi_limit && !hdmi->has_hdmi_sink) || IS_G4X(dev)) return 165000; else if (IS_HASWELL(dev)) return 300000; @@ -845,7 +845,8 @@ static int hdmi_portclock_limit(struct intel_hdmi *hdmi) static int intel_hdmi_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { - if (mode->clock > hdmi_portclock_limit(intel_attached_hdmi(connector))) + if (mode->clock > hdmi_portclock_limit(intel_attached_hdmi(connector), + true)) return MODE_CLOCK_HIGH; if (mode->clock < 20000) return MODE_CLOCK_LOW; @@ -863,7 +864,7 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_display_mode *adjusted_mode = &pipe_config->adjusted_mode; int clock_12bpc = pipe_config->requested_mode.clock * 3 / 2; - int portclock_limit = hdmi_portclock_limit(intel_hdmi); + int portclock_limit = hdmi_portclock_limit(intel_hdmi, false); int desired_bpp; if (intel_hdmi->color_range_auto) { diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index b8af94a..e5473da 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -523,7 
+523,7 @@ static const struct drm_encoder_funcs intel_lvds_enc_funcs = { .destroy = intel_encoder_destroy, }; -static int __init intel_no_lvds_dmi_callback(const struct dmi_system_id *id) +static int intel_no_lvds_dmi_callback(const struct dmi_system_id *id) { DRM_INFO("Skipping LVDS initialization for %s\n", id->ident); return 1; @@ -934,6 +934,17 @@ void intel_lvds_init(struct drm_device *dev) int pipe; u8 pin; + /* + * Unlock registers and just leave them unlocked. Do this before + * checking quirk lists to avoid bogus WARNINGs. + */ + if (HAS_PCH_SPLIT(dev)) { + I915_WRITE(PCH_PP_CONTROL, + I915_READ(PCH_PP_CONTROL) | PANEL_UNLOCK_REGS); + } else { + I915_WRITE(PP_CONTROL, + I915_READ(PP_CONTROL) | PANEL_UNLOCK_REGS); + } if (!intel_lvds_supported(dev)) return; @@ -1113,17 +1124,6 @@ out: DRM_DEBUG_KMS("detected %s-link lvds configuration\n", lvds_encoder->is_dual_link ? "dual" : "single"); - /* - * Unlock registers and just - * leave them unlocked - */ - if (HAS_PCH_SPLIT(dev)) { - I915_WRITE(PCH_PP_CONTROL, - I915_READ(PCH_PP_CONTROL) | PANEL_UNLOCK_REGS); - } else { - I915_WRITE(PP_CONTROL, - I915_READ(PP_CONTROL) | PANEL_UNLOCK_REGS); - } lvds_connector->lid_notifier.notifier_call = intel_lid_notify; if (acpi_lid_notifier_register(&lvds_connector->lid_notifier)) { DRM_DEBUG_KMS("lid notifier registration failed\n"); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 26c2ea3..a7daa2a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1592,6 +1592,16 @@ static void i9xx_update_wm(struct drm_device *dev) DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm); + if (IS_I915GM(dev) && enabled) { + struct intel_framebuffer *fb; + + fb = to_intel_framebuffer(enabled->fb); + + /* self-refresh seems busted with untiled */ + if (fb->obj->tiling_mode == I915_TILING_NONE) + enabled = NULL; + } + /* * Overlay gets an aggressive default since video jitter is bad. 
*/ @@ -5337,24 +5347,26 @@ static void __intel_set_power_well(struct drm_device *dev, bool enable) static struct i915_power_well *hsw_pwr; /* Display audio driver power well request */ -void i915_request_power_well(void) +int i915_request_power_well(void) { - if (WARN_ON(!hsw_pwr)) - return; + if (!hsw_pwr) + return -ENODEV; spin_lock_irq(&hsw_pwr->lock); if (!hsw_pwr->count++ && !hsw_pwr->i915_request) __intel_set_power_well(hsw_pwr->device, true); spin_unlock_irq(&hsw_pwr->lock); + return 0; } EXPORT_SYMBOL_GPL(i915_request_power_well); /* Display audio driver power well release */ -void i915_release_power_well(void) +int i915_release_power_well(void) { - if (WARN_ON(!hsw_pwr)) - return; + if (!hsw_pwr) + return -ENODEV; + spin_lock_irq(&hsw_pwr->lock); WARN_ON(!hsw_pwr->count); @@ -5362,9 +5374,30 @@ void i915_release_power_well(void) !hsw_pwr->i915_request) __intel_set_power_well(hsw_pwr->device, false); spin_unlock_irq(&hsw_pwr->lock); + return 0; } EXPORT_SYMBOL_GPL(i915_release_power_well); +/* + * Private interface for the audio driver to get CDCLK in kHz. + * + * Caller must request power well using i915_request_power_well() prior to + * making the call. 
+ */ +int i915_get_cdclk_freq(void) +{ + struct drm_i915_private *dev_priv; + + if (!hsw_pwr) + return -ENODEV; + + dev_priv = container_of(hsw_pwr, struct drm_i915_private, + power_well); + + return intel_ddi_get_cdclk_freq(dev_priv); +} +EXPORT_SYMBOL_GPL(i915_get_cdclk_freq); + int i915_init_power_well(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 7507fe0..776ed3f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -322,12 +322,15 @@ gen7_render_ring_flush(struct intel_ring_buffer *ring, flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR; /* * TLB invalidate requires a post-sync write. */ flags |= PIPE_CONTROL_QW_WRITE; flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD; + /* Workaround: we must issue a pipe_control with CS-stall bit * set before a pipe_control command that has the state cache * invalidate bit set. */ @@ -423,6 +426,9 @@ static int init_ring_common(struct intel_ring_buffer *ring) } } + /* Enforce ordering by reading HEAD register back */ + I915_READ_HEAD(ring); + /* Initialize the ring. 
This must happen _after_ we've cleared the ring * registers with the above sequence (the readback of the HEAD registers * also enforces ordering), otherwise the hw might lose the new ring diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index dd6f84b..6b58be1 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -854,6 +854,10 @@ intel_enable_tv(struct intel_encoder *encoder) struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; + /* Prevents vblank waits from timing out in intel_tv_detect_type() */ + intel_wait_for_vblank(encoder->base.dev, + to_intel_crtc(encoder->base.crtc)->pipe); + I915_WRITE(TV_CTL, I915_READ(TV_CTL) | TV_ENC_ENABLE); } @@ -1530,9 +1534,14 @@ static int tv_is_present_in_vbt(struct drm_device *dev) /* * If the device type is not TV, continue. */ - if (p_child->device_type != DEVICE_TYPE_INT_TV && - p_child->device_type != DEVICE_TYPE_TV) + switch (p_child->device_type) { + case DEVICE_TYPE_INT_TV: + case DEVICE_TYPE_TV: + case DEVICE_TYPE_TV_SVIDEO_COMPOSITE: + break; + default: continue; + } /* Only when the addin_offset is non-zero, it is regarded * as present. 
*/ diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index ee7d649..a9a0154 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -166,6 +166,8 @@ static void vlv_force_wake_reset(struct drm_i915_private *dev_priv) { __raw_i915_write32(dev_priv, FORCEWAKE_VLV, _MASKED_BIT_DISABLE(0xffff)); + __raw_i915_write32(dev_priv, FORCEWAKE_MEDIA_VLV, + _MASKED_BIT_DISABLE(0xffff)); /* something from same cacheline, but !FORCEWAKE_VLV */ __raw_posting_read(dev_priv, FORCEWAKE_ACK_VLV); } diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nvd0.c b/drivers/gpu/drm/nouveau/core/engine/disp/nvd0.c index 52dd7a1..8f33655 100644 --- a/drivers/gpu/drm/nouveau/core/engine/disp/nvd0.c +++ b/drivers/gpu/drm/nouveau/core/engine/disp/nvd0.c @@ -678,7 +678,7 @@ exec_clkcmp(struct nv50_disp_priv *priv, int head, int id, } if (outp == 8) - return false; + return conf; data = exec_lookup(priv, head, outp, ctrl, dcb, &ver, &hdr, &cnt, &len, &info1); if (data == 0x0000) diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/dcb.c b/drivers/gpu/drm/nouveau/core/subdev/bios/dcb.c index 2d9b9d7..f3edd28 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/bios/dcb.c +++ b/drivers/gpu/drm/nouveau/core/subdev/bios/dcb.c @@ -124,6 +124,7 @@ dcb_outp_parse(struct nouveau_bios *bios, u8 idx, u8 *ver, u8 *len, struct dcb_output *outp) { u16 dcb = dcb_outp(bios, idx, ver, len); + memset(outp, 0x00, sizeof(*outp)); if (dcb) { if (*ver >= 0x20) { u32 conn = nv_ro32(bios, dcb + 0x00); diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/fan.c b/drivers/gpu/drm/nouveau/core/subdev/therm/fan.c index 39f47b9..c14cb09 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/therm/fan.c +++ b/drivers/gpu/drm/nouveau/core/subdev/therm/fan.c @@ -54,8 +54,10 @@ nouveau_fan_update(struct nouveau_fan *fan, bool immediate, int target) /* check that we're not already at the target duty cycle */ duty = fan->get(therm); - if (duty == target) - goto 
done; + if (duty == target) { + spin_unlock_irqrestore(&fan->lock, flags); + return 0; + } /* smooth out the fanspeed increase/decrease */ if (!immediate && duty >= 0) { @@ -73,8 +75,15 @@ nouveau_fan_update(struct nouveau_fan *fan, bool immediate, int target) nv_debug(therm, "FAN update: %d\n", duty); ret = fan->set(therm, duty); - if (ret) - goto done; + if (ret) { + spin_unlock_irqrestore(&fan->lock, flags); + return ret; + } + + /* fan speed updated, drop the fan lock before grabbing the + * alarm-scheduling lock and risking a deadlock + */ + spin_unlock_irqrestore(&fan->lock, flags); /* schedule next fan update, if not at target speed already */ if (list_empty(&fan->alarm.head) && target != duty) { @@ -92,8 +101,6 @@ nouveau_fan_update(struct nouveau_fan *fan, bool immediate, int target) ptimer->alarm(ptimer, delay * 1000 * 1000, &fan->alarm); } -done: - spin_unlock_irqrestore(&fan->lock, flags); return ret; } diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c index 200e856..efdb689 100644 --- a/drivers/gpu/drm/nouveau/nouveau_acpi.c +++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c @@ -419,9 +419,6 @@ bool nouveau_acpi_rom_supported(struct pci_dev *pdev) acpi_status status; acpi_handle dhandle, rom_handle; - if (!nouveau_dsm_priv.dsm_detected && !nouveau_dsm_priv.optimus_detected) - return false; - dhandle = DEVICE_ACPI_HANDLE(&pdev->dev); if (!dhandle) return false; diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 7848590..fb072e6 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -586,9 +586,9 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, } ret = nouveau_page_flip_emit(chan, old_bo, new_bo, s, &fence); - mutex_unlock(&chan->cli->mutex); if (ret) goto fail_unreserve; + mutex_unlock(&chan->cli->mutex); /* Update the crtc struct and cleanup */ crtc->fb = fb; diff --git 
a/drivers/gpu/drm/nouveau/nouveau_vga.c b/drivers/gpu/drm/nouveau/nouveau_vga.c index 81638d7..13790ea 100644 --- a/drivers/gpu/drm/nouveau/nouveau_vga.c +++ b/drivers/gpu/drm/nouveau/nouveau_vga.c @@ -98,7 +98,16 @@ void nouveau_vga_fini(struct nouveau_drm *drm) { struct drm_device *dev = drm->dev; + bool runtime = false; + + if (nouveau_runtime_pm == 1) + runtime = true; + if ((nouveau_runtime_pm == -1) && (nouveau_is_optimus() || nouveau_is_v1_dsm())) + runtime = true; + vga_switcheroo_unregister_client(dev->pdev); + if (runtime && nouveau_is_v1_dsm() && !nouveau_is_optimus()) + vga_switcheroo_fini_domain_pm_ops(drm->dev->dev); vga_client_register(dev->pdev, NULL, NULL, NULL); } diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c index acf6678..9501728 100644 --- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c +++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c @@ -199,7 +199,7 @@ static struct dmm_txn *dmm_txn_init(struct dmm *dmm, struct tcm *tcm) static void dmm_txn_append(struct dmm_txn *txn, struct pat_area *area, struct page **pages, uint32_t npages, uint32_t roll) { - dma_addr_t pat_pa = 0; + dma_addr_t pat_pa = 0, data_pa = 0; uint32_t *data; struct pat *pat; struct refill_engine *engine = txn->engine_handle; @@ -223,7 +223,9 @@ static void dmm_txn_append(struct dmm_txn *txn, struct pat_area *area, .lut_id = engine->tcm->lut_id, }; - data = alloc_dma(txn, 4*i, &pat->data_pa); + data = alloc_dma(txn, 4*i, &data_pa); + /* FIXME: what if data_pa is more than 32-bit ? 
*/ + pat->data_pa = data_pa; while (i--) { int n = i + roll; diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c index 533f6eb..6b01276 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem.c +++ b/drivers/gpu/drm/omapdrm/omap_gem.c @@ -791,7 +791,7 @@ int omap_gem_get_paddr(struct drm_gem_object *obj, omap_obj->paddr = tiler_ssptr(block); omap_obj->block = block; - DBG("got paddr: %08x", omap_obj->paddr); + DBG("got paddr: %pad", &omap_obj->paddr); } omap_obj->paddr_cnt++; @@ -988,9 +988,9 @@ void omap_gem_describe(struct drm_gem_object *obj, struct seq_file *m) off = drm_vma_node_start(&obj->vma_node); - seq_printf(m, "%08x: %2d (%2d) %08llx %08Zx (%2d) %p %4d", + seq_printf(m, "%08x: %2d (%2d) %08llx %pad (%2d) %p %4d", omap_obj->flags, obj->name, obj->refcount.refcount.counter, - off, omap_obj->paddr, omap_obj->paddr_cnt, + off, &omap_obj->paddr, omap_obj->paddr_cnt, omap_obj->vaddr, omap_obj->roll); if (omap_obj->flags & OMAP_BO_TILED) { @@ -1473,8 +1473,8 @@ void omap_gem_init(struct drm_device *dev) entry->paddr = tiler_ssptr(block); entry->block = block; - DBG("%d:%d: %dx%d: paddr=%08x stride=%d", i, j, w, h, - entry->paddr, + DBG("%d:%d: %dx%d: paddr=%pad stride=%d", i, j, w, h, + &entry->paddr, usergart[i].stride_pfn << PAGE_SHIFT); } } diff --git a/drivers/gpu/drm/omapdrm/omap_plane.c b/drivers/gpu/drm/omapdrm/omap_plane.c index 046d5e6..5b62e21 100644 --- a/drivers/gpu/drm/omapdrm/omap_plane.c +++ b/drivers/gpu/drm/omapdrm/omap_plane.c @@ -142,8 +142,8 @@ static void omap_plane_pre_apply(struct omap_drm_apply *apply) DBG("%dx%d -> %dx%d (%d)", info->width, info->height, info->out_width, info->out_height, info->screen_width); - DBG("%d,%d %08x %08x", info->pos_x, info->pos_y, - info->paddr, info->p_uv_addr); + DBG("%d,%d %pad %pad", info->pos_x, info->pos_y, + &info->paddr, &info->p_uv_addr); /* TODO: */ ilace = false; diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index 835caba..5f79e51 100644 
--- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -508,7 +508,6 @@ static int qxl_crtc_mode_set(struct drm_crtc *crtc, struct qxl_framebuffer *qfb; struct qxl_bo *bo, *old_bo = NULL; struct qxl_crtc *qcrtc = to_qxl_crtc(crtc); - uint32_t width, height, base_offset; bool recreate_primary = false; int ret; int surf_id; @@ -538,9 +537,10 @@ static int qxl_crtc_mode_set(struct drm_crtc *crtc, if (qcrtc->index == 0) recreate_primary = true; - width = mode->hdisplay; - height = mode->vdisplay; - base_offset = 0; + if (bo->surf.stride * bo->surf.height > qdev->vram_size) { + DRM_ERROR("Mode doesn't fit in vram size (vgamem)"); + return -EINVAL; + } ret = qxl_bo_reserve(bo, false); if (ret != 0) @@ -554,10 +554,10 @@ static int qxl_crtc_mode_set(struct drm_crtc *crtc, if (recreate_primary) { qxl_io_destroy_primary(qdev); qxl_io_log(qdev, - "recreate primary: %dx%d (was %dx%d,%d,%d)\n", - width, height, bo->surf.width, - bo->surf.height, bo->surf.stride, bo->surf.format); - qxl_io_create_primary(qdev, base_offset, bo); + "recreate primary: %dx%d,%d,%d\n", + bo->surf.width, bo->surf.height, + bo->surf.stride, bo->surf.format); + qxl_io_create_primary(qdev, 0, bo); bo->is_primary = true; surf_id = 0; } else { diff --git a/drivers/gpu/drm/qxl/qxl_irq.c b/drivers/gpu/drm/qxl/qxl_irq.c index 21393dc..f4b6b89 100644 --- a/drivers/gpu/drm/qxl/qxl_irq.c +++ b/drivers/gpu/drm/qxl/qxl_irq.c @@ -33,6 +33,9 @@ irqreturn_t qxl_irq_handler(DRM_IRQ_ARGS) pending = xchg(&qdev->ram_header->int_pending, 0); + if (!pending) + return IRQ_NONE; + atomic_inc(&qdev->irq_received); if (pending & QXL_INTERRUPT_DISPLAY) { diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c index 037786d..ed90fbe 100644 --- a/drivers/gpu/drm/qxl/qxl_ttm.c +++ b/drivers/gpu/drm/qxl/qxl_ttm.c @@ -433,6 +433,7 @@ static int qxl_sync_obj_flush(void *sync_obj) static void qxl_sync_obj_unref(void **sync_obj) { + *sync_obj = NULL; } static void *qxl_sync_obj_ref(void 
*sync_obj) diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index dcb652a..ba8742a 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -859,14 +859,16 @@ static void atombios_crtc_program_pll(struct drm_crtc *crtc, args.v5.ucMiscInfo = 0; /* HDMI depth, etc. */ if (ss_enabled && (ss->type & ATOM_EXTERNAL_SS_MASK)) args.v5.ucMiscInfo |= PIXEL_CLOCK_V5_MISC_REF_DIV_SRC; - switch (bpc) { - case 8: - default: - args.v5.ucMiscInfo |= PIXEL_CLOCK_V5_MISC_HDMI_24BPP; - break; - case 10: - args.v5.ucMiscInfo |= PIXEL_CLOCK_V5_MISC_HDMI_30BPP; - break; + if (encoder_mode == ATOM_ENCODER_MODE_HDMI) { + switch (bpc) { + case 8: + default: + args.v5.ucMiscInfo |= PIXEL_CLOCK_V5_MISC_HDMI_24BPP; + break; + case 10: + args.v5.ucMiscInfo |= PIXEL_CLOCK_V5_MISC_HDMI_30BPP; + break; + } } args.v5.ucTransmitterID = encoder_id; args.v5.ucEncoderMode = encoder_mode; @@ -881,20 +883,22 @@ static void atombios_crtc_program_pll(struct drm_crtc *crtc, args.v6.ucMiscInfo = 0; /* HDMI depth, etc. 
*/ if (ss_enabled && (ss->type & ATOM_EXTERNAL_SS_MASK)) args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_REF_DIV_SRC; - switch (bpc) { - case 8: - default: - args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_HDMI_24BPP; - break; - case 10: - args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_HDMI_30BPP; - break; - case 12: - args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_HDMI_36BPP; - break; - case 16: - args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_HDMI_48BPP; - break; + if (encoder_mode == ATOM_ENCODER_MODE_HDMI) { + switch (bpc) { + case 8: + default: + args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_HDMI_24BPP; + break; + case 10: + args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_HDMI_30BPP; + break; + case 12: + args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_HDMI_36BPP; + break; + case 16: + args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_HDMI_48BPP; + break; + } } args.v6.ucTransmitterID = encoder_id; args.v6.ucEncoderMode = encoder_mode; diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index 0088541..26059ec 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -384,6 +384,19 @@ static int dp_get_max_dp_pix_clock(int link_rate, /***** radeon specific DP functions *****/ +static int radeon_dp_get_max_link_rate(struct drm_connector *connector, + u8 dpcd[DP_DPCD_SIZE]) +{ + int max_link_rate; + + if (radeon_connector_is_dp12_capable(connector)) + max_link_rate = min(drm_dp_max_link_rate(dpcd), 540000); + else + max_link_rate = min(drm_dp_max_link_rate(dpcd), 270000); + + return max_link_rate; +} + /* First get the min lane# when low rate is used according to pixel clock * (prefer low rate), second check max lane# supported by DP panel, * if the max lane# < low rate lane# then use max lane# instead. 
@@ -393,7 +406,7 @@ static int radeon_dp_get_dp_lane_number(struct drm_connector *connector, int pix_clock) { int bpp = convert_bpc_to_bpp(radeon_get_monitor_bpc(connector)); - int max_link_rate = drm_dp_max_link_rate(dpcd); + int max_link_rate = radeon_dp_get_max_link_rate(connector, dpcd); int max_lane_num = drm_dp_max_lane_count(dpcd); int lane_num; int max_dp_pix_clock; @@ -431,7 +444,7 @@ static int radeon_dp_get_dp_link_clock(struct drm_connector *connector, return 540000; } - return drm_dp_max_link_rate(dpcd); + return radeon_dp_get_max_link_rate(connector, dpcd); } static u8 radeon_dp_encoder_service(struct radeon_device *rdev, @@ -561,6 +574,10 @@ int radeon_dp_mode_valid_helper(struct drm_connector *connector, struct radeon_connector_atom_dig *dig_connector; int dp_clock; + if ((mode->clock > 340000) && + (!radeon_connector_is_dp12_capable(connector))) + return MODE_CLOCK_HIGH; + if (!radeon_connector->con_priv) return MODE_CLOCK_HIGH; dig_connector = radeon_connector->con_priv; diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index 7bb7074..6a96517 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -183,7 +183,6 @@ void radeon_atom_backlight_init(struct radeon_encoder *radeon_encoder, struct backlight_properties props; struct radeon_backlight_privdata *pdata; struct radeon_encoder_atom_dig *dig; - u8 backlight_level; char bl_name[16]; /* Mac laptops with multiple GPUs use the gmux driver for backlight @@ -222,12 +221,17 @@ void radeon_atom_backlight_init(struct radeon_encoder *radeon_encoder, pdata->encoder = radeon_encoder; - backlight_level = radeon_atom_get_backlight_level_from_reg(rdev); - dig = radeon_encoder->enc_priv; dig->bl_dev = bd; bd->props.brightness = radeon_atom_backlight_get_brightness(bd); + /* Set a reasonable default here if the level is 0 otherwise + * fbdev will attempt to turn the backlight on after console + * unblanking and 
it will try and restore 0 which turns the backlight + * off again. + */ + if (bd->props.brightness == 0) + bd->props.brightness = RADEON_MAX_BL_LEVEL; bd->props.power = FB_BLANK_UNBLANK; backlight_update_status(bd); @@ -1910,8 +1914,11 @@ atombios_set_encoder_crtc_source(struct drm_encoder *encoder) args.v2.ucEncodeMode = ATOM_ENCODER_MODE_CRT; else args.v2.ucEncodeMode = atombios_get_encoder_mode(encoder); - } else + } else if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) { + args.v2.ucEncodeMode = ATOM_ENCODER_MODE_LVDS; + } else { args.v2.ucEncodeMode = atombios_get_encoder_mode(encoder); + } switch (radeon_encoder->encoder_id) { case ENCODER_OBJECT_ID_INTERNAL_UNIPHY: case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1: diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 51e947a..0a8fc92 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -820,6 +820,9 @@ static int ci_set_thermal_temperature_range(struct radeon_device *rdev, WREG32_SMC(CG_THERMAL_CTRL, tmp); #endif + rdev->pm.dpm.thermal.min_temp = low_temp; + rdev->pm.dpm.thermal.max_temp = high_temp; + return 0; } @@ -1130,7 +1133,7 @@ static int ci_stop_dpm(struct radeon_device *rdev) tmp &= ~GLOBAL_PWRMGT_EN; WREG32_SMC(GENERAL_PWRMGT, tmp); - tmp = RREG32(SCLK_PWRMGT_CNTL); + tmp = RREG32_SMC(SCLK_PWRMGT_CNTL); tmp &= ~DYNAMIC_PM_EN; WREG32_SMC(SCLK_PWRMGT_CNTL, tmp); @@ -4695,7 +4698,7 @@ void ci_dpm_disable(struct radeon_device *rdev) ci_enable_spread_spectrum(rdev, false); ci_enable_auto_throttle_source(rdev, RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL, false); ci_stop_dpm(rdev); - ci_enable_ds_master_switch(rdev, true); + ci_enable_ds_master_switch(rdev, false); ci_enable_ulv(rdev, false); ci_clear_vc(rdev); ci_reset_to_default(rdev); @@ -5098,6 +5101,10 @@ int ci_dpm_init(struct radeon_device *rdev) pi->mclk_dpm_key_disabled = 0; pi->pcie_dpm_key_disabled = 0; + /* mclk dpm is unstable on some R7 260X cards */ + if (rdev->pdev->device == 0x6658) 
+ pi->mclk_dpm_key_disabled = 1; + pi->caps_sclk_ds = true; pi->mclk_strobe_mode_threshold = 40000; diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 25370ac..cdc7f40 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -1092,7 +1092,7 @@ static const u32 spectre_golden_registers[] = 0x8a14, 0xf000003f, 0x00000007, 0x8b24, 0xffffffff, 0x00ffffff, 0x28350, 0x3f3f3fff, 0x00000082, - 0x28355, 0x0000003f, 0x00000000, + 0x28354, 0x0000003f, 0x00000000, 0x3e78, 0x00000001, 0x00000002, 0x913c, 0xffff03df, 0x00000004, 0xc768, 0x00000008, 0x00000008, @@ -2745,6 +2745,7 @@ static void cik_gpu_init(struct radeon_device *rdev) (rdev->pdev->device == 0x130B) || (rdev->pdev->device == 0x130E) || (rdev->pdev->device == 0x1315) || + (rdev->pdev->device == 0x1318) || (rdev->pdev->device == 0x131B)) { rdev->config.cik.max_cu_per_sh = 4; rdev->config.cik.max_backends_per_se = 1; @@ -3342,8 +3343,8 @@ static int cik_cp_gfx_start(struct radeon_device *rdev) /* init the CE partitions. 
CE only used for gfx on CIK */ radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); - radeon_ring_write(ring, 0xc000); - radeon_ring_write(ring, 0xc000); + radeon_ring_write(ring, 0x8000); + radeon_ring_write(ring, 0x8000); /* setup clear context state */ radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); @@ -3763,7 +3764,7 @@ struct bonaire_mqd */ static int cik_cp_compute_resume(struct radeon_device *rdev) { - int r, i, idx; + int r, i, j, idx; u32 tmp; bool use_doorbell = true; u64 hqd_gpu_addr; @@ -3886,7 +3887,7 @@ static int cik_cp_compute_resume(struct radeon_device *rdev) mqd->queue_state.cp_hqd_pq_wptr= 0; if (RREG32(CP_HQD_ACTIVE) & 1) { WREG32(CP_HQD_DEQUEUE_REQUEST, 1); - for (i = 0; i < rdev->usec_timeout; i++) { + for (j = 0; j < rdev->usec_timeout; j++) { if (!(RREG32(CP_HQD_ACTIVE) & 1)) break; udelay(1); @@ -4768,12 +4769,13 @@ static void cik_vm_decode_fault(struct radeon_device *rdev, void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) { struct radeon_ring *ring = &rdev->ring[ridx]; + int usepfp = (ridx == RADEON_RING_TYPE_GFX_INDEX); if (vm == NULL) return; radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | WRITE_DATA_DST_SEL(0))); if (vm->id < 8) { radeon_ring_write(ring, @@ -4832,7 +4834,7 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) radeon_ring_write(ring, 1 << vm->id); /* compute doesn't have PFP */ - if (ridx == RADEON_RING_TYPE_GFX_INDEX) { + if (usepfp) { /* sync PFP to ME, otherwise we might get invalid PFP reads */ radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); radeon_ring_write(ring, 0x0); @@ -5165,6 +5167,7 @@ static void cik_enable_mgcg(struct radeon_device *rdev, bool enable) } orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE); + data |= 0x00000001; data &= 0xfffffffd; if 
(orig != data) WREG32(RLC_CGTT_MGCG_OVERRIDE, data); @@ -5196,7 +5199,7 @@ static void cik_enable_mgcg(struct radeon_device *rdev, bool enable) } } else { orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE); - data |= 0x00000002; + data |= 0x00000003; if (orig != data) WREG32(RLC_CGTT_MGCG_OVERRIDE, data); @@ -5956,6 +5959,19 @@ static void cik_disable_interrupt_state(struct radeon_device *rdev) WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0); WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0); } + /* pflip */ + if (rdev->num_crtc >= 2) { + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0); + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0); + } + if (rdev->num_crtc >= 4) { + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0); + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0); + } + if (rdev->num_crtc >= 6) { + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0); + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0); + } /* dac hotplug */ WREG32(DAC_AUTODETECT_INT_CONTROL, 0); @@ -6312,6 +6328,25 @@ int cik_irq_set(struct radeon_device *rdev) WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6); } + if (rdev->num_crtc >= 2) { + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, + GRPH_PFLIP_INT_MASK); + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, + GRPH_PFLIP_INT_MASK); + } + if (rdev->num_crtc >= 4) { + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, + GRPH_PFLIP_INT_MASK); + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, + GRPH_PFLIP_INT_MASK); + } + if (rdev->num_crtc >= 6) { + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, + GRPH_PFLIP_INT_MASK); + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, + GRPH_PFLIP_INT_MASK); + } + WREG32(DC_HPD1_INT_CONTROL, hpd1); WREG32(DC_HPD2_INT_CONTROL, hpd2); WREG32(DC_HPD3_INT_CONTROL, hpd3); @@ -6348,6 +6383,29 @@ static inline void cik_irq_ack(struct 
radeon_device *rdev) rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5); rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6); + rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS + + EVERGREEN_CRTC0_REGISTER_OFFSET); + rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS + + EVERGREEN_CRTC1_REGISTER_OFFSET); + if (rdev->num_crtc >= 4) { + rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS + + EVERGREEN_CRTC2_REGISTER_OFFSET); + rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS + + EVERGREEN_CRTC3_REGISTER_OFFSET); + } + if (rdev->num_crtc >= 6) { + rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS + + EVERGREEN_CRTC4_REGISTER_OFFSET); + rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS + + EVERGREEN_CRTC5_REGISTER_OFFSET); + } + + if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED) + WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, + GRPH_PFLIP_INT_CLEAR); + if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED) + WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, + GRPH_PFLIP_INT_CLEAR); if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK); if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) @@ -6358,6 +6416,12 @@ static inline void cik_irq_ack(struct radeon_device *rdev) WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK); if (rdev->num_crtc >= 4) { + if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED) + WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, + GRPH_PFLIP_INT_CLEAR); + if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED) + WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, + GRPH_PFLIP_INT_CLEAR); if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK); if 
(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) @@ -6369,6 +6433,12 @@ static inline void cik_irq_ack(struct radeon_device *rdev) } if (rdev->num_crtc >= 6) { + if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED) + WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, + GRPH_PFLIP_INT_CLEAR); + if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED) + WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, + GRPH_PFLIP_INT_CLEAR); if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK); if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) @@ -6487,6 +6557,7 @@ static inline u32 cik_get_ih_wptr(struct radeon_device *rdev) tmp = RREG32(IH_RB_CNTL); tmp |= IH_WPTR_OVERFLOW_CLEAR; WREG32(IH_RB_CNTL, tmp); + wptr &= ~RB_OVERFLOW; } return (wptr & rdev->ih.ptr_mask); } @@ -6720,6 +6791,15 @@ restart_ih: break; } break; + case 8: /* D1 page flip */ + case 10: /* D2 page flip */ + case 12: /* D3 page flip */ + case 14: /* D4 page flip */ + case 16: /* D5 page flip */ + case 18: /* D6 page flip */ + DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1); + radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1); + break; case 42: /* HPD hotplug */ switch (src_data) { case 0: @@ -8026,6 +8106,9 @@ void dce8_bandwidth_update(struct radeon_device *rdev) u32 num_heads = 0, lb_size; int i; + if (!rdev->mode_info.mode_config_initialized) + return; + radeon_update_display_priority(rdev); for (i = 0; i < rdev->num_crtc; i++) { diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index d565f40..dc055d4 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -369,13 +369,6 @@ int cik_sdma_resume(struct radeon_device *rdev) { int r; - /* Reset dma */ - WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1); - RREG32(SRBM_SOFT_RESET); - udelay(50); - WREG32(SRBM_SOFT_RESET, 0); - 
RREG32(SRBM_SOFT_RESET); - r = cik_sdma_load_microcode(rdev); if (r) return r; @@ -512,7 +505,7 @@ int cik_sdma_ring_test(struct radeon_device *rdev, tmp = 0xCAFEDEAD; writel(tmp, ptr); - r = radeon_ring_lock(rdev, ring, 4); + r = radeon_ring_lock(rdev, ring, 5); if (r) { DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r); return r; diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index 203d2a0..9c8ef20 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -868,6 +868,15 @@ # define DC_HPD6_RX_INTERRUPT (1 << 18) #define DISP_INTERRUPT_STATUS_CONTINUE6 0x6780 +/* 0x6858, 0x7458, 0x10058, 0x10c58, 0x11858, 0x12458 */ +#define GRPH_INT_STATUS 0x6858 +# define GRPH_PFLIP_INT_OCCURRED (1 << 0) +# define GRPH_PFLIP_INT_CLEAR (1 << 8) +/* 0x685c, 0x745c, 0x1005c, 0x10c5c, 0x1185c, 0x1245c */ +#define GRPH_INT_CONTROL 0x685c +# define GRPH_PFLIP_INT_MASK (1 << 0) +# define GRPH_PFLIP_INT_TYPE (1 << 8) + #define DAC_AUTODETECT_INT_CONTROL 0x67c8 #define DC_HPD1_INT_STATUS 0x601c @@ -1686,12 +1695,12 @@ #define EOP_TC_WB_ACTION_EN (1 << 15) /* L2 */ #define EOP_TCL1_ACTION_EN (1 << 16) #define EOP_TC_ACTION_EN (1 << 17) /* L2 */ +#define EOP_TCL2_VOLATILE (1 << 24) #define EOP_CACHE_POLICY(x) ((x) << 25) /* 0 - LRU * 1 - Stream * 2 - Bypass */ -#define EOP_TCL2_VOLATILE (1 << 27) #define DATA_SEL(x) ((x) << 29) /* 0 - discard * 1 - send low 32bit data diff --git a/drivers/gpu/drm/radeon/cypress_dpm.c b/drivers/gpu/drm/radeon/cypress_dpm.c index 91bb470..7143783 100644 --- a/drivers/gpu/drm/radeon/cypress_dpm.c +++ b/drivers/gpu/drm/radeon/cypress_dpm.c @@ -1549,7 +1549,7 @@ int cypress_populate_smc_voltage_tables(struct radeon_device *rdev, table->voltageMaskTable.highMask[RV770_SMC_VOLTAGEMASK_VDDCI] = 0; table->voltageMaskTable.lowMask[RV770_SMC_VOLTAGEMASK_VDDCI] = - cpu_to_be32(eg_pi->vddc_voltage_table.mask_low); + cpu_to_be32(eg_pi->vddci_voltage_table.mask_low); } return 0; diff --git 
a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index c429bb9..20b00a0 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -188,7 +188,7 @@ static const u32 evergreen_golden_registers[] = 0x8c1c, 0xffffffff, 0x00001010, 0x28350, 0xffffffff, 0x00000000, 0xa008, 0xffffffff, 0x00010000, - 0x5cc, 0xffffffff, 0x00000001, + 0x5c4, 0xffffffff, 0x00000001, 0x9508, 0xffffffff, 0x00000002, 0x913c, 0x0000000f, 0x0000000a }; @@ -475,7 +475,7 @@ static const u32 cedar_golden_registers[] = 0x8c1c, 0xffffffff, 0x00001010, 0x28350, 0xffffffff, 0x00000000, 0xa008, 0xffffffff, 0x00010000, - 0x5cc, 0xffffffff, 0x00000001, + 0x5c4, 0xffffffff, 0x00000001, 0x9508, 0xffffffff, 0x00000002 }; @@ -634,7 +634,7 @@ static const u32 juniper_mgcg_init[] = static const u32 supersumo_golden_registers[] = { 0x5eb4, 0xffffffff, 0x00000002, - 0x5cc, 0xffffffff, 0x00000001, + 0x5c4, 0xffffffff, 0x00000001, 0x7030, 0xffffffff, 0x00000011, 0x7c30, 0xffffffff, 0x00000011, 0x6104, 0x01000300, 0x00000000, @@ -718,7 +718,7 @@ static const u32 sumo_golden_registers[] = static const u32 wrestler_golden_registers[] = { 0x5eb4, 0xffffffff, 0x00000002, - 0x5cc, 0xffffffff, 0x00000001, + 0x5c4, 0xffffffff, 0x00000001, 0x7030, 0xffffffff, 0x00000011, 0x7c30, 0xffffffff, 0x00000011, 0x6104, 0x01000300, 0x00000000, @@ -2312,6 +2312,9 @@ void evergreen_bandwidth_update(struct radeon_device *rdev) u32 num_heads = 0, lb_size; int i; + if (!rdev->mode_info.mode_config_initialized) + return; + radeon_update_display_priority(rdev); for (i = 0; i < rdev->num_crtc; i++) { @@ -2520,6 +2523,7 @@ void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *sav WREG32(EVERGREEN_CRTC_UPDATE_LOCK + crtc_offsets[i], 1); tmp |= EVERGREEN_CRTC_BLANK_DATA_EN; WREG32(EVERGREEN_CRTC_BLANK_CONTROL + crtc_offsets[i], tmp); + WREG32(EVERGREEN_CRTC_UPDATE_LOCK + crtc_offsets[i], 0); } } else { tmp = RREG32(EVERGREEN_CRTC_CONTROL + crtc_offsets[i]); @@ 
-4276,7 +4280,6 @@ int evergreen_irq_set(struct radeon_device *rdev) u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0; u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6; u32 grbm_int_cntl = 0; - u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0; u32 afmt1 = 0, afmt2 = 0, afmt3 = 0, afmt4 = 0, afmt5 = 0, afmt6 = 0; u32 dma_cntl, dma_cntl1 = 0; u32 thermal_int = 0; @@ -4459,15 +4462,21 @@ int evergreen_irq_set(struct radeon_device *rdev) WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6); } - WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1); - WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2); + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, + GRPH_PFLIP_INT_MASK); + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, + GRPH_PFLIP_INT_MASK); if (rdev->num_crtc >= 4) { - WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3); - WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4); + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, + GRPH_PFLIP_INT_MASK); + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, + GRPH_PFLIP_INT_MASK); } if (rdev->num_crtc >= 6) { - WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5); - WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6); + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, + GRPH_PFLIP_INT_MASK); + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, + GRPH_PFLIP_INT_MASK); } WREG32(DC_HPD1_INT_CONTROL, hpd1); @@ -4659,6 +4668,7 @@ static u32 evergreen_get_ih_wptr(struct radeon_device *rdev) tmp = RREG32(IH_RB_CNTL); tmp |= IH_WPTR_OVERFLOW_CLEAR; WREG32(IH_RB_CNTL, tmp); + wptr &= ~RB_OVERFLOW; } return (wptr & rdev->ih.ptr_mask); } @@ -4856,6 +4866,15 @@ restart_ih: break; } break; + case 8: /* D1 page flip */ + case 10: /* D2 page flip */ + case 12: /* D3 page flip */ + case 14: /* D4 page flip */ + case 16: /* D5 page flip */ + case 18: /* D6 page 
flip */ + DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1); + radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1); + break; case 42: /* HPD hotplug */ switch (src_data) { case 0: diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c index dd6e968..d0e4ab1 100644 --- a/drivers/gpu/drm/radeon/ni_dma.c +++ b/drivers/gpu/drm/radeon/ni_dma.c @@ -119,12 +119,6 @@ int cayman_dma_resume(struct radeon_device *rdev) u32 reg_offset, wb_offset; int i, r; - /* Reset dma */ - WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1); - RREG32(SRBM_SOFT_RESET); - udelay(50); - WREG32(SRBM_SOFT_RESET, 0); - for (i = 0; i < 2; i++) { if (i == 0) { ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; diff --git a/drivers/gpu/drm/radeon/ni_dpm.c b/drivers/gpu/drm/radeon/ni_dpm.c index db0fa61..85f36e7 100644 --- a/drivers/gpu/drm/radeon/ni_dpm.c +++ b/drivers/gpu/drm/radeon/ni_dpm.c @@ -1319,7 +1319,7 @@ static void ni_populate_smc_voltage_tables(struct radeon_device *rdev, table->voltageMaskTable.highMask[NISLANDS_SMC_VOLTAGEMASK_VDDCI] = 0; table->voltageMaskTable.lowMask[NISLANDS_SMC_VOLTAGEMASK_VDDCI] = - cpu_to_be32(eg_pi->vddc_voltage_table.mask_low); + cpu_to_be32(eg_pi->vddci_voltage_table.mask_low); } } diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index d713330..f98dcbe 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -3189,6 +3189,9 @@ void r100_bandwidth_update(struct radeon_device *rdev) uint32_t pixel_bytes1 = 0; uint32_t pixel_bytes2 = 0; + if (!rdev->mode_info.mode_config_initialized) + return; + radeon_update_display_priority(rdev); if (rdev->mode_info.crtcs[0]->base.enabled) { diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 5af2729..88eb936 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3371,7 +3371,6 @@ int r600_irq_set(struct radeon_device *rdev) u32 hpd1, hpd2, hpd3, hpd4 = 0, hpd5 = 0, hpd6 = 0; u32 grbm_int_cntl = 
0; u32 hdmi0, hdmi1; - u32 d1grph = 0, d2grph = 0; u32 dma_cntl; u32 thermal_int = 0; @@ -3480,8 +3479,8 @@ int r600_irq_set(struct radeon_device *rdev) WREG32(CP_INT_CNTL, cp_int_cntl); WREG32(DMA_CNTL, dma_cntl); WREG32(DxMODE_INT_MASK, mode_int); - WREG32(D1GRPH_INTERRUPT_CONTROL, d1grph); - WREG32(D2GRPH_INTERRUPT_CONTROL, d2grph); + WREG32(D1GRPH_INTERRUPT_CONTROL, DxGRPH_PFLIP_INT_MASK); + WREG32(D2GRPH_INTERRUPT_CONTROL, DxGRPH_PFLIP_INT_MASK); WREG32(GRBM_INT_CNTL, grbm_int_cntl); if (ASIC_IS_DCE3(rdev)) { WREG32(DC_HPD1_INT_CONTROL, hpd1); @@ -3658,6 +3657,7 @@ static u32 r600_get_ih_wptr(struct radeon_device *rdev) tmp = RREG32(IH_RB_CNTL); tmp |= IH_WPTR_OVERFLOW_CLEAR; WREG32(IH_RB_CNTL, tmp); + wptr &= ~RB_OVERFLOW; } return (wptr & rdev->ih.ptr_mask); } @@ -3784,6 +3784,14 @@ restart_ih: break; } break; + case 9: /* D1 pflip */ + DRM_DEBUG("IH: D1 flip\n"); + radeon_crtc_handle_flip(rdev, 0); + break; + case 11: /* D2 pflip */ + DRM_DEBUG("IH: D2 flip\n"); + radeon_crtc_handle_flip(rdev, 1); + break; case 19: /* HPD/DAC hotplug */ switch (src_data) { case 0: diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c index 3b31745..aad3c36 100644 --- a/drivers/gpu/drm/radeon/r600_dma.c +++ b/drivers/gpu/drm/radeon/r600_dma.c @@ -116,15 +116,6 @@ int r600_dma_resume(struct radeon_device *rdev) u32 rb_bufsz; int r; - /* Reset dma */ - if (rdev->family >= CHIP_RV770) - WREG32(SRBM_SOFT_RESET, RV770_SOFT_RESET_DMA); - else - WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA); - RREG32(SRBM_SOFT_RESET); - udelay(50); - WREG32(SRBM_SOFT_RESET, 0); - WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL, 0); WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL, 0); diff --git a/drivers/gpu/drm/radeon/r600_dpm.c b/drivers/gpu/drm/radeon/r600_dpm.c index 5513d8f..cc4258a 100644 --- a/drivers/gpu/drm/radeon/r600_dpm.c +++ b/drivers/gpu/drm/radeon/r600_dpm.c @@ -158,16 +158,18 @@ u32 r600_dpm_get_vblank_time(struct radeon_device *rdev) u32 line_time_us, vblank_lines; u32 
vblank_time_us = 0xffffffff; /* if the displays are off, vblank time is max */ - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - radeon_crtc = to_radeon_crtc(crtc); - if (crtc->enabled && radeon_crtc->enabled && radeon_crtc->hw_mode.clock) { - line_time_us = (radeon_crtc->hw_mode.crtc_htotal * 1000) / - radeon_crtc->hw_mode.clock; - vblank_lines = radeon_crtc->hw_mode.crtc_vblank_end - - radeon_crtc->hw_mode.crtc_vdisplay + - (radeon_crtc->v_border * 2); - vblank_time_us = vblank_lines * line_time_us; - break; + if (rdev->num_crtc && rdev->mode_info.mode_config_initialized) { + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + radeon_crtc = to_radeon_crtc(crtc); + if (crtc->enabled && radeon_crtc->enabled && radeon_crtc->hw_mode.clock) { + line_time_us = (radeon_crtc->hw_mode.crtc_htotal * 1000) / + radeon_crtc->hw_mode.clock; + vblank_lines = radeon_crtc->hw_mode.crtc_vblank_end - + radeon_crtc->hw_mode.crtc_vdisplay + + (radeon_crtc->v_border * 2); + vblank_time_us = vblank_lines * line_time_us; + break; + } } } @@ -181,14 +183,15 @@ u32 r600_dpm_get_vrefresh(struct radeon_device *rdev) struct radeon_crtc *radeon_crtc; u32 vrefresh = 0; - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - radeon_crtc = to_radeon_crtc(crtc); - if (crtc->enabled && radeon_crtc->enabled && radeon_crtc->hw_mode.clock) { - vrefresh = radeon_crtc->hw_mode.vrefresh; - break; + if (rdev->num_crtc && rdev->mode_info.mode_config_initialized) { + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + radeon_crtc = to_radeon_crtc(crtc); + if (crtc->enabled && radeon_crtc->enabled && radeon_crtc->hw_mode.clock) { + vrefresh = radeon_crtc->hw_mode.vrefresh; + break; + } } } - return vrefresh; } @@ -1190,7 +1193,7 @@ int r600_parse_extended_power_table(struct radeon_device *rdev) (mode_info->atom_context->bios + data_offset + le16_to_cpu(ext_hdr->usPowerTuneTableOffset)); rdev->pm.dpm.dyn_state.cac_tdp_table->maximum_power_delivery_limit = 
- ppt->usMaximumPowerDeliveryLimit; + le16_to_cpu(ppt->usMaximumPowerDeliveryLimit); pt = &ppt->power_tune_table; } else { ATOM_PPLIB_POWERTUNE_Table *ppt = (ATOM_PPLIB_POWERTUNE_Table *) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index b11433f..5c903a8 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -715,6 +715,12 @@ struct cik_irq_stat_regs { u32 disp_int_cont4; u32 disp_int_cont5; u32 disp_int_cont6; + u32 d1grph_int; + u32 d2grph_int; + u32 d3grph_int; + u32 d4grph_int; + u32 d5grph_int; + u32 d6grph_int; }; union radeon_irq_stat_regs { diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index dfa6412..0f538a4 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -464,6 +464,13 @@ static bool radeon_atom_apply_quirks(struct drm_device *dev, } } + /* Fujitsu D3003-S2 board lists DVI-I as DVI-I and VGA */ + if ((dev->pdev->device == 0x9805) && + (dev->pdev->subsystem_vendor == 0x1734) && + (dev->pdev->subsystem_device == 0x11bd)) { + if (*connector_type == DRM_MODE_CONNECTOR_VGA) + return false; + } return true; } @@ -1963,7 +1970,7 @@ static const char *thermal_controller_names[] = { "adm1032", "adm1030", "max6649", - "lm64", + "lm63", /* lm64 */ "f75375", "asc7xxx", }; @@ -1974,7 +1981,7 @@ static const char *pp_lib_thermal_controller_names[] = { "adm1032", "adm1030", "max6649", - "lm64", + "lm63", /* lm64 */ "f75375", "RV6xx", "RV770", @@ -2281,19 +2288,31 @@ static void radeon_atombios_add_pplib_thermal_controller(struct radeon_device *r (controller->ucFanParameters & ATOM_PP_FANPARAMETERS_NOFAN) ? 
"without" : "with"); rdev->pm.int_thermal_type = THERMAL_TYPE_KV; - } else if ((controller->ucType == - ATOM_PP_THERMALCONTROLLER_EXTERNAL_GPIO) || - (controller->ucType == - ATOM_PP_THERMALCONTROLLER_ADT7473_WITH_INTERNAL) || - (controller->ucType == - ATOM_PP_THERMALCONTROLLER_EMC2103_WITH_INTERNAL)) { - DRM_INFO("Special thermal controller config\n"); + } else if (controller->ucType == + ATOM_PP_THERMALCONTROLLER_EXTERNAL_GPIO) { + DRM_INFO("External GPIO thermal controller %s fan control\n", + (controller->ucFanParameters & + ATOM_PP_FANPARAMETERS_NOFAN) ? "without" : "with"); + rdev->pm.int_thermal_type = THERMAL_TYPE_EXTERNAL_GPIO; + } else if (controller->ucType == + ATOM_PP_THERMALCONTROLLER_ADT7473_WITH_INTERNAL) { + DRM_INFO("ADT7473 with internal thermal controller %s fan control\n", + (controller->ucFanParameters & + ATOM_PP_FANPARAMETERS_NOFAN) ? "without" : "with"); + rdev->pm.int_thermal_type = THERMAL_TYPE_ADT7473_WITH_INTERNAL; + } else if (controller->ucType == + ATOM_PP_THERMALCONTROLLER_EMC2103_WITH_INTERNAL) { + DRM_INFO("EMC2103 with internal thermal controller %s fan control\n", + (controller->ucFanParameters & + ATOM_PP_FANPARAMETERS_NOFAN) ? "without" : "with"); + rdev->pm.int_thermal_type = THERMAL_TYPE_EMC2103_WITH_INTERNAL; } else if (controller->ucType < ARRAY_SIZE(pp_lib_thermal_controller_names)) { DRM_INFO("Possible %s thermal controller at 0x%02x %s fan control\n", pp_lib_thermal_controller_names[controller->ucType], controller->ucI2cAddress >> 1, (controller->ucFanParameters & ATOM_PP_FANPARAMETERS_NOFAN) ? 
"without" : "with"); + rdev->pm.int_thermal_type = THERMAL_TYPE_EXTERNAL; i2c_bus = radeon_lookup_i2c_gpio(rdev, controller->ucI2cLine); rdev->pm.i2c_bus = radeon_i2c_lookup(rdev, &i2c_bus); if (rdev->pm.i2c_bus) { diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c index b8db0d7..7c6e3fd 100644 --- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c +++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c @@ -525,6 +525,13 @@ static bool radeon_atpx_detect(void) has_atpx |= (radeon_atpx_pci_probe_handle(pdev) == true); } + /* some newer PX laptops mark the dGPU as a non-VGA display device */ + while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_OTHER << 8, pdev)) != NULL) { + vga_count++; + + has_atpx |= (radeon_atpx_pci_probe_handle(pdev) == true); + } + if (has_atpx && vga_count == 2) { acpi_get_name(radeon_atpx_priv.atpx.handle, ACPI_FULL_PATHNAME, &buffer); printk(KERN_INFO "VGA switcheroo: detected switching method %s handle\n", diff --git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c index 061b227..b131520 100644 --- a/drivers/gpu/drm/radeon/radeon_bios.c +++ b/drivers/gpu/drm/radeon/radeon_bios.c @@ -196,6 +196,20 @@ static bool radeon_atrm_get_bios(struct radeon_device *rdev) } } + if (!found) { + while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_OTHER << 8, pdev)) != NULL) { + dhandle = ACPI_HANDLE(&pdev->dev); + if (!dhandle) + continue; + + status = acpi_get_handle(dhandle, "ATRM", &atrm_handle); + if (!ACPI_FAILURE(status)) { + found = true; + break; + } + } + } + if (!found) return false; diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 6456573..fe90b3e 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -1360,7 +1360,7 @@ bool radeon_connector_is_dp12_capable(struct drm_connector *connector) struct radeon_device *rdev = dev->dev_private; if (ASIC_IS_DCE5(rdev) && - 
(rdev->clock.dp_extclk >= 53900) && + (rdev->clock.default_dispclk >= 53900) && radeon_connector_encoder_is_hbr2(connector)) { return true; } diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 80285e3..ed9a997 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -97,6 +97,12 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) uint32_t domain = r->write_domain ? r->write_domain : r->read_domains; + if (domain & RADEON_GEM_DOMAIN_CPU) { + DRM_ERROR("RADEON_GEM_DOMAIN_CPU is not valid " + "for command submission\n"); + return -EINVAL; + } + p->relocs[i].lobj.domain = domain; if (domain == RADEON_GEM_DOMAIN_VRAM) domain |= RADEON_GEM_DOMAIN_GTT; @@ -271,10 +277,17 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) return -EINVAL; /* we only support VM on some SI+ rings */ - if ((p->rdev->asic->ring[p->ring]->cs_parse == NULL) && - ((p->cs_flags & RADEON_CS_USE_VM) == 0)) { - DRM_ERROR("Ring %d requires VM!\n", p->ring); - return -EINVAL; + if ((p->cs_flags & RADEON_CS_USE_VM) == 0) { + if (p->rdev->asic->ring[p->ring]->cs_parse == NULL) { + DRM_ERROR("Ring %d requires VM!\n", p->ring); + return -EINVAL; + } + } else { + if (p->rdev->asic->ring[p->ring]->ib_parse == NULL) { + DRM_ERROR("VM not supported on ring %d!\n", + p->ring); + return -EINVAL; + } } } diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 0d1aa05..9a19a04 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -281,6 +281,10 @@ void radeon_crtc_handle_flip(struct radeon_device *rdev, int crtc_id) u32 update_pending; int vpos, hpos; + /* can happen during initialization */ + if (radeon_crtc == NULL) + return; + spin_lock_irqsave(&rdev->ddev->event_lock, flags); work = radeon_crtc->unpin_work; if (work == NULL || @@ -704,6 +708,10 @@ int radeon_ddc_get_modes(struct radeon_connector *radeon_connector) 
struct radeon_device *rdev = dev->dev_private; int ret = 0; + /* don't leak the edid if we already fetched it in detect() */ + if (radeon_connector->edid) + goto got_edid; + /* on hw with routers, select right port */ if (radeon_connector->router.ddc_valid) radeon_router_select_ddc_port(radeon_connector); @@ -743,8 +751,10 @@ int radeon_ddc_get_modes(struct radeon_connector *radeon_connector) radeon_connector->edid = radeon_bios_get_hardcoded_edid(rdev); } if (radeon_connector->edid) { +got_edid: drm_mode_connector_update_edid_property(&radeon_connector->base, radeon_connector->edid); ret = drm_add_edid_modes(&radeon_connector->base, radeon_connector->edid); + drm_edid_to_eld(&radeon_connector->base, radeon_connector->edid); return ret; } drm_mode_connector_update_edid_property(&radeon_connector->base, NULL); diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index cc9e848..a1a8430 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -196,6 +196,16 @@ static bool radeon_msi_ok(struct radeon_device *rdev) if (rdev->flags & RADEON_IS_AGP) return false; + /* + * Older chips have a HW limitation, they can only generate 40 bits + * of address for "64-bit" MSIs which breaks on some platforms, notably + * IBM POWER servers, so we limit them + */ + if (rdev->family < CHIP_BONAIRE) { + dev_info(rdev->dev, "radeon: MSI limited to 32-bit\n"); + rdev->pdev->no_64bit_msi = 1; + } + /* force MSI on */ if (radeon_msi == 1) return true; diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 7456ce1..03ff672 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -501,8 +501,11 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) radeon_vm_init(rdev, &fpriv->vm); r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); - if (r) + if (r) { + radeon_vm_fini(rdev, &fpriv->vm); + kfree(fpriv); 
return r; + } /* map the ib pool buffer read only into * virtual address space */ @@ -681,6 +684,8 @@ int radeon_get_vblank_timestamp_kms(struct drm_device *dev, int crtc, /* Get associated drm_crtc: */ drmcrtc = &rdev->mode_info.crtcs[crtc]->base; + if (!drmcrtc) + return -EINVAL; /* Helper routine in DRM core does all the work: */ return drm_calc_vbltimestamp_from_scanoutpos(dev, crtc, max_error, diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index c0fa4aa..315b8e2 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -586,22 +586,30 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo) rbo = container_of(bo, struct radeon_bo, tbo); radeon_bo_check_tiling(rbo, 0, 0); rdev = rbo->rdev; - if (bo->mem.mem_type == TTM_PL_VRAM) { - size = bo->mem.num_pages << PAGE_SHIFT; - offset = bo->mem.start << PAGE_SHIFT; - if ((offset + size) > rdev->mc.visible_vram_size) { - /* hurrah the memory is not visible ! */ - radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM); - rbo->placement.lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT; - r = ttm_bo_validate(bo, &rbo->placement, false, false); - if (unlikely(r != 0)) - return r; - offset = bo->mem.start << PAGE_SHIFT; - /* this should not happen */ - if ((offset + size) > rdev->mc.visible_vram_size) - return -EINVAL; - } + if (bo->mem.mem_type != TTM_PL_VRAM) + return 0; + + size = bo->mem.num_pages << PAGE_SHIFT; + offset = bo->mem.start << PAGE_SHIFT; + if ((offset + size) <= rdev->mc.visible_vram_size) + return 0; + + /* hurrah the memory is not visible ! 
*/ + radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM); + rbo->placement.lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT; + r = ttm_bo_validate(bo, &rbo->placement, false, false); + if (unlikely(r == -ENOMEM)) { + radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT); + return ttm_bo_validate(bo, &rbo->placement, false, false); + } else if (unlikely(r != 0)) { + return r; } + + offset = bo->mem.start << PAGE_SHIFT; + /* this should never happen */ + if ((offset + size) > rdev->mc.visible_vram_size) + return -EINVAL; + return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index a0ec4bb..10fc977 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -1362,12 +1362,14 @@ static void radeon_pm_compute_clocks_old(struct radeon_device *rdev) rdev->pm.active_crtcs = 0; rdev->pm.active_crtc_count = 0; - list_for_each_entry(crtc, - &ddev->mode_config.crtc_list, head) { - radeon_crtc = to_radeon_crtc(crtc); - if (radeon_crtc->enabled) { - rdev->pm.active_crtcs |= (1 << radeon_crtc->crtc_id); - rdev->pm.active_crtc_count++; + if (rdev->num_crtc && rdev->mode_info.mode_config_initialized) { + list_for_each_entry(crtc, + &ddev->mode_config.crtc_list, head) { + radeon_crtc = to_radeon_crtc(crtc); + if (radeon_crtc->enabled) { + rdev->pm.active_crtcs |= (1 << radeon_crtc->crtc_id); + rdev->pm.active_crtc_count++; + } } } @@ -1431,12 +1433,14 @@ static void radeon_pm_compute_clocks_dpm(struct radeon_device *rdev) /* update active crtc counts */ rdev->pm.dpm.new_active_crtcs = 0; rdev->pm.dpm.new_active_crtc_count = 0; - list_for_each_entry(crtc, - &ddev->mode_config.crtc_list, head) { - radeon_crtc = to_radeon_crtc(crtc); - if (crtc->enabled) { - rdev->pm.dpm.new_active_crtcs |= (1 << radeon_crtc->crtc_id); - rdev->pm.dpm.new_active_crtc_count++; + if (rdev->num_crtc && rdev->mode_info.mode_config_initialized) { + list_for_each_entry(crtc, + &ddev->mode_config.crtc_list, head) { + 
radeon_crtc = to_radeon_crtc(crtc); + if (crtc->enabled) { + rdev->pm.dpm.new_active_crtcs |= (1 << radeon_crtc->crtc_id); + rdev->pm.dpm.new_active_crtc_count++; + } } } diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 84323c9..02d3c38 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -189,7 +189,7 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo, rbo = container_of(bo, struct radeon_bo, tbo); switch (bo->mem.mem_type) { case TTM_PL_VRAM: - if (rbo->rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready == false) + if (rbo->rdev->ring[radeon_copy_ring_index(rbo->rdev)].ready == false) radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU); else radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT); diff --git a/drivers/gpu/drm/radeon/radeon_ucode.h b/drivers/gpu/drm/radeon/radeon_ucode.h index 3385836..7e48c35 100644 --- a/drivers/gpu/drm/radeon/radeon_ucode.h +++ b/drivers/gpu/drm/radeon/radeon_ucode.h @@ -57,6 +57,9 @@ #define BTC_MC_UCODE_SIZE 6024 #define CAYMAN_MC_UCODE_SIZE 6037 #define SI_MC_UCODE_SIZE 7769 +#define TAHITI_MC_UCODE_SIZE 7808 +#define PITCAIRN_MC_UCODE_SIZE 7775 +#define VERDE_MC_UCODE_SIZE 7875 #define OLAND_MC_UCODE_SIZE 7863 #define CIK_MC_UCODE_SIZE 7866 diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index 8393647..a656b1a 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -464,6 +464,10 @@ static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p, cmd = radeon_get_ib_value(p, p->idx) >> 1; if (cmd < 0x4) { + if (end <= start) { + DRM_ERROR("invalid reloc offset %X!\n", offset); + return -EINVAL; + } if ((end - start) < buf_sizes[cmd]) { DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd, (unsigned)(end - start), buf_sizes[cmd]); diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index 6acba80..bbe8459 100644 --- 
a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -582,8 +582,10 @@ int rs600_gart_set_page(struct radeon_device *rdev, int i, uint64_t addr) return -EINVAL; } addr = addr & 0xFFFFFFFFFFFFF000ULL; - addr |= R600_PTE_VALID | R600_PTE_SYSTEM | R600_PTE_SNOOPED; - addr |= R600_PTE_READABLE | R600_PTE_WRITEABLE; + if (addr != rdev->dummy_page.addr) + addr |= R600_PTE_VALID | R600_PTE_READABLE | + R600_PTE_WRITEABLE; + addr |= R600_PTE_SYSTEM | R600_PTE_SNOOPED; writeq(addr, ptr + (i * 8)); return 0; } @@ -824,6 +826,9 @@ void rs600_bandwidth_update(struct radeon_device *rdev) u32 d1mode_priority_a_cnt, d2mode_priority_a_cnt; /* FIXME: implement full support */ + if (!rdev->mode_info.mode_config_initialized) + return; + radeon_update_display_priority(rdev); if (rdev->mode_info.crtcs[0]->base.enabled) diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c index 3c38f0a..d33b4ad 100644 --- a/drivers/gpu/drm/radeon/rs690.c +++ b/drivers/gpu/drm/radeon/rs690.c @@ -585,6 +585,9 @@ void rs690_bandwidth_update(struct radeon_device *rdev) u32 d1mode_priority_a_cnt, d1mode_priority_b_cnt; u32 d2mode_priority_a_cnt, d2mode_priority_b_cnt; + if (!rdev->mode_info.mode_config_initialized) + return; + radeon_update_display_priority(rdev); if (rdev->mode_info.crtcs[0]->base.enabled) diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c index 873eb4b..9de81c5 100644 --- a/drivers/gpu/drm/radeon/rv515.c +++ b/drivers/gpu/drm/radeon/rv515.c @@ -1279,6 +1279,9 @@ void rv515_bandwidth_update(struct radeon_device *rdev) struct drm_display_mode *mode0 = NULL; struct drm_display_mode *mode1 = NULL; + if (!rdev->mode_info.mode_config_initialized) + return; + radeon_update_display_priority(rdev); if (rdev->mode_info.crtcs[0]->base.enabled) diff --git a/drivers/gpu/drm/radeon/rv770_dpm.c b/drivers/gpu/drm/radeon/rv770_dpm.c index a239b30..890cf17 100644 --- a/drivers/gpu/drm/radeon/rv770_dpm.c +++ 
b/drivers/gpu/drm/radeon/rv770_dpm.c @@ -2328,12 +2328,6 @@ void rv770_get_engine_memory_ss(struct radeon_device *rdev) pi->mclk_ss = radeon_atombios_get_asic_ss_info(rdev, &ss, ASIC_INTERNAL_MEMORY_SS, 0); - /* disable ss, causes hangs on some cayman boards */ - if (rdev->family == CHIP_CAYMAN) { - pi->sclk_ss = false; - pi->mclk_ss = false; - } - if (pi->sclk_ss || pi->mclk_ss) pi->dynamic_ss = true; else diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 873e0a6..50482e7 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -39,30 +39,35 @@ MODULE_FIRMWARE("radeon/TAHITI_pfp.bin"); MODULE_FIRMWARE("radeon/TAHITI_me.bin"); MODULE_FIRMWARE("radeon/TAHITI_ce.bin"); MODULE_FIRMWARE("radeon/TAHITI_mc.bin"); +MODULE_FIRMWARE("radeon/TAHITI_mc2.bin"); MODULE_FIRMWARE("radeon/TAHITI_rlc.bin"); MODULE_FIRMWARE("radeon/TAHITI_smc.bin"); MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin"); MODULE_FIRMWARE("radeon/PITCAIRN_me.bin"); MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin"); MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin"); +MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin"); MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin"); MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin"); MODULE_FIRMWARE("radeon/VERDE_pfp.bin"); MODULE_FIRMWARE("radeon/VERDE_me.bin"); MODULE_FIRMWARE("radeon/VERDE_ce.bin"); MODULE_FIRMWARE("radeon/VERDE_mc.bin"); +MODULE_FIRMWARE("radeon/VERDE_mc2.bin"); MODULE_FIRMWARE("radeon/VERDE_rlc.bin"); MODULE_FIRMWARE("radeon/VERDE_smc.bin"); MODULE_FIRMWARE("radeon/OLAND_pfp.bin"); MODULE_FIRMWARE("radeon/OLAND_me.bin"); MODULE_FIRMWARE("radeon/OLAND_ce.bin"); MODULE_FIRMWARE("radeon/OLAND_mc.bin"); +MODULE_FIRMWARE("radeon/OLAND_mc2.bin"); MODULE_FIRMWARE("radeon/OLAND_rlc.bin"); MODULE_FIRMWARE("radeon/OLAND_smc.bin"); MODULE_FIRMWARE("radeon/HAINAN_pfp.bin"); MODULE_FIRMWARE("radeon/HAINAN_me.bin"); MODULE_FIRMWARE("radeon/HAINAN_ce.bin"); MODULE_FIRMWARE("radeon/HAINAN_mc.bin"); +MODULE_FIRMWARE("radeon/HAINAN_mc2.bin"); 
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin"); MODULE_FIRMWARE("radeon/HAINAN_smc.bin"); @@ -1470,36 +1475,33 @@ static int si_mc_load_microcode(struct radeon_device *rdev) const __be32 *fw_data; u32 running, blackout = 0; u32 *io_mc_regs; - int i, ucode_size, regs_size; + int i, regs_size, ucode_size; if (!rdev->mc_fw) return -EINVAL; + ucode_size = rdev->mc_fw->size / 4; + switch (rdev->family) { case CHIP_TAHITI: io_mc_regs = (u32 *)&tahiti_io_mc_regs; - ucode_size = SI_MC_UCODE_SIZE; regs_size = TAHITI_IO_MC_REGS_SIZE; break; case CHIP_PITCAIRN: io_mc_regs = (u32 *)&pitcairn_io_mc_regs; - ucode_size = SI_MC_UCODE_SIZE; regs_size = TAHITI_IO_MC_REGS_SIZE; break; case CHIP_VERDE: default: io_mc_regs = (u32 *)&verde_io_mc_regs; - ucode_size = SI_MC_UCODE_SIZE; regs_size = TAHITI_IO_MC_REGS_SIZE; break; case CHIP_OLAND: io_mc_regs = (u32 *)&oland_io_mc_regs; - ucode_size = OLAND_MC_UCODE_SIZE; regs_size = TAHITI_IO_MC_REGS_SIZE; break; case CHIP_HAINAN: io_mc_regs = (u32 *)&hainan_io_mc_regs; - ucode_size = OLAND_MC_UCODE_SIZE; regs_size = TAHITI_IO_MC_REGS_SIZE; break; } @@ -1555,7 +1557,7 @@ static int si_init_microcode(struct radeon_device *rdev) const char *chip_name; const char *rlc_chip_name; size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size; - size_t smc_req_size; + size_t smc_req_size, mc2_req_size; char fw_name[30]; int err; @@ -1570,6 +1572,7 @@ static int si_init_microcode(struct radeon_device *rdev) ce_req_size = SI_CE_UCODE_SIZE * 4; rlc_req_size = SI_RLC_UCODE_SIZE * 4; mc_req_size = SI_MC_UCODE_SIZE * 4; + mc2_req_size = TAHITI_MC_UCODE_SIZE * 4; smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4); break; case CHIP_PITCAIRN: @@ -1580,6 +1583,7 @@ static int si_init_microcode(struct radeon_device *rdev) ce_req_size = SI_CE_UCODE_SIZE * 4; rlc_req_size = SI_RLC_UCODE_SIZE * 4; mc_req_size = SI_MC_UCODE_SIZE * 4; + mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4; smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4); break; case CHIP_VERDE: @@ -1590,6 
+1594,7 @@ static int si_init_microcode(struct radeon_device *rdev) ce_req_size = SI_CE_UCODE_SIZE * 4; rlc_req_size = SI_RLC_UCODE_SIZE * 4; mc_req_size = SI_MC_UCODE_SIZE * 4; + mc2_req_size = VERDE_MC_UCODE_SIZE * 4; smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4); break; case CHIP_OLAND: @@ -1599,7 +1604,7 @@ static int si_init_microcode(struct radeon_device *rdev) me_req_size = SI_PM4_UCODE_SIZE * 4; ce_req_size = SI_CE_UCODE_SIZE * 4; rlc_req_size = SI_RLC_UCODE_SIZE * 4; - mc_req_size = OLAND_MC_UCODE_SIZE * 4; + mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4; smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4); break; case CHIP_HAINAN: @@ -1609,7 +1614,7 @@ static int si_init_microcode(struct radeon_device *rdev) me_req_size = SI_PM4_UCODE_SIZE * 4; ce_req_size = SI_CE_UCODE_SIZE * 4; rlc_req_size = SI_RLC_UCODE_SIZE * 4; - mc_req_size = OLAND_MC_UCODE_SIZE * 4; + mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4; smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4); break; default: BUG(); @@ -1662,16 +1667,22 @@ static int si_init_microcode(struct radeon_device *rdev) err = -EINVAL; } - snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name); err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev); - if (err) - goto out; - if (rdev->mc_fw->size != mc_req_size) { + if (err) { + snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name); + err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev); + if (err) + goto out; + } + if ((rdev->mc_fw->size != mc_req_size) && + (rdev->mc_fw->size != mc2_req_size)) { printk(KERN_ERR "si_mc: Bogus length %zu in firmware \"%s\"\n", rdev->mc_fw->size, fw_name); err = -EINVAL; } + DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size); snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name); err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev); @@ -2219,6 +2230,9 @@ void dce6_bandwidth_update(struct radeon_device *rdev) u32 
num_heads = 0, lb_size; int i; + if (!rdev->mode_info.mode_config_initialized) + return; + radeon_update_display_priority(rdev); for (i = 0; i < rdev->num_crtc; i++) { @@ -4746,7 +4760,7 @@ void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) /* write new base address */ radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | WRITE_DATA_DST_SEL(0))); if (vm->id < 8) { @@ -5720,7 +5734,6 @@ int si_irq_set(struct radeon_device *rdev) u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0; u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0; u32 grbm_int_cntl = 0; - u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0; u32 dma_cntl, dma_cntl1; u32 thermal_int = 0; @@ -5859,16 +5872,22 @@ int si_irq_set(struct radeon_device *rdev) } if (rdev->num_crtc >= 2) { - WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1); - WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2); + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, + GRPH_PFLIP_INT_MASK); + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, + GRPH_PFLIP_INT_MASK); } if (rdev->num_crtc >= 4) { - WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3); - WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4); + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, + GRPH_PFLIP_INT_MASK); + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, + GRPH_PFLIP_INT_MASK); } if (rdev->num_crtc >= 6) { - WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5); - WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6); + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, + GRPH_PFLIP_INT_MASK); + WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, + GRPH_PFLIP_INT_MASK); } if (!ASIC_IS_NODCE(rdev)) { @@ -6025,6 +6044,7 @@ static inline u32 
si_get_ih_wptr(struct radeon_device *rdev) tmp = RREG32(IH_RB_CNTL); tmp |= IH_WPTR_OVERFLOW_CLEAR; WREG32(IH_RB_CNTL, tmp); + wptr &= ~RB_OVERFLOW; } return (wptr & rdev->ih.ptr_mask); } @@ -6232,6 +6252,15 @@ restart_ih: break; } break; + case 8: /* D1 page flip */ + case 10: /* D2 page flip */ + case 12: /* D3 page flip */ + case 14: /* D4 page flip */ + case 16: /* D5 page flip */ + case 18: /* D6 page flip */ + DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1); + radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1); + break; case 42: /* HPD hotplug */ switch (src_data) { case 0: diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 83895f2..51588d3 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -2901,6 +2901,22 @@ static int si_init_smc_spll_table(struct radeon_device *rdev) return ret; } +struct si_dpm_quirk { + u32 chip_vendor; + u32 chip_device; + u32 subsys_vendor; + u32 subsys_device; + u32 max_sclk; + u32 max_mclk; +}; + +/* cards with dpm stability problems */ +static struct si_dpm_quirk si_dpm_quirk_list[] = { + /* PITCAIRN - https://bugs.freedesktop.org/show_bug.cgi?id=76490 */ + { PCI_VENDOR_ID_ATI, 0x6810, 0x1462, 0x3036, 0, 120000 }, + { 0, 0, 0, 0 }, +}; + static void si_apply_state_adjust_rules(struct radeon_device *rdev, struct radeon_ps *rps) { @@ -2911,7 +2927,22 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, u32 mclk, sclk; u16 vddc, vddci; u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc; + u32 max_sclk = 0, max_mclk = 0; int i; + struct si_dpm_quirk *p = si_dpm_quirk_list; + + /* Apply dpm quirks */ + while (p && p->chip_device != 0) { + if (rdev->pdev->vendor == p->chip_vendor && + rdev->pdev->device == p->chip_device && + rdev->pdev->subsystem_vendor == p->subsys_vendor && + rdev->pdev->subsystem_device == p->subsys_device) { + max_sclk = p->max_sclk; + max_mclk = p->max_mclk; + break; + } + ++p; + } if ((rdev->pm.dpm.new_active_crtc_count > 1) || 
ni_dpm_vblank_too_short(rdev)) @@ -2965,6 +2996,14 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, if (ps->performance_levels[i].mclk > max_mclk_vddc) ps->performance_levels[i].mclk = max_mclk_vddc; } + if (max_mclk) { + if (ps->performance_levels[i].mclk > max_mclk) + ps->performance_levels[i].mclk = max_mclk; + } + if (max_sclk) { + if (ps->performance_levels[i].sclk > max_sclk) + ps->performance_levels[i].sclk = max_sclk; + } } /* XXX validate the min clocks required for display */ @@ -6220,7 +6259,7 @@ static void si_parse_pplib_clock_info(struct radeon_device *rdev, if ((rps->class2 & ATOM_PPLIB_CLASSIFICATION2_ULV) && index == 0) { /* XXX disable for A0 tahiti */ - si_pi->ulv.supported = true; + si_pi->ulv.supported = false; si_pi->ulv.pl = *pl; si_pi->ulv.one_pcie_lane_in_ulv = false; si_pi->ulv.volt_change_delay = SISLANDS_ULVVOLTAGECHANGEDELAY_DFLT; diff --git a/drivers/gpu/drm/radeon/trinity_dpm.c b/drivers/gpu/drm/radeon/trinity_dpm.c index d700698..bf980ea 100644 --- a/drivers/gpu/drm/radeon/trinity_dpm.c +++ b/drivers/gpu/drm/radeon/trinity_dpm.c @@ -1868,7 +1868,16 @@ int trinity_dpm_init(struct radeon_device *rdev) for (i = 0; i < SUMO_MAX_HARDWARE_POWERLEVELS; i++) pi->at[i] = TRINITY_AT_DFLT; - pi->enable_bapm = false; + /* There are stability issues reported on with + * bapm enabled when switching between AC and battery + * power. At the same time, some MSI boards hang + * if it's not enabled and dpm is enabled. Just enable + * it for MSI boards right now. 
+ */ + if (rdev->pdev->subsystem_vendor == 0x1462) + pi->enable_bapm = true; + else + pi->enable_bapm = false; pi->enable_nbps_policy = true; pi->enable_sclk_ds = true; pi->enable_gfx_power_gating = true; diff --git a/drivers/gpu/drm/radeon/uvd_v1_0.c b/drivers/gpu/drm/radeon/uvd_v1_0.c index 7266805..f680f5f 100644 --- a/drivers/gpu/drm/radeon/uvd_v1_0.c +++ b/drivers/gpu/drm/radeon/uvd_v1_0.c @@ -83,7 +83,10 @@ int uvd_v1_0_init(struct radeon_device *rdev) int r; /* raise clocks while booting up the VCPU */ - radeon_set_uvd_clocks(rdev, 53300, 40000); + if (rdev->family < CHIP_RV740) + radeon_set_uvd_clocks(rdev, 10000, 10000); + else + radeon_set_uvd_clocks(rdev, 53300, 40000); r = uvd_v1_0_start(rdev); if (r) @@ -405,7 +408,10 @@ int uvd_v1_0_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) struct radeon_fence *fence = NULL; int r; - r = radeon_set_uvd_clocks(rdev, 53300, 40000); + if (rdev->family < CHIP_RV740) + r = radeon_set_uvd_clocks(rdev, 10000, 10000); + else + r = radeon_set_uvd_clocks(rdev, 53300, 40000); if (r) { DRM_ERROR("radeon: failed to raise UVD clocks (%d).\n", r); return r; diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.c b/drivers/gpu/drm/tilcdc/tilcdc_drv.c index 116da19..2b25d65 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_drv.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.c @@ -84,6 +84,7 @@ static int modeset_init(struct drm_device *dev) if ((priv->num_encoders == 0) || (priv->num_connectors == 0)) { /* oh nos! 
*/ dev_err(dev->dev, "no encoders/connectors found\n"); + drm_mode_config_cleanup(dev); return -ENXIO; } @@ -122,6 +123,7 @@ static int tilcdc_unload(struct drm_device *dev) struct tilcdc_drm_private *priv = dev->dev_private; struct tilcdc_module *mod, *cur; + drm_fbdev_cma_fini(priv->fbdev); drm_kms_helper_poll_fini(dev); drm_mode_config_cleanup(dev); drm_vblank_cleanup(dev); @@ -177,33 +179,37 @@ static int tilcdc_load(struct drm_device *dev, unsigned long flags) dev->dev_private = priv; priv->wq = alloc_ordered_workqueue("tilcdc", 0); + if (!priv->wq) { + ret = -ENOMEM; + goto fail_free_priv; + } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) { dev_err(dev->dev, "failed to get memory resource\n"); ret = -EINVAL; - goto fail; + goto fail_free_wq; } priv->mmio = ioremap_nocache(res->start, resource_size(res)); if (!priv->mmio) { dev_err(dev->dev, "failed to ioremap\n"); ret = -ENOMEM; - goto fail; + goto fail_free_wq; } priv->clk = clk_get(dev->dev, "fck"); if (IS_ERR(priv->clk)) { dev_err(dev->dev, "failed to get functional clock\n"); ret = -ENODEV; - goto fail; + goto fail_iounmap; } priv->disp_clk = clk_get(dev->dev, "dpll_disp_ck"); if (IS_ERR(priv->clk)) { dev_err(dev->dev, "failed to get display clock\n"); ret = -ENODEV; - goto fail; + goto fail_put_clk; } #ifdef CONFIG_CPU_FREQ @@ -213,7 +219,7 @@ static int tilcdc_load(struct drm_device *dev, unsigned long flags) CPUFREQ_TRANSITION_NOTIFIER); if (ret) { dev_err(dev->dev, "failed to register cpufreq notifier\n"); - goto fail; + goto fail_put_disp_clk; } #endif @@ -258,13 +264,13 @@ static int tilcdc_load(struct drm_device *dev, unsigned long flags) ret = modeset_init(dev); if (ret < 0) { dev_err(dev->dev, "failed to initialize mode setting\n"); - goto fail; + goto fail_cpufreq_unregister; } ret = drm_vblank_init(dev, 1); if (ret < 0) { dev_err(dev->dev, "failed to initialize vblank\n"); - goto fail; + goto fail_mode_config_cleanup; } pm_runtime_get_sync(dev->dev); @@ -272,7 +278,7 @@ static 
int tilcdc_load(struct drm_device *dev, unsigned long flags) pm_runtime_put_sync(dev->dev); if (ret < 0) { dev_err(dev->dev, "failed to install IRQ handler\n"); - goto fail; + goto fail_vblank_cleanup; } platform_set_drvdata(pdev, dev); @@ -288,13 +294,48 @@ static int tilcdc_load(struct drm_device *dev, unsigned long flags) priv->fbdev = drm_fbdev_cma_init(dev, bpp, dev->mode_config.num_crtc, dev->mode_config.num_connector); + if (IS_ERR(priv->fbdev)) { + ret = PTR_ERR(priv->fbdev); + goto fail_irq_uninstall; + } drm_kms_helper_poll_init(dev); return 0; -fail: - tilcdc_unload(dev); +fail_irq_uninstall: + pm_runtime_get_sync(dev->dev); + drm_irq_uninstall(dev); + pm_runtime_put_sync(dev->dev); + +fail_vblank_cleanup: + drm_vblank_cleanup(dev); + +fail_mode_config_cleanup: + drm_mode_config_cleanup(dev); + +fail_cpufreq_unregister: + pm_runtime_disable(dev->dev); +#ifdef CONFIG_CPU_FREQ + cpufreq_unregister_notifier(&priv->freq_transition, + CPUFREQ_TRANSITION_NOTIFIER); +fail_put_disp_clk: + clk_put(priv->disp_clk); +#endif + +fail_put_clk: + clk_put(priv->clk); + +fail_iounmap: + iounmap(priv->mmio); + +fail_free_wq: + flush_workqueue(priv->wq); + destroy_workqueue(priv->wq); + +fail_free_priv: + dev->dev_private = NULL; + kfree(priv); return ret; } @@ -628,10 +669,10 @@ static int __init tilcdc_drm_init(void) static void __exit tilcdc_drm_fini(void) { DBG("fini"); - tilcdc_tfp410_fini(); - tilcdc_slave_fini(); - tilcdc_panel_fini(); platform_driver_unregister(&tilcdc_platform_driver); + tilcdc_panel_fini(); + tilcdc_slave_fini(); + tilcdc_tfp410_fini(); } late_initcall(tilcdc_drm_init); diff --git a/drivers/gpu/drm/tilcdc/tilcdc_panel.c b/drivers/gpu/drm/tilcdc/tilcdc_panel.c index 86c6732..b085dcc 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_panel.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_panel.c @@ -151,6 +151,7 @@ struct panel_connector { static void panel_connector_destroy(struct drm_connector *connector) { struct panel_connector *panel_connector = 
to_panel_connector(connector); + drm_sysfs_connector_remove(connector); drm_connector_cleanup(connector); kfree(panel_connector); } @@ -285,10 +286,8 @@ static void panel_destroy(struct tilcdc_module *mod) { struct panel_module *panel_mod = to_panel_module(mod); - if (panel_mod->timings) { + if (panel_mod->timings) display_timings_release(panel_mod->timings); - kfree(panel_mod->timings); - } tilcdc_module_cleanup(mod); kfree(panel_mod->info); diff --git a/drivers/gpu/drm/tilcdc/tilcdc_slave.c b/drivers/gpu/drm/tilcdc/tilcdc_slave.c index 595068b..2f83ffb 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_slave.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_slave.c @@ -166,6 +166,7 @@ struct slave_connector { static void slave_connector_destroy(struct drm_connector *connector) { struct slave_connector *slave_connector = to_slave_connector(connector); + drm_sysfs_connector_remove(connector); drm_connector_cleanup(connector); kfree(slave_connector); } diff --git a/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c b/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c index c38b56b..ce75ac8 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c @@ -167,6 +167,7 @@ struct tfp410_connector { static void tfp410_connector_destroy(struct drm_connector *connector) { struct tfp410_connector *tfp410_connector = to_tfp410_connector(connector); + drm_sysfs_connector_remove(connector); drm_connector_cleanup(connector); kfree(tfp410_connector); } diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index 863bef9..76329d2 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -297,9 +297,12 @@ static void ttm_pool_update_free_locked(struct ttm_page_pool *pool, * * @pool: to free the pages from * @free_all: If set to true will free all pages in pool + * @use_static: Safe to use static buffer **/ -static int ttm_page_pool_free(struct ttm_page_pool *pool, unsigned nr_free) +static int 
ttm_page_pool_free(struct ttm_page_pool *pool, unsigned nr_free, + bool use_static) { + static struct page *static_buf[NUM_PAGES_TO_ALLOC]; unsigned long irq_flags; struct page *p; struct page **pages_to_free; @@ -309,8 +312,11 @@ static int ttm_page_pool_free(struct ttm_page_pool *pool, unsigned nr_free) if (NUM_PAGES_TO_ALLOC < nr_free) npages_to_free = NUM_PAGES_TO_ALLOC; - pages_to_free = kmalloc(npages_to_free * sizeof(struct page *), - GFP_KERNEL); + if (use_static) + pages_to_free = static_buf; + else + pages_to_free = kmalloc(npages_to_free * sizeof(struct page *), + GFP_KERNEL); if (!pages_to_free) { pr_err("Failed to allocate memory for pool free operation\n"); return 0; @@ -373,7 +379,8 @@ restart: if (freed_pages) ttm_pages_put(pages_to_free, freed_pages); out: - kfree(pages_to_free); + if (pages_to_free != static_buf) + kfree(pages_to_free); return nr_free; } @@ -382,32 +389,33 @@ out: * * XXX: (dchinner) Deadlock warning! * - * ttm_page_pool_free() does memory allocation using GFP_KERNEL. that means - * this can deadlock when called a sc->gfp_mask that is not equal to - * GFP_KERNEL. - * * This code is crying out for a shrinker per pool.... 
*/ static unsigned long ttm_pool_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { - static atomic_t start_pool = ATOMIC_INIT(0); + static DEFINE_MUTEX(lock); + static unsigned start_pool; unsigned i; - unsigned pool_offset = atomic_add_return(1, &start_pool); + unsigned pool_offset; struct ttm_page_pool *pool; int shrink_pages = sc->nr_to_scan; unsigned long freed = 0; - pool_offset = pool_offset % NUM_POOLS; + if (!mutex_trylock(&lock)) + return SHRINK_STOP; + pool_offset = ++start_pool % NUM_POOLS; /* select start pool in round robin fashion */ for (i = 0; i < NUM_POOLS; ++i) { unsigned nr_free = shrink_pages; if (shrink_pages == 0) break; pool = &_manager->pools[(i + pool_offset)%NUM_POOLS]; - shrink_pages = ttm_page_pool_free(pool, nr_free); + /* OK to use static buffer since global mutex is held. */ + shrink_pages = ttm_page_pool_free(pool, nr_free, true); freed += nr_free - shrink_pages; } + mutex_unlock(&lock); return freed; } @@ -706,7 +714,7 @@ static void ttm_put_pages(struct page **pages, unsigned npages, int flags, } spin_unlock_irqrestore(&pool->lock, irq_flags); if (npages) - ttm_page_pool_free(pool, npages); + ttm_page_pool_free(pool, npages, false); } /* @@ -845,8 +853,9 @@ void ttm_page_alloc_fini(void) pr_info("Finalizing pool allocator\n"); ttm_pool_mm_shrink_fini(_manager); + /* OK to use static buffer since global mutex is no longer used. 
*/ for (i = 0; i < NUM_POOLS; ++i) - ttm_page_pool_free(&_manager->pools[i], FREE_ALL_PAGES); + ttm_page_pool_free(&_manager->pools[i], FREE_ALL_PAGES, true); kobject_put(&_manager->kobj); _manager = NULL; diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c index 7957bee..9082ca0 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c @@ -410,9 +410,12 @@ static void ttm_dma_page_put(struct dma_pool *pool, struct dma_page *d_page) * * @pool: to free the pages from * @nr_free: If set to true will free all pages in pool + * @use_static: Safe to use static buffer **/ -static unsigned ttm_dma_page_pool_free(struct dma_pool *pool, unsigned nr_free) +static unsigned ttm_dma_page_pool_free(struct dma_pool *pool, unsigned nr_free, + bool use_static) { + static struct page *static_buf[NUM_PAGES_TO_ALLOC]; unsigned long irq_flags; struct dma_page *dma_p, *tmp; struct page **pages_to_free; @@ -429,8 +432,11 @@ static unsigned ttm_dma_page_pool_free(struct dma_pool *pool, unsigned nr_free) npages_to_free, nr_free); } #endif - pages_to_free = kmalloc(npages_to_free * sizeof(struct page *), - GFP_KERNEL); + if (use_static) + pages_to_free = static_buf; + else + pages_to_free = kmalloc(npages_to_free * sizeof(struct page *), + GFP_KERNEL); if (!pages_to_free) { pr_err("%s: Failed to allocate memory for pool free operation\n", @@ -500,7 +506,8 @@ restart: if (freed_pages) ttm_dma_pages_put(pool, &d_pages, pages_to_free, freed_pages); out: - kfree(pages_to_free); + if (pages_to_free != static_buf) + kfree(pages_to_free); return nr_free; } @@ -529,7 +536,8 @@ static void ttm_dma_free_pool(struct device *dev, enum pool_type type) if (pool->type != type) continue; /* Takes a spinlock.. */ - ttm_dma_page_pool_free(pool, FREE_ALL_PAGES); + /* OK to use static buffer since global mutex is held. 
*/ + ttm_dma_page_pool_free(pool, FREE_ALL_PAGES, true); WARN_ON(((pool->npages_in_use + pool->npages_free) != 0)); /* This code path is called after _all_ references to the * struct device has been dropped - so nobody should be @@ -982,7 +990,7 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev) /* shrink pool if necessary (only on !is_cached pools)*/ if (npages) - ttm_dma_page_pool_free(pool, npages); + ttm_dma_page_pool_free(pool, npages, false); ttm->state = tt_unpopulated; } EXPORT_SYMBOL_GPL(ttm_dma_unpopulate); @@ -992,20 +1000,15 @@ EXPORT_SYMBOL_GPL(ttm_dma_unpopulate); * * XXX: (dchinner) Deadlock warning! * - * ttm_dma_page_pool_free() does GFP_KERNEL memory allocation, and so attention - * needs to be paid to sc->gfp_mask to determine if this can be done or not. - * GFP_KERNEL memory allocation in a GFP_ATOMIC reclaim context woul dbe really - * bad. - * * I'm getting sadder as I hear more pathetical whimpers about needing per-pool * shrinkers */ static unsigned long ttm_dma_pool_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { - static atomic_t start_pool = ATOMIC_INIT(0); + static unsigned start_pool; unsigned idx = 0; - unsigned pool_offset = atomic_add_return(1, &start_pool); + unsigned pool_offset; unsigned shrink_pages = sc->nr_to_scan; struct device_pools *p; unsigned long freed = 0; @@ -1013,8 +1016,11 @@ ttm_dma_pool_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) if (list_empty(&_manager->pools)) return SHRINK_STOP; - mutex_lock(&_manager->lock); - pool_offset = pool_offset % _manager->npools; + if (!mutex_trylock(&_manager->lock)) + return SHRINK_STOP; + if (!_manager->npools) + goto out; + pool_offset = ++start_pool % _manager->npools; list_for_each_entry(p, &_manager->pools, pools) { unsigned nr_free; @@ -1026,13 +1032,15 @@ ttm_dma_pool_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) if (++idx < pool_offset) continue; nr_free = shrink_pages; - shrink_pages = 
ttm_dma_page_pool_free(p->pool, nr_free); + /* OK to use static buffer since global mutex is held. */ + shrink_pages = ttm_dma_page_pool_free(p->pool, nr_free, true); freed += nr_free - shrink_pages; pr_debug("%s: (%s:%d) Asked to shrink %d, have %d more to go\n", p->pool->dev_name, p->pool->name, current->pid, nr_free, shrink_pages); } +out: mutex_unlock(&_manager->lock); return freed; } @@ -1043,7 +1051,8 @@ ttm_dma_pool_shrink_count(struct shrinker *shrink, struct shrink_control *sc) struct device_pools *p; unsigned long count = 0; - mutex_lock(&_manager->lock); + if (!mutex_trylock(&_manager->lock)) + return 0; list_for_each_entry(p, &_manager->pools, pools) count += p->pool->npages_free; mutex_unlock(&_manager->lock); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 8b059eb..1b9aa98 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -834,14 +834,36 @@ static int vmw_cmd_dma(struct vmw_private *dev_priv, SVGA3dCmdSurfaceDMA dma; } *cmd; int ret; + SVGA3dCmdSurfaceDMASuffix *suffix; + uint32_t bo_size; cmd = container_of(header, struct vmw_dma_cmd, header); + suffix = (SVGA3dCmdSurfaceDMASuffix *)((unsigned long) &cmd->dma + + header->size - sizeof(*suffix)); + + /* Make sure device and verifier stays in sync. */ + if (unlikely(suffix->suffixSize != sizeof(*suffix))) { + DRM_ERROR("Invalid DMA suffix size.\n"); + return -EINVAL; + } + ret = vmw_translate_guest_ptr(dev_priv, sw_context, &cmd->dma.guest.ptr, &vmw_bo); if (unlikely(ret != 0)) return ret; + /* Make sure DMA doesn't cross BO boundaries. 
*/ + bo_size = vmw_bo->base.num_pages * PAGE_SIZE; + if (unlikely(cmd->dma.guest.ptr.offset > bo_size)) { + DRM_ERROR("Invalid DMA offset.\n"); + return -EINVAL; + } + + bo_size -= cmd->dma.guest.ptr.offset; + if (unlikely(suffix->maximumOffset > bo_size)) + suffix->maximumOffset = bo_size; + ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface, user_surface_converter, &cmd->dma.host.sid, NULL); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c index ed5ce2a..1b0f34b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c @@ -147,7 +147,7 @@ static int vmw_fb_check_var(struct fb_var_screeninfo *var, } if (!vmw_kms_validate_mode_vram(vmw_priv, - info->fix.line_length, + var->xres * var->bits_per_pixel/8, var->yoffset + var->yres)) { DRM_ERROR("Requested geom can not fit in framebuffer\n"); return -EINVAL; @@ -162,6 +162,8 @@ static int vmw_fb_set_par(struct fb_info *info) struct vmw_private *vmw_priv = par->vmw_priv; int ret; + info->fix.line_length = info->var.xres * info->var.bits_per_pixel/8; + ret = vmw_kms_write_svga(vmw_priv, info->var.xres, info->var.yres, info->fix.line_length, par->bpp, par->depth); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index c62d20e..ee742f1 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -1049,6 +1049,8 @@ int vmw_event_fence_action_create(struct drm_file *file_priv, if (ret != 0) goto out_no_queue; + return 0; + out_no_queue: event->base.destroy(&event->base); out_no_event: @@ -1123,17 +1125,10 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data, BUG_ON(fence == NULL); - if (arg->flags & DRM_VMW_FE_FLAG_REQ_TIME) - ret = vmw_event_fence_action_create(file_priv, fence, - arg->flags, - arg->user_data, - true); - else - ret = vmw_event_fence_action_create(file_priv, fence, - arg->flags, - arg->user_data, - true); - + ret = 
vmw_event_fence_action_create(file_priv, fence, + arg->flags, + arg->user_data, + true); if (unlikely(ret != 0)) { if (ret != -ERESTARTSYS) DRM_ERROR("Failed to attach event to fence.\n"); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c index 3eb1486..8966493 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c @@ -163,8 +163,9 @@ void vmw_fifo_release(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo) mutex_lock(&dev_priv->hw_mutex); + vmw_write(dev_priv, SVGA_REG_SYNC, SVGA_SYNC_GENERIC); while (vmw_read(dev_priv, SVGA_REG_BUSY) != 0) - vmw_write(dev_priv, SVGA_REG_SYNC, SVGA_SYNC_GENERIC); + ; dev_priv->last_read_seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index fc43c06..dab6fab 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -1939,6 +1939,14 @@ int vmw_du_connector_fill_modes(struct drm_connector *connector, DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }; int i; + u32 assumed_bpp = 2; + + /* + * If using screen objects, then assume 32-bpp because that's what the + * SVGA device is assuming + */ + if (dev_priv->sou_priv) + assumed_bpp = 4; /* Add preferred mode */ { @@ -1949,8 +1957,9 @@ int vmw_du_connector_fill_modes(struct drm_connector *connector, mode->vdisplay = du->pref_height; vmw_guess_mode_timing(mode); - if (vmw_kms_validate_mode_vram(dev_priv, mode->hdisplay * 2, - mode->vdisplay)) { + if (vmw_kms_validate_mode_vram(dev_priv, + mode->hdisplay * assumed_bpp, + mode->vdisplay)) { drm_mode_probed_add(connector, mode); } else { drm_mode_destroy(dev, mode); @@ -1972,7 +1981,8 @@ int vmw_du_connector_fill_modes(struct drm_connector *connector, bmode->vdisplay > max_height) continue; - if (!vmw_kms_validate_mode_vram(dev_priv, bmode->hdisplay * 2, + if (!vmw_kms_validate_mode_vram(dev_priv, + bmode->hdisplay * assumed_bpp, 
bmode->vdisplay)) continue; diff --git a/drivers/gpu/host1x/hw/intr_hw.c b/drivers/gpu/host1x/hw/intr_hw.c index b592eef..b083509 100644 --- a/drivers/gpu/host1x/hw/intr_hw.c +++ b/drivers/gpu/host1x/hw/intr_hw.c @@ -48,7 +48,7 @@ static irqreturn_t syncpt_thresh_isr(int irq, void *dev_id) unsigned long reg; int i, id; - for (i = 0; i <= BIT_WORD(host->info->nb_pts); i++) { + for (i = 0; i < DIV_ROUND_UP(host->info->nb_pts, 32); i++) { reg = host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i)); for_each_set_bit(id, &reg, BITS_PER_LONG) { @@ -65,7 +65,7 @@ static void _host1x_intr_disable_all_syncpt_intrs(struct host1x *host) { u32 i; - for (i = 0; i <= BIT_WORD(host->info->nb_pts); ++i) { + for (i = 0; i < DIV_ROUND_UP(host->info->nb_pts, 32); ++i) { host1x_sync_writel(host, 0xffffffffu, HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(i)); host1x_sync_writel(host, 0xffffffffu, diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c index ec0ae2d..37ac7b5 100644 --- a/drivers/gpu/vga/vga_switcheroo.c +++ b/drivers/gpu/vga/vga_switcheroo.c @@ -623,7 +623,8 @@ static int vga_switcheroo_runtime_suspend(struct device *dev) ret = dev->bus->pm->runtime_suspend(dev); if (ret) return ret; - + if (vgasr_priv.handler->switchto) + vgasr_priv.handler->switchto(VGA_SWITCHEROO_IGD); vga_switcheroo_power_switch(pdev, VGA_SWITCHEROO_OFF); return 0; } @@ -659,6 +660,12 @@ int vga_switcheroo_init_domain_pm_ops(struct device *dev, struct dev_pm_domain * } EXPORT_SYMBOL(vga_switcheroo_init_domain_pm_ops); +void vga_switcheroo_fini_domain_pm_ops(struct device *dev) +{ + dev->pm_domain = NULL; +} +EXPORT_SYMBOL(vga_switcheroo_fini_domain_pm_ops); + static int vga_switcheroo_runtime_resume_hdmi_audio(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); diff --git a/drivers/hid/hid-cherry.c b/drivers/hid/hid-cherry.c index 1bdcccc..f745d2c 100644 --- a/drivers/hid/hid-cherry.c +++ b/drivers/hid/hid-cherry.c @@ -28,7 +28,7 @@ static __u8 
*ch_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { - if (*rsize >= 17 && rdesc[11] == 0x3c && rdesc[12] == 0x02) { + if (*rsize >= 18 && rdesc[11] == 0x3c && rdesc[12] == 0x02) { hid_info(hdev, "fixing up Cherry Cymotion report descriptor\n"); rdesc[11] = rdesc[16] = 0xff; rdesc[12] = rdesc[17] = 0x03; diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index aedfe50..62d7326 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -718,6 +718,9 @@ static int hid_scan_main(struct hid_parser *parser, struct hid_item *item) case HID_MAIN_ITEM_TAG_END_COLLECTION: break; case HID_MAIN_ITEM_TAG_INPUT: + /* ignore constant inputs, they will be ignored by hid-input */ + if (data & HID_MAIN_ITEM_CONSTANT) + break; for (i = 0; i < parser->local.usage_index; i++) hid_scan_input_usage(parser, parser->local.usage[i]); break; @@ -839,7 +842,17 @@ struct hid_report *hid_validate_values(struct hid_device *hid, * ->numbered being checked, which may not always be the case when * drivers go to access report values. */ - report = hid->report_enum[type].report_id_hash[id]; + if (id == 0) { + /* + * Validating on id 0 means we should examine the first + * report in the list. 
+ */ + report = list_entry( + hid->report_enum[type].report_list.next, + struct hid_report, list); + } else { + report = hid->report_enum[type].report_id_hash[id]; + } if (!report) { hid_err(hid, "missing %s %u\n", hid_report_names[type], id); return NULL; @@ -1730,6 +1743,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_ERGO_525V) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_I405X) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X) }, + { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X_2) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M610X) }, { HID_USB_DEVICE(USB_VENDOR_ID_LABTEC, USB_DEVICE_ID_LABTEC_WIRELESS_KEYBOARD) }, { HID_USB_DEVICE(USB_VENDOR_ID_LCPOWER, USB_DEVICE_ID_LCPOWER_LC1000 ) }, @@ -1822,8 +1836,9 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_PS1000) }, { HID_USB_DEVICE(USB_VENDOR_ID_SAMSUNG, USB_DEVICE_ID_SAMSUNG_IR_REMOTE) }, { HID_USB_DEVICE(USB_VENDOR_ID_SAMSUNG, USB_DEVICE_ID_SAMSUNG_WIRELESS_KBD_MOUSE) }, - { HID_USB_DEVICE(USB_VENDOR_ID_SIS2_TOUCH, USB_DEVICE_ID_SIS9200_TOUCH) }, - { HID_USB_DEVICE(USB_VENDOR_ID_SIS2_TOUCH, USB_DEVICE_ID_SIS817_TOUCH) }, + { HID_USB_DEVICE(USB_VENDOR_ID_SIS_TOUCH, USB_DEVICE_ID_SIS9200_TOUCH) }, + { HID_USB_DEVICE(USB_VENDOR_ID_SIS_TOUCH, USB_DEVICE_ID_SIS817_TOUCH) }, + { HID_USB_DEVICE(USB_VENDOR_ID_SIS_TOUCH, USB_DEVICE_ID_SIS1030_TOUCH) }, { HID_USB_DEVICE(USB_VENDOR_ID_SKYCABLE, USB_DEVICE_ID_SKYCABLE_WIRELESS_PRESENTER) }, { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_BUZZ_CONTROLLER) }, { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_WIRELESS_BUZZ_CONTROLLER) }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index aeeea79..60348ec 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -288,6 +288,11 @@ #define 
USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_73F7 0x73f7 #define USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_A001 0xa001 +#define USB_VENDOR_ID_ELAN 0x04f3 +#define USB_DEVICE_ID_ELAN_TOUCHSCREEN 0x0089 +#define USB_DEVICE_ID_ELAN_TOUCHSCREEN_009B 0x009b +#define USB_DEVICE_ID_ELAN_TOUCHSCREEN_016F 0x016f + #define USB_VENDOR_ID_ELECOM 0x056e #define USB_DEVICE_ID_ELECOM_BM084 0x0061 @@ -312,6 +317,7 @@ #define USB_VENDOR_ID_ETURBOTOUCH 0x22b9 #define USB_DEVICE_ID_ETURBOTOUCH 0x0006 +#define USB_DEVICE_ID_ETURBOTOUCH_2968 0x2968 #define USB_VENDOR_ID_EZKEY 0x0518 #define USB_DEVICE_ID_BTC_8193 0x0002 @@ -457,6 +463,7 @@ #define USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A067 0xa067 #define USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A072 0xa072 #define USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A081 0xa081 +#define USB_DEVICE_ID_HOLTEK_ALT_KEYBOARD_A096 0xa096 #define USB_VENDOR_ID_IMATION 0x0718 #define USB_DEVICE_ID_DISC_STAKKA 0xd000 @@ -494,6 +501,7 @@ #define USB_DEVICE_ID_KYE_GPEN_560 0x5003 #define USB_DEVICE_ID_KYE_EASYPEN_I405X 0x5010 #define USB_DEVICE_ID_KYE_MOUSEPEN_I608X 0x5011 +#define USB_DEVICE_ID_KYE_MOUSEPEN_I608X_2 0x501a #define USB_DEVICE_ID_KYE_EASYPEN_M610X 0x5013 #define USB_VENDOR_ID_LABTEC 0x1020 @@ -636,6 +644,7 @@ #define USB_VENDOR_ID_NEXIO 0x1870 #define USB_DEVICE_ID_NEXIO_MULTITOUCH_420 0x010d +#define USB_DEVICE_ID_NEXIO_MULTITOUCH_PTI0750 0x0110 #define USB_VENDOR_ID_NEXTWINDOW 0x1926 #define USB_DEVICE_ID_NEXTWINDOW_TOUCHSCREEN 0x0003 @@ -689,6 +698,8 @@ #define USB_VENDOR_ID_PENMOUNT 0x14e1 #define USB_DEVICE_ID_PENMOUNT_PCI 0x3500 +#define USB_DEVICE_ID_PENMOUNT_1610 0x1610 +#define USB_DEVICE_ID_PENMOUNT_1640 0x1640 #define USB_VENDOR_ID_PETALYNX 0x18b1 #define USB_DEVICE_ID_PETALYNX_MAXTER_REMOTE 0x0037 @@ -744,6 +755,9 @@ #define USB_DEVICE_ID_SAMSUNG_IR_REMOTE 0x0001 #define USB_DEVICE_ID_SAMSUNG_WIRELESS_KBD_MOUSE 0x0600 +#define USB_VENDOR_ID_SEMICO 0x1a2c +#define USB_DEVICE_ID_SEMICO_USB_KEYKOARD 0x0023 + #define USB_VENDOR_ID_SENNHEISER 0x1395 #define 
USB_DEVICE_ID_SENNHEISER_BTD500USB 0x002c @@ -753,9 +767,11 @@ #define USB_VENDOR_ID_SIGMATEL 0x066F #define USB_DEVICE_ID_SIGMATEL_STMP3780 0x3780 -#define USB_VENDOR_ID_SIS2_TOUCH 0x0457 +#define USB_VENDOR_ID_SIS_TOUCH 0x0457 #define USB_DEVICE_ID_SIS9200_TOUCH 0x9200 #define USB_DEVICE_ID_SIS817_TOUCH 0x0817 +#define USB_DEVICE_ID_SIS_TS 0x1013 +#define USB_DEVICE_ID_SIS1030_TOUCH 0x1030 #define USB_VENDOR_ID_SKYCABLE 0x1223 #define USB_DEVICE_ID_SKYCABLE_WIRELESS_PRESENTER 0x3F07 @@ -807,6 +823,9 @@ #define USB_DEVICE_ID_SYNAPTICS_DPAD 0x0013 #define USB_DEVICE_ID_SYNAPTICS_LTS1 0x0af8 #define USB_DEVICE_ID_SYNAPTICS_LTS2 0x1d10 +#define USB_DEVICE_ID_SYNAPTICS_HD 0x0ac3 +#define USB_DEVICE_ID_SYNAPTICS_QUAD_HD 0x1ac3 +#define USB_DEVICE_ID_SYNAPTICS_TP_V103 0x5710 #define USB_VENDOR_ID_THINGM 0x27b8 #define USB_DEVICE_ID_BLINK1 0x01ed @@ -937,7 +956,5 @@ #define USB_VENDOR_ID_PRIMAX 0x0461 #define USB_DEVICE_ID_PRIMAX_KEYBOARD 0x4e05 -#define USB_VENDOR_ID_SIS 0x0457 -#define USB_DEVICE_ID_SIS_TS 0x1013 #endif diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index d97f232..6f568b6 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -312,6 +312,9 @@ static const struct hid_device_id hid_battery_quirks[] = { USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ANSI), HID_BATTERY_QUIRK_PERCENT | HID_BATTERY_QUIRK_FEATURE }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, + USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ISO), + HID_BATTERY_QUIRK_PERCENT | HID_BATTERY_QUIRK_FEATURE }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI), HID_BATTERY_QUIRK_PERCENT | HID_BATTERY_QUIRK_FEATURE }, {} diff --git a/drivers/hid/hid-kye.c b/drivers/hid/hid-kye.c index d645caa..a4beb99 100644 --- a/drivers/hid/hid-kye.c +++ b/drivers/hid/hid-kye.c @@ -300,7 +300,7 @@ static __u8 *kye_report_fixup(struct hid_device *hdev, __u8 *rdesc, * - change the button usage range to 4-7 for the extra * buttons */ - if (*rsize >= 74 && + if (*rsize >= 
75 && rdesc[61] == 0x05 && rdesc[62] == 0x08 && rdesc[63] == 0x19 && rdesc[64] == 0x08 && rdesc[65] == 0x29 && rdesc[66] == 0x0f && @@ -323,6 +323,7 @@ static __u8 *kye_report_fixup(struct hid_device *hdev, __u8 *rdesc, } break; case USB_DEVICE_ID_KYE_MOUSEPEN_I608X: + case USB_DEVICE_ID_KYE_MOUSEPEN_I608X_2: if (*rsize == MOUSEPEN_I608X_RDESC_ORIG_SIZE) { rdesc = mousepen_i608x_rdesc_fixed; *rsize = sizeof(mousepen_i608x_rdesc_fixed); @@ -415,6 +416,7 @@ static int kye_probe(struct hid_device *hdev, const struct hid_device_id *id) switch (id->product) { case USB_DEVICE_ID_KYE_EASYPEN_I405X: case USB_DEVICE_ID_KYE_MOUSEPEN_I608X: + case USB_DEVICE_ID_KYE_MOUSEPEN_I608X_2: case USB_DEVICE_ID_KYE_EASYPEN_M610X: ret = kye_tablet_enable(hdev); if (ret) { @@ -438,6 +440,8 @@ static const struct hid_device_id kye_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, + USB_DEVICE_ID_KYE_MOUSEPEN_I608X_2) }, + { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M610X) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_GENIUS_GILA_GAMING_MOUSE) }, diff --git a/drivers/hid/hid-lg.c b/drivers/hid/hid-lg.c index 06eb45f..12fc48c 100644 --- a/drivers/hid/hid-lg.c +++ b/drivers/hid/hid-lg.c @@ -345,14 +345,14 @@ static __u8 *lg_report_fixup(struct hid_device *hdev, __u8 *rdesc, struct usb_device_descriptor *udesc; __u16 bcdDevice, rev_maj, rev_min; - if ((drv_data->quirks & LG_RDESC) && *rsize >= 90 && rdesc[83] == 0x26 && + if ((drv_data->quirks & LG_RDESC) && *rsize >= 91 && rdesc[83] == 0x26 && rdesc[84] == 0x8c && rdesc[85] == 0x02) { hid_info(hdev, "fixing up Logitech keyboard report descriptor\n"); rdesc[84] = rdesc[89] = 0x4d; rdesc[85] = rdesc[90] = 0x10; } - if ((drv_data->quirks & LG_RDESC_REL_ABS) && *rsize >= 50 && + if ((drv_data->quirks & LG_RDESC_REL_ABS) && *rsize >= 51 && rdesc[32] == 0x81 && rdesc[33] == 0x06 && rdesc[49] == 0x81 && rdesc[50] == 0x06) { hid_info(hdev, diff --git 
a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index 2e53024..b43d363 100644 --- a/drivers/hid/hid-logitech-dj.c +++ b/drivers/hid/hid-logitech-dj.c @@ -237,13 +237,6 @@ static void logi_dj_recv_add_djhid_device(struct dj_receiver_dev *djrcv_dev, return; } - if ((dj_report->device_index < DJ_DEVICE_INDEX_MIN) || - (dj_report->device_index > DJ_DEVICE_INDEX_MAX)) { - dev_err(&djrcv_hdev->dev, "%s: invalid device index:%d\n", - __func__, dj_report->device_index); - return; - } - if (djrcv_dev->paired_dj_devices[dj_report->device_index]) { /* The device is already known. No need to reallocate it. */ dbg_hid("%s: device is already known\n", __func__); @@ -516,6 +509,14 @@ static int logi_dj_recv_switch_to_dj_mode(struct dj_receiver_dev *djrcv_dev, dj_report->report_params[CMD_SWITCH_PARAM_TIMEOUT_SECONDS] = (u8)timeout; retval = logi_dj_recv_send_report(djrcv_dev, dj_report); kfree(dj_report); + + /* + * Ugly sleep to work around a USB 3.0 bug when the receiver is still + * processing the "switch-to-dj" command while we send an other command. + * 50 msec should gives enough time to the receiver to be ready. + */ + msleep(50); + return retval; } @@ -686,7 +687,6 @@ static int logi_dj_raw_event(struct hid_device *hdev, struct dj_receiver_dev *djrcv_dev = hid_get_drvdata(hdev); struct dj_report *dj_report = (struct dj_report *) data; unsigned long flags; - bool report_processed = false; dbg_hid("%s, size:%d\n", __func__, size); @@ -714,27 +714,41 @@ static int logi_dj_raw_event(struct hid_device *hdev, * anything else with it. */ + /* case 1) */ + if (data[0] != REPORT_ID_DJ_SHORT) + return false; + + if ((dj_report->device_index < DJ_DEVICE_INDEX_MIN) || + (dj_report->device_index > DJ_DEVICE_INDEX_MAX)) { + /* + * Device index is wrong, bail out. + * This driver can ignore safely the receiver notifications, + * so ignore those reports too. 
+ */ + if (dj_report->device_index != DJ_RECEIVER_INDEX) + dev_err(&hdev->dev, "%s: invalid device index:%d\n", + __func__, dj_report->device_index); + return false; + } + spin_lock_irqsave(&djrcv_dev->lock, flags); - if (dj_report->report_id == REPORT_ID_DJ_SHORT) { - switch (dj_report->report_type) { - case REPORT_TYPE_NOTIF_DEVICE_PAIRED: - case REPORT_TYPE_NOTIF_DEVICE_UNPAIRED: - logi_dj_recv_queue_notification(djrcv_dev, dj_report); - break; - case REPORT_TYPE_NOTIF_CONNECTION_STATUS: - if (dj_report->report_params[CONNECTION_STATUS_PARAM_STATUS] == - STATUS_LINKLOSS) { - logi_dj_recv_forward_null_report(djrcv_dev, dj_report); - } - break; - default: - logi_dj_recv_forward_report(djrcv_dev, dj_report); + switch (dj_report->report_type) { + case REPORT_TYPE_NOTIF_DEVICE_PAIRED: + case REPORT_TYPE_NOTIF_DEVICE_UNPAIRED: + logi_dj_recv_queue_notification(djrcv_dev, dj_report); + break; + case REPORT_TYPE_NOTIF_CONNECTION_STATUS: + if (dj_report->report_params[CONNECTION_STATUS_PARAM_STATUS] == + STATUS_LINKLOSS) { + logi_dj_recv_forward_null_report(djrcv_dev, dj_report); } - report_processed = true; + break; + default: + logi_dj_recv_forward_report(djrcv_dev, dj_report); } spin_unlock_irqrestore(&djrcv_dev->lock, flags); - return report_processed; + return true; } static int logi_dj_probe(struct hid_device *hdev, diff --git a/drivers/hid/hid-logitech-dj.h b/drivers/hid/hid-logitech-dj.h index 4a40003..daeb0aa 100644 --- a/drivers/hid/hid-logitech-dj.h +++ b/drivers/hid/hid-logitech-dj.h @@ -27,6 +27,7 @@ #define DJ_MAX_PAIRED_DEVICES 6 #define DJ_MAX_NUMBER_NOTIFICATIONS 8 +#define DJ_RECEIVER_INDEX 0 #define DJ_DEVICE_INDEX_MIN 1 #define DJ_DEVICE_INDEX_MAX 6 diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c index 3b43d1c..991ba79 100644 --- a/drivers/hid/hid-magicmouse.c +++ b/drivers/hid/hid-magicmouse.c @@ -290,6 +290,11 @@ static int magicmouse_raw_event(struct hid_device *hdev, if (size < 4 || ((size - 4) % 9) != 0) return 0; npoints 
= (size - 4) / 9; + if (npoints > 15) { + hid_warn(hdev, "invalid size value (%d) for TRACKPAD_REPORT_ID\n", + size); + return 0; + } msc->ntouches = 0; for (ii = 0; ii < npoints; ii++) magicmouse_emit_touch(msc, ii, data + ii * 9 + 4); @@ -307,6 +312,11 @@ static int magicmouse_raw_event(struct hid_device *hdev, if (size < 6 || ((size - 6) % 8) != 0) return 0; npoints = (size - 6) / 8; + if (npoints > 15) { + hid_warn(hdev, "invalid size value (%d) for MOUSE_REPORT_ID\n", + size); + return 0; + } msc->ntouches = 0; for (ii = 0; ii < npoints; ii++) magicmouse_emit_touch(msc, ii, data + ii * 8 + 6); diff --git a/drivers/hid/hid-monterey.c b/drivers/hid/hid-monterey.c index 9e14c00..25daf28 100644 --- a/drivers/hid/hid-monterey.c +++ b/drivers/hid/hid-monterey.c @@ -24,7 +24,7 @@ static __u8 *mr_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { - if (*rsize >= 30 && rdesc[29] == 0x05 && rdesc[30] == 0x09) { + if (*rsize >= 31 && rdesc[29] == 0x05 && rdesc[30] == 0x09) { hid_info(hdev, "fixing up button/consumer in HID report descriptor\n"); rdesc[30] = 0x0c; } diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index d83b1e8..f134d73 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1301,11 +1301,14 @@ static const struct hid_device_id mt_devices[] = { /* SiS panels */ { .driver_data = MT_CLS_DEFAULT, - HID_USB_DEVICE(USB_VENDOR_ID_SIS2_TOUCH, + HID_USB_DEVICE(USB_VENDOR_ID_SIS_TOUCH, USB_DEVICE_ID_SIS9200_TOUCH) }, { .driver_data = MT_CLS_DEFAULT, - HID_USB_DEVICE(USB_VENDOR_ID_SIS2_TOUCH, + HID_USB_DEVICE(USB_VENDOR_ID_SIS_TOUCH, USB_DEVICE_ID_SIS817_TOUCH) }, + { .driver_data = MT_CLS_DEFAULT, + HID_USB_DEVICE(USB_VENDOR_ID_SIS_TOUCH, + USB_DEVICE_ID_SIS1030_TOUCH) }, /* Stantum panels */ { .driver_data = MT_CLS_CONFIDENCE, diff --git a/drivers/hid/hid-petalynx.c b/drivers/hid/hid-petalynx.c index 736b250..6aca4f2 100644 --- a/drivers/hid/hid-petalynx.c +++ b/drivers/hid/hid-petalynx.c @@ 
-25,7 +25,7 @@ static __u8 *pl_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { - if (*rsize >= 60 && rdesc[39] == 0x2a && rdesc[40] == 0xf5 && + if (*rsize >= 62 && rdesc[39] == 0x2a && rdesc[40] == 0xf5 && rdesc[41] == 0x00 && rdesc[59] == 0x26 && rdesc[60] == 0xf9 && rdesc[61] == 0x00) { hid_info(hdev, "fixing up Petalynx Maxter Remote report descriptor\n"); diff --git a/drivers/hid/hid-picolcd_core.c b/drivers/hid/hid-picolcd_core.c index acbb0210..020df3c 100644 --- a/drivers/hid/hid-picolcd_core.c +++ b/drivers/hid/hid-picolcd_core.c @@ -350,6 +350,12 @@ static int picolcd_raw_event(struct hid_device *hdev, if (!data) return 1; + if (size > 64) { + hid_warn(hdev, "invalid size value (%d) for picolcd raw event\n", + size); + return 0; + } + if (report->id == REPORT_KEY_STATE) { if (data->input_keys) ret = picolcd_raw_keypad(data, report, raw_data+1, size-1); diff --git a/drivers/hid/hid-roccat-pyra.c b/drivers/hid/hid-roccat-pyra.c index 1a07e07..47d7e74 100644 --- a/drivers/hid/hid-roccat-pyra.c +++ b/drivers/hid/hid-roccat-pyra.c @@ -35,6 +35,8 @@ static struct class *pyra_class; static void profile_activated(struct pyra_device *pyra, unsigned int new_profile) { + if (new_profile >= ARRAY_SIZE(pyra->profile_settings)) + return; pyra->actual_profile = new_profile; pyra->actual_cpi = pyra->profile_settings[pyra->actual_profile].y_cpi; } @@ -257,9 +259,11 @@ static ssize_t pyra_sysfs_write_settings(struct file *fp, if (off != 0 || count != PYRA_SIZE_SETTINGS) return -EINVAL; - mutex_lock(&pyra->pyra_lock); - settings = (struct pyra_settings const *)buf; + if (settings->startup_profile >= ARRAY_SIZE(pyra->profile_settings)) + return -EINVAL; + + mutex_lock(&pyra->pyra_lock); retval = pyra_set_settings(usb_dev, settings); if (retval) { diff --git a/drivers/hid/hid-sunplus.c b/drivers/hid/hid-sunplus.c index 87fc91e..91072fa 100644 --- a/drivers/hid/hid-sunplus.c +++ b/drivers/hid/hid-sunplus.c @@ -24,7 +24,7 @@ static __u8 
*sp_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { - if (*rsize >= 107 && rdesc[104] == 0x26 && rdesc[105] == 0x80 && + if (*rsize >= 112 && rdesc[104] == 0x26 && rdesc[105] == 0x80 && rdesc[106] == 0x03) { hid_info(hdev, "fixing up Sunplus Wireless Desktop report descriptor\n"); rdesc[105] = rdesc[110] = 0x03; diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c index c133619..e29d8a0 100644 --- a/drivers/hid/i2c-hid/i2c-hid.c +++ b/drivers/hid/i2c-hid/i2c-hid.c @@ -136,6 +136,7 @@ struct i2c_hid { * descriptor. */ unsigned int bufsize; /* i2c buffer size */ char *inbuf; /* Input buffer */ + char *rawbuf; /* Raw Input buffer */ char *cmdbuf; /* Command buffer */ char *argsbuf; /* Command arguments buffer */ @@ -355,7 +356,7 @@ static int i2c_hid_hwreset(struct i2c_client *client) static void i2c_hid_get_input(struct i2c_hid *ihid) { int ret, ret_size; - int size = le16_to_cpu(ihid->hdesc.wMaxInputLength); + int size = ihid->bufsize; ret = i2c_master_recv(ihid->client, ihid->inbuf, size); if (ret != size) { @@ -486,9 +487,11 @@ static void i2c_hid_find_max_report(struct hid_device *hid, unsigned int type, static void i2c_hid_free_buffers(struct i2c_hid *ihid) { kfree(ihid->inbuf); + kfree(ihid->rawbuf); kfree(ihid->argsbuf); kfree(ihid->cmdbuf); ihid->inbuf = NULL; + ihid->rawbuf = NULL; ihid->cmdbuf = NULL; ihid->argsbuf = NULL; ihid->bufsize = 0; @@ -504,10 +507,11 @@ static int i2c_hid_alloc_buffers(struct i2c_hid *ihid, size_t report_size) report_size; /* report */ ihid->inbuf = kzalloc(report_size, GFP_KERNEL); + ihid->rawbuf = kzalloc(report_size, GFP_KERNEL); ihid->argsbuf = kzalloc(args_len, GFP_KERNEL); ihid->cmdbuf = kzalloc(sizeof(union command) + args_len, GFP_KERNEL); - if (!ihid->inbuf || !ihid->argsbuf || !ihid->cmdbuf) { + if (!ihid->inbuf || !ihid->rawbuf || !ihid->argsbuf || !ihid->cmdbuf) { i2c_hid_free_buffers(ihid); return -ENOMEM; } @@ -534,12 +538,12 @@ static int i2c_hid_get_raw_report(struct 
hid_device *hid, ret = i2c_hid_get_report(client, report_type == HID_FEATURE_REPORT ? 0x03 : 0x01, - report_number, ihid->inbuf, ask_count); + report_number, ihid->rawbuf, ask_count); if (ret < 0) return ret; - ret_count = ihid->inbuf[0] | (ihid->inbuf[1] << 8); + ret_count = ihid->rawbuf[0] | (ihid->rawbuf[1] << 8); if (ret_count <= 2) return 0; @@ -548,7 +552,7 @@ static int i2c_hid_get_raw_report(struct hid_device *hid, /* The query buffer contains the size, dropping it in the reply */ count = min(count, ret_count - 2); - memcpy(buf, ihid->inbuf + 2, count); + memcpy(buf, ihid->rawbuf + 2, count); return count; } diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index 44df131..bcc3193 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -82,7 +82,7 @@ static int hid_start_in(struct hid_device *hid) struct usbhid_device *usbhid = hid->driver_data; spin_lock_irqsave(&usbhid->lock, flags); - if (hid->open > 0 && + if ((hid->open > 0 || hid->quirks & HID_QUIRK_ALWAYS_POLL) && !test_bit(HID_DISCONNECTED, &usbhid->iofl) && !test_bit(HID_SUSPENDED, &usbhid->iofl) && !test_and_set_bit(HID_IN_RUNNING, &usbhid->iofl)) { @@ -292,6 +292,8 @@ static void hid_irq_in(struct urb *urb) case 0: /* success */ usbhid_mark_busy(usbhid); usbhid->retry_delay = 0; + if ((hid->quirks & HID_QUIRK_ALWAYS_POLL) && !hid->open) + break; hid_input_report(urb->context, HID_INPUT_REPORT, urb->transfer_buffer, urb->actual_length, 1); @@ -536,7 +538,8 @@ static void __usbhid_submit_report(struct hid_device *hid, struct hid_report *re int head; struct usbhid_device *usbhid = hid->driver_data; - if ((hid->quirks & HID_QUIRK_NOGET) && dir == USB_DIR_IN) + if (((hid->quirks & HID_QUIRK_NOGET) && dir == USB_DIR_IN) || + test_bit(HID_DISCONNECTED, &usbhid->iofl)) return; if (usbhid->urbout && dir == USB_DIR_OUT && report->type == HID_OUTPUT_REPORT) { @@ -734,8 +737,10 @@ void usbhid_close(struct hid_device *hid) if (!--hid->open) { 
spin_unlock_irq(&usbhid->lock); hid_cancel_delayed_stuff(usbhid); - usb_kill_urb(usbhid->urbin); - usbhid->intf->needs_remote_wakeup = 0; + if (!(hid->quirks & HID_QUIRK_ALWAYS_POLL)) { + usb_kill_urb(usbhid->urbin); + usbhid->intf->needs_remote_wakeup = 0; + } } else { spin_unlock_irq(&usbhid->lock); } @@ -1119,6 +1124,19 @@ static int usbhid_start(struct hid_device *hid) set_bit(HID_STARTED, &usbhid->iofl); + if (hid->quirks & HID_QUIRK_ALWAYS_POLL) { + ret = usb_autopm_get_interface(usbhid->intf); + if (ret) + goto fail; + usbhid->intf->needs_remote_wakeup = 1; + ret = hid_start_in(hid); + if (ret) { + dev_err(&hid->dev, + "failed to start in urb: %d\n", ret); + } + usb_autopm_put_interface(usbhid->intf); + } + /* Some keyboards don't work until their LEDs have been set. * Since BIOSes do set the LEDs, it must be safe for any device * that supports the keyboard boot protocol. @@ -1151,6 +1169,9 @@ static void usbhid_stop(struct hid_device *hid) if (WARN_ON(!usbhid)) return; + if (hid->quirks & HID_QUIRK_ALWAYS_POLL) + usbhid->intf->needs_remote_wakeup = 0; + clear_bit(HID_STARTED, &usbhid->iofl); spin_lock_irq(&usbhid->lock); /* Sync with error and led handlers */ set_bit(HID_DISCONNECTED, &usbhid->iofl); @@ -1338,6 +1359,9 @@ static void usbhid_disconnect(struct usb_interface *intf) return; usbhid = hid->driver_data; + spin_lock_irq(&usbhid->lock); /* Sync with error and led handlers */ + set_bit(HID_DISCONNECTED, &usbhid->iofl); + spin_unlock_irq(&usbhid->lock); hid_destroy_device(hid); kfree(usbhid); } diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index 0db9a67..3554496 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -49,6 +49,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_EMS, USB_DEVICE_ID_EMS_TRIO_LINKER_PLUS_II, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_ETURBOTOUCH, USB_DEVICE_ID_ETURBOTOUCH, HID_QUIRK_MULTI_INPUT }, + { USB_VENDOR_ID_ETURBOTOUCH, 
USB_DEVICE_ID_ETURBOTOUCH_2968, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_GREENASIA, USB_DEVICE_ID_GREENASIA_DUAL_USB_JOYPAD, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_PANTHERLORD, USB_DEVICE_ID_PANTHERLORD_TWIN_USB_JOYSTICK, HID_QUIRK_MULTI_INPUT | HID_QUIRK_SKIP_OUTPUT_REPORTS }, { USB_VENDOR_ID_PLAYDOTCOM, USB_DEVICE_ID_PLAYDOTCOM_EMS_USBII, HID_QUIRK_MULTI_INPUT }, @@ -69,12 +70,18 @@ static const struct hid_blacklist { { USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_3AXIS_5BUTTON_STICK, HID_QUIRK_NOGET }, { USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_AXIS_295, HID_QUIRK_NOGET }, { USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET }, + { USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ELAN_TOUCHSCREEN, HID_QUIRK_ALWAYS_POLL }, + { USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ELAN_TOUCHSCREEN_009B, HID_QUIRK_ALWAYS_POLL }, + { USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ELAN_TOUCHSCREEN_016F, HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_TS2700, HID_QUIRK_NOGET }, { USB_VENDOR_ID_FORMOSA, USB_DEVICE_ID_FORMOSA_IR_RECEIVER, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_FREESCALE, USB_DEVICE_ID_FREESCALE_MX28, HID_QUIRK_NOGET }, { USB_VENDOR_ID_MGE, USB_DEVICE_ID_MGE_UPS, HID_QUIRK_NOGET }, { USB_VENDOR_ID_MSI, USB_DEVICE_ID_MSI_GX680R_LED_PANEL, HID_QUIRK_NO_INIT_REPORTS }, + { USB_VENDOR_ID_NEXIO, USB_DEVICE_ID_NEXIO_MULTITOUCH_PTI0750, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_NOVATEK, USB_DEVICE_ID_NOVATEK_MOUSE, HID_QUIRK_NO_INIT_REPORTS }, + { USB_VENDOR_ID_PENMOUNT, USB_DEVICE_ID_PENMOUNT_1610, HID_QUIRK_NOGET }, + { USB_VENDOR_ID_PENMOUNT, USB_DEVICE_ID_PENMOUNT_1640, HID_QUIRK_NOGET }, { USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN1, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN2, HID_QUIRK_NO_INIT_REPORTS }, @@ -84,8 +91,10 @@ static const struct hid_blacklist { { USB_VENDOR_ID_REALTEK, USB_DEVICE_ID_REALTEK_READER, 
HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_SENNHEISER, USB_DEVICE_ID_SENNHEISER_BTD500USB, HID_QUIRK_NOGET }, { USB_VENDOR_ID_SIGMATEL, USB_DEVICE_ID_SIGMATEL_STMP3780, HID_QUIRK_NOGET }, - { USB_VENDOR_ID_SIS2_TOUCH, USB_DEVICE_ID_SIS9200_TOUCH, HID_QUIRK_NOGET }, - { USB_VENDOR_ID_SIS2_TOUCH, USB_DEVICE_ID_SIS817_TOUCH, HID_QUIRK_NOGET }, + { USB_VENDOR_ID_SIS_TOUCH, USB_DEVICE_ID_SIS9200_TOUCH, HID_QUIRK_NOGET }, + { USB_VENDOR_ID_SIS_TOUCH, USB_DEVICE_ID_SIS817_TOUCH, HID_QUIRK_NOGET }, + { USB_VENDOR_ID_SIS_TOUCH, USB_DEVICE_ID_SIS_TS, HID_QUIRK_NO_INIT_REPORTS }, + { USB_VENDOR_ID_SIS_TOUCH, USB_DEVICE_ID_SIS1030_TOUCH, HID_QUIRK_NOGET }, { USB_VENDOR_ID_SUN, USB_DEVICE_ID_RARITAN_KVM_DONGLE, HID_QUIRK_NOGET }, { USB_VENDOR_ID_SYMBOL, USB_DEVICE_ID_SYMBOL_SCANNER_1, HID_QUIRK_NOGET }, { USB_VENDOR_ID_SYMBOL, USB_DEVICE_ID_SYMBOL_SCANNER_2, HID_QUIRK_NOGET }, @@ -110,11 +119,16 @@ static const struct hid_blacklist { { USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_SIGMA_MICRO, USB_DEVICE_ID_SIGMA_MICRO_KEYBOARD, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X, HID_QUIRK_MULTI_INPUT }, + { USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X_2, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M610X, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_DUOSENSE, HID_QUIRK_NO_INIT_REPORTS }, + { USB_VENDOR_ID_SEMICO, USB_DEVICE_ID_SEMICO_USB_KEYKOARD, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_LTS1, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_LTS2, HID_QUIRK_NO_INIT_REPORTS }, - { USB_VENDOR_ID_SIS, USB_DEVICE_ID_SIS_TS, HID_QUIRK_NO_INIT_REPORTS }, + { USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_HD, HID_QUIRK_NO_INIT_REPORTS }, + { USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_QUAD_HD, HID_QUIRK_NO_INIT_REPORTS }, + { USB_VENDOR_ID_SYNAPTICS, 
USB_DEVICE_ID_SYNAPTICS_TP_V103, HID_QUIRK_NO_INIT_REPORTS }, + { USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_KEYBOARD_A096, HID_QUIRK_NO_INIT_INPUT_REPORTS }, { 0, 0 } }; diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 6de6c98..dea6613 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -208,8 +208,10 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, ret = vmbus_post_msg(open_msg, sizeof(struct vmbus_channel_open_channel)); - if (ret != 0) + if (ret != 0) { + err = ret; goto error1; + } t = wait_for_completion_timeout(&open_info->waitevent, 5*HZ); if (t == 0) { @@ -404,7 +406,6 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer, u32 next_gpadl_handle; unsigned long flags; int ret = 0; - int t; next_gpadl_handle = atomic_read(&vmbus_connection.next_gpadl_handle); atomic_inc(&vmbus_connection.next_gpadl_handle); @@ -451,9 +452,7 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer, } } - t = wait_for_completion_timeout(&msginfo->waitevent, 5*HZ); - BUG_ON(t == 0); - + wait_for_completion(&msginfo->waitevent); /* At this point, we received the gpadl created msg */ *gpadl_handle = gpadlmsg->gpadl; @@ -476,7 +475,7 @@ int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle) struct vmbus_channel_gpadl_teardown *msg; struct vmbus_channel_msginfo *info; unsigned long flags; - int ret, t; + int ret; info = kmalloc(sizeof(*info) + sizeof(struct vmbus_channel_gpadl_teardown), GFP_KERNEL); @@ -498,11 +497,12 @@ int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle) ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_gpadl_teardown)); - BUG_ON(ret != 0); - t = wait_for_completion_timeout(&info->waitevent, 5*HZ); - BUG_ON(t == 0); + if (ret) + goto post_msg_err; + + wait_for_completion(&info->waitevent); - /* Received a torndown response */ +post_msg_err: spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 
list_del(&info->msglistentry); spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); @@ -512,7 +512,7 @@ int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle) } EXPORT_SYMBOL_GPL(vmbus_teardown_gpadl); -static void vmbus_close_internal(struct vmbus_channel *channel) +static int vmbus_close_internal(struct vmbus_channel *channel) { struct vmbus_channel_close_channel *msg; int ret; @@ -534,11 +534,28 @@ static void vmbus_close_internal(struct vmbus_channel *channel) ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_close_channel)); - BUG_ON(ret != 0); + if (ret) { + pr_err("Close failed: close post msg return is %d\n", ret); + /* + * If we failed to post the close msg, + * it is perhaps better to leak memory. + */ + return ret; + } + /* Tear down the gpadl for the channel's ring buffer */ - if (channel->ringbuffer_gpadlhandle) - vmbus_teardown_gpadl(channel, - channel->ringbuffer_gpadlhandle); + if (channel->ringbuffer_gpadlhandle) { + ret = vmbus_teardown_gpadl(channel, + channel->ringbuffer_gpadlhandle); + if (ret) { + pr_err("Close failed: teardown gpadl return %d\n", ret); + /* + * If we failed to teardown gpadl, + * it is perhaps better to leak memory. 
+ */ + return ret; + } + } /* Cleanup the ring buffers for this channel */ hv_ringbuffer_cleanup(&channel->outbound); @@ -547,7 +564,7 @@ static void vmbus_close_internal(struct vmbus_channel *channel) free_pages((unsigned long)channel->ringbuffer_pages, get_order(channel->ringbuffer_pagecount * PAGE_SIZE)); - + return ret; } /* diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index fa92046..505fe29 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -202,9 +202,16 @@ static void vmbus_process_rescind_offer(struct work_struct *work) unsigned long flags; struct vmbus_channel *primary_channel; struct vmbus_channel_relid_released msg; + struct device *dev; + + if (channel->device_obj) { + dev = get_device(&channel->device_obj->device); + if (dev) { + vmbus_device_unregister(channel->device_obj); + put_device(dev); + } + } - if (channel->device_obj) - vmbus_device_unregister(channel->device_obj); memset(&msg, 0, sizeof(struct vmbus_channel_relid_released)); msg.child_relid = channel->offermsg.child_relid; msg.header.msgtype = CHANNELMSG_RELID_RELEASED; diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index 13c23a4..30688f6 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -55,6 +55,9 @@ static __u32 vmbus_get_next_version(__u32 current_version) case (VERSION_WIN8): return VERSION_WIN7; + case (VERSION_WIN8_1): + return VERSION_WIN8; + case (VERSION_WS2008): default: return VERSION_INVAL; @@ -79,6 +82,8 @@ static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, msg->monitor_page2 = virt_to_phys( (void *)((unsigned long)vmbus_connection.monitor_pages + PAGE_SIZE)); + if (version == VERSION_WIN8_1) + msg->target_vcpu = hv_context.vp_index[smp_processor_id()]; /* * Add to list before we send the request since we may @@ -315,9 +320,13 @@ static void process_chn_event(u32 relid) */ do { - hv_begin_read(&channel->inbound); + if (read_state) + hv_begin_read(&channel->inbound); 
channel->onchannel_callback(arg); - bytes_to_read = hv_end_read(&channel->inbound); + if (read_state) + bytes_to_read = hv_end_read(&channel->inbound); + else + bytes_to_read = 0; } while (read_state && (bytes_to_read != 0)); } else { pr_err("no channel callback for relid - %u\n", relid); @@ -400,10 +409,21 @@ int vmbus_post_msg(void *buffer, size_t buflen) * insufficient resources. Retry the operation a couple of * times before giving up. */ - while (retries < 3) { - ret = hv_post_message(conn_id, 1, buffer, buflen); - if (ret != HV_STATUS_INSUFFICIENT_BUFFERS) + while (retries < 10) { + ret = hv_post_message(conn_id, 1, buffer, buflen); + + switch (ret) { + case HV_STATUS_INSUFFICIENT_BUFFERS: + ret = -ENOMEM; + case -ENOMEM: + break; + case HV_STATUS_SUCCESS: return ret; + default: + pr_err("hv_post_msg() failed; error code:%d\n", ret); + return -EINVAL; + } + retries++; msleep(100); } diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 7e17a54..393fd8a 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -19,6 +19,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> +#include <linux/jiffies.h> #include <linux/mman.h> #include <linux/delay.h> #include <linux/init.h> @@ -459,6 +460,11 @@ static bool do_hot_add; */ static uint pressure_report_delay = 45; +/* + * The last time we posted a pressure report to host. + */ +static unsigned long last_post_time; + module_param(hot_add, bool, (S_IRUGO | S_IWUSR)); MODULE_PARM_DESC(hot_add, "If set attempt memory hot_add"); @@ -542,6 +548,7 @@ struct hv_dynmem_device { static struct hv_dynmem_device dm_device; +static void post_status(struct hv_dynmem_device *dm); #ifdef CONFIG_MEMORY_HOTPLUG static void hv_bring_pgs_online(unsigned long start_pfn, unsigned long size) @@ -612,7 +619,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size, * have not been "onlined" within the allowed time. 
*/ wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ); - + post_status(&dm_device); } return; @@ -951,11 +958,17 @@ static void post_status(struct hv_dynmem_device *dm) { struct dm_status status; struct sysinfo val; + unsigned long now = jiffies; + unsigned long last_post = last_post_time; if (pressure_report_delay > 0) { --pressure_report_delay; return; } + + if (!time_after(now, (last_post_time + HZ))) + return; + si_meminfo(&val); memset(&status, 0, sizeof(struct dm_status)); status.hdr.type = DM_STATUS_REPORT; @@ -983,6 +996,14 @@ static void post_status(struct hv_dynmem_device *dm) if (status.hdr.trans_id != atomic_read(&trans_id)) return; + /* + * If the last post time that we sampled has changed, + * we have raced, don't post the status. + */ + if (last_post != last_post_time) + return; + + last_post_time = jiffies; vmbus_sendpacket(dm->dev->channel, &status, sizeof(struct dm_status), (unsigned long)NULL, @@ -1117,7 +1138,7 @@ static void balloon_up(struct work_struct *dummy) if (ret == -EAGAIN) msleep(20); - + post_status(&dm_device); } while (ret == -EAGAIN); if (ret) { @@ -1144,8 +1165,10 @@ static void balloon_down(struct hv_dynmem_device *dm, struct dm_unballoon_response resp; int i; - for (i = 0; i < range_count; i++) + for (i = 0; i < range_count; i++) { free_balloon_pages(dm, &range_array[i]); + post_status(&dm_device); + } if (req->more_pages == 1) return; diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c index 09988b2..816782a 100644 --- a/drivers/hv/hv_kvp.c +++ b/drivers/hv/hv_kvp.c @@ -127,6 +127,15 @@ kvp_work_func(struct work_struct *dummy) kvp_respond_to_host(NULL, HV_E_FAIL); } +static void poll_channel(struct vmbus_channel *channel) +{ + unsigned long flags; + + spin_lock_irqsave(&channel->inbound_lock, flags); + hv_kvp_onchannelcallback(channel); + spin_unlock_irqrestore(&channel->inbound_lock, flags); +} + static int kvp_handle_handshake(struct hv_kvp_msg *msg) { int ret = 1; @@ -155,7 +164,7 @@ static int 
kvp_handle_handshake(struct hv_kvp_msg *msg) kvp_register(dm_reg_value); kvp_transaction.active = false; if (kvp_transaction.kvp_context) - hv_kvp_onchannelcallback(kvp_transaction.kvp_context); + poll_channel(kvp_transaction.kvp_context); } return ret; } @@ -568,6 +577,7 @@ response_done: vmbus_sendpacket(channel, recv_buffer, buf_len, req_id, VM_PKT_DATA_INBAND, 0); + poll_channel(channel); } @@ -603,7 +613,7 @@ void hv_kvp_onchannelcallback(void *context) return; } - vmbus_recvpacket(channel, recv_buffer, PAGE_SIZE * 2, &recvlen, + vmbus_recvpacket(channel, recv_buffer, PAGE_SIZE * 4, &recvlen, &requestid); if (recvlen > 0) { diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c index 273e3dd..665b7da 100644 --- a/drivers/hv/hv_util.c +++ b/drivers/hv/hv_util.c @@ -312,7 +312,7 @@ static int util_probe(struct hv_device *dev, (struct hv_util_service *)dev_id->driver_data; int ret; - srv->recv_buffer = kmalloc(PAGE_SIZE * 2, GFP_KERNEL); + srv->recv_buffer = kmalloc(PAGE_SIZE * 4, GFP_KERNEL); if (!srv->recv_buffer) return -ENOMEM; if (srv->util_init) { diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 4201c7e..da2107b 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -981,7 +981,7 @@ config SENSORS_NCT6775 config SENSORS_NTC_THERMISTOR tristate "NTC thermistor support" - depends on (!OF && !IIO) || (OF && IIO) + depends on !OF || IIO=n || IIO help This driver supports NTC thermistors sensor reading and its interpretation. 
The driver can also monitor the temperature and diff --git a/drivers/hwmon/adm1021.c b/drivers/hwmon/adm1021.c index 29dd9f7..233b374 100644 --- a/drivers/hwmon/adm1021.c +++ b/drivers/hwmon/adm1021.c @@ -185,7 +185,7 @@ static ssize_t set_temp_max(struct device *dev, struct i2c_client *client = to_i2c_client(dev); struct adm1021_data *data = i2c_get_clientdata(client); long temp; - int err; + int reg_val, err; err = kstrtol(buf, 10, &temp); if (err) @@ -193,10 +193,11 @@ static ssize_t set_temp_max(struct device *dev, temp /= 1000; mutex_lock(&data->update_lock); - data->temp_max[index] = clamp_val(temp, -128, 127); + reg_val = clamp_val(temp, -128, 127); + data->temp_max[index] = reg_val * 1000; if (!read_only) i2c_smbus_write_byte_data(client, ADM1021_REG_TOS_W(index), - data->temp_max[index]); + reg_val); mutex_unlock(&data->update_lock); return count; @@ -210,7 +211,7 @@ static ssize_t set_temp_min(struct device *dev, struct i2c_client *client = to_i2c_client(dev); struct adm1021_data *data = i2c_get_clientdata(client); long temp; - int err; + int reg_val, err; err = kstrtol(buf, 10, &temp); if (err) @@ -218,10 +219,11 @@ static ssize_t set_temp_min(struct device *dev, temp /= 1000; mutex_lock(&data->update_lock); - data->temp_min[index] = clamp_val(temp, -128, 127); + reg_val = clamp_val(temp, -128, 127); + data->temp_min[index] = reg_val * 1000; if (!read_only) i2c_smbus_write_byte_data(client, ADM1021_REG_THYST_W(index), - data->temp_min[index]); + reg_val); mutex_unlock(&data->update_lock); return count; diff --git a/drivers/hwmon/adm1029.c b/drivers/hwmon/adm1029.c index 9ee5e06..39441e5 100644 --- a/drivers/hwmon/adm1029.c +++ b/drivers/hwmon/adm1029.c @@ -232,6 +232,9 @@ static ssize_t set_fan_div(struct device *dev, /* Update the value */ reg = (reg & 0x3F) | (val << 6); + /* Update the cache */ + data->fan_div[attr->index] = reg; + /* Write value */ i2c_smbus_write_byte_data(client, ADM1029_REG_FAN_DIV[attr->index], reg); diff --git 
a/drivers/hwmon/adm1031.c b/drivers/hwmon/adm1031.c index 253ea39..bdceca0 100644 --- a/drivers/hwmon/adm1031.c +++ b/drivers/hwmon/adm1031.c @@ -365,6 +365,7 @@ set_auto_temp_min(struct device *dev, struct device_attribute *attr, if (ret) return ret; + val = clamp_val(val, 0, 127000); mutex_lock(&data->update_lock); data->auto_temp[nr] = AUTO_TEMP_MIN_TO_REG(val, data->auto_temp[nr]); adm1031_write_value(client, ADM1031_REG_AUTO_TEMP(nr), @@ -394,6 +395,7 @@ set_auto_temp_max(struct device *dev, struct device_attribute *attr, if (ret) return ret; + val = clamp_val(val, 0, 127000); mutex_lock(&data->update_lock); data->temp_max[nr] = AUTO_TEMP_MAX_TO_REG(val, data->auto_temp[nr], data->pwm[nr]); @@ -696,7 +698,7 @@ static ssize_t set_temp_min(struct device *dev, struct device_attribute *attr, if (ret) return ret; - val = clamp_val(val, -55000, nr == 0 ? 127750 : 127875); + val = clamp_val(val, -55000, 127000); mutex_lock(&data->update_lock); data->temp_min[nr] = TEMP_TO_REG(val); adm1031_write_value(client, ADM1031_REG_TEMP_MIN(nr), @@ -717,7 +719,7 @@ static ssize_t set_temp_max(struct device *dev, struct device_attribute *attr, if (ret) return ret; - val = clamp_val(val, -55000, nr == 0 ? 127750 : 127875); + val = clamp_val(val, -55000, 127000); mutex_lock(&data->update_lock); data->temp_max[nr] = TEMP_TO_REG(val); adm1031_write_value(client, ADM1031_REG_TEMP_MAX(nr), @@ -738,7 +740,7 @@ static ssize_t set_temp_crit(struct device *dev, struct device_attribute *attr, if (ret) return ret; - val = clamp_val(val, -55000, nr == 0 ? 
127750 : 127875); + val = clamp_val(val, -55000, 127000); mutex_lock(&data->update_lock); data->temp_crit[nr] = TEMP_TO_REG(val); adm1031_write_value(client, ADM1031_REG_TEMP_CRIT(nr), diff --git a/drivers/hwmon/ads1015.c b/drivers/hwmon/ads1015.c index 7f9dc2f..1265164 100644 --- a/drivers/hwmon/ads1015.c +++ b/drivers/hwmon/ads1015.c @@ -198,7 +198,7 @@ static int ads1015_get_channels_config_of(struct i2c_client *client) } channel = be32_to_cpup(property); - if (channel > ADS1015_CHANNELS) { + if (channel >= ADS1015_CHANNELS) { dev_err(&client->dev, "invalid channel index %d on %s\n", channel, node->full_name); @@ -212,6 +212,7 @@ static int ads1015_get_channels_config_of(struct i2c_client *client) dev_err(&client->dev, "invalid gain on %s\n", node->full_name); + return -EINVAL; } } @@ -222,6 +223,7 @@ static int ads1015_get_channels_config_of(struct i2c_client *client) dev_err(&client->dev, "invalid data_rate on %s\n", node->full_name); + return -EINVAL; } } diff --git a/drivers/hwmon/adt7470.c b/drivers/hwmon/adt7470.c index 0f4dea5..9ee3913 100644 --- a/drivers/hwmon/adt7470.c +++ b/drivers/hwmon/adt7470.c @@ -515,7 +515,7 @@ static ssize_t set_temp_min(struct device *dev, return -EINVAL; temp = DIV_ROUND_CLOSEST(temp, 1000); - temp = clamp_val(temp, 0, 255); + temp = clamp_val(temp, -128, 127); mutex_lock(&data->lock); data->temp_min[attr->index] = temp; @@ -549,7 +549,7 @@ static ssize_t set_temp_max(struct device *dev, return -EINVAL; temp = DIV_ROUND_CLOSEST(temp, 1000); - temp = clamp_val(temp, 0, 255); + temp = clamp_val(temp, -128, 127); mutex_lock(&data->lock); data->temp_max[attr->index] = temp; @@ -826,7 +826,7 @@ static ssize_t set_pwm_tmin(struct device *dev, return -EINVAL; temp = DIV_ROUND_CLOSEST(temp, 1000); - temp = clamp_val(temp, 0, 255); + temp = clamp_val(temp, -128, 127); mutex_lock(&data->lock); data->pwm_tmin[attr->index] = temp; diff --git a/drivers/hwmon/amc6821.c b/drivers/hwmon/amc6821.c index eea8172..8a67ec6 100644 --- 
a/drivers/hwmon/amc6821.c +++ b/drivers/hwmon/amc6821.c @@ -360,11 +360,13 @@ static ssize_t set_pwm1_enable( if (config) return config; + mutex_lock(&data->update_lock); config = i2c_smbus_read_byte_data(client, AMC6821_REG_CONF1); if (config < 0) { dev_err(&client->dev, "Error reading configuration register, aborting.\n"); - return config; + count = config; + goto unlock; } switch (val) { @@ -381,14 +383,15 @@ static ssize_t set_pwm1_enable( config |= AMC6821_CONF1_FDRC1; break; default: - return -EINVAL; + count = -EINVAL; + goto unlock; } - mutex_lock(&data->update_lock); if (i2c_smbus_write_byte_data(client, AMC6821_REG_CONF1, config)) { dev_err(&client->dev, "Configuration register write error, aborting.\n"); count = -EIO; } +unlock: mutex_unlock(&data->update_lock); return count; } @@ -493,8 +496,9 @@ static ssize_t set_temp_auto_point_temp( return -EINVAL; } - data->valid = 0; mutex_lock(&data->update_lock); + data->valid = 0; + switch (ix) { case 0: ptemp[0] = clamp_val(val / 1000, 0, @@ -658,13 +662,14 @@ static ssize_t set_fan1_div( if (config) return config; + mutex_lock(&data->update_lock); config = i2c_smbus_read_byte_data(client, AMC6821_REG_CONF4); if (config < 0) { dev_err(&client->dev, "Error reading configuration register, aborting.\n"); - return config; + count = config; + goto EXIT; } - mutex_lock(&data->update_lock); switch (val) { case 2: config &= ~AMC6821_CONF4_PSPR; @@ -704,7 +709,7 @@ static SENSOR_DEVICE_ATTR(temp1_max_alarm, S_IRUGO, get_temp_alarm, NULL, IDX_TEMP1_MAX); static SENSOR_DEVICE_ATTR(temp1_crit_alarm, S_IRUGO, get_temp_alarm, NULL, IDX_TEMP1_CRIT); -static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO | S_IWUSR, +static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, get_temp, NULL, IDX_TEMP2_INPUT); static SENSOR_DEVICE_ATTR(temp2_min, S_IRUGO | S_IWUSR, get_temp, set_temp, IDX_TEMP2_MIN); diff --git a/drivers/hwmon/da9052-hwmon.c b/drivers/hwmon/da9052-hwmon.c index 960fac3..48044b0 100644 --- a/drivers/hwmon/da9052-hwmon.c +++ 
b/drivers/hwmon/da9052-hwmon.c @@ -194,7 +194,7 @@ static ssize_t da9052_hwmon_show_name(struct device *dev, struct device_attribute *devattr, char *buf) { - return sprintf(buf, "da9052-hwmon\n"); + return sprintf(buf, "da9052\n"); } static ssize_t show_label(struct device *dev, diff --git a/drivers/hwmon/da9055-hwmon.c b/drivers/hwmon/da9055-hwmon.c index 029ecab..1b275a2 100644 --- a/drivers/hwmon/da9055-hwmon.c +++ b/drivers/hwmon/da9055-hwmon.c @@ -204,7 +204,7 @@ static ssize_t da9055_hwmon_show_name(struct device *dev, struct device_attribute *devattr, char *buf) { - return sprintf(buf, "da9055-hwmon\n"); + return sprintf(buf, "da9055\n"); } static ssize_t show_label(struct device *dev, diff --git a/drivers/hwmon/dme1737.c b/drivers/hwmon/dme1737.c index 4ae3fff..bea0a34 100644 --- a/drivers/hwmon/dme1737.c +++ b/drivers/hwmon/dme1737.c @@ -247,8 +247,8 @@ struct dme1737_data { u8 pwm_acz[3]; u8 pwm_freq[6]; u8 pwm_rr[2]; - u8 zone_low[3]; - u8 zone_abs[3]; + s8 zone_low[3]; + s8 zone_abs[3]; u8 zone_hyst[2]; u32 alarms; }; @@ -277,7 +277,7 @@ static inline int IN_FROM_REG(int reg, int nominal, int res) return (reg * nominal + (3 << (res - 3))) / (3 << (res - 2)); } -static inline int IN_TO_REG(int val, int nominal) +static inline int IN_TO_REG(long val, int nominal) { return clamp_val((val * 192 + nominal / 2) / nominal, 0, 255); } @@ -293,7 +293,7 @@ static inline int TEMP_FROM_REG(int reg, int res) return (reg * 1000) >> (res - 8); } -static inline int TEMP_TO_REG(int val) +static inline int TEMP_TO_REG(long val) { return clamp_val((val < 0 ? val - 500 : val + 500) / 1000, -128, 127); } @@ -308,7 +308,7 @@ static inline int TEMP_RANGE_FROM_REG(int reg) return TEMP_RANGE[(reg >> 4) & 0x0f]; } -static int TEMP_RANGE_TO_REG(int val, int reg) +static int TEMP_RANGE_TO_REG(long val, int reg) { int i; @@ -331,7 +331,7 @@ static inline int TEMP_HYST_FROM_REG(int reg, int ix) return (((ix == 1) ? 
reg : reg >> 4) & 0x0f) * 1000; } -static inline int TEMP_HYST_TO_REG(int val, int ix, int reg) +static inline int TEMP_HYST_TO_REG(long val, int ix, int reg) { int hyst = clamp_val((val + 500) / 1000, 0, 15); @@ -347,7 +347,7 @@ static inline int FAN_FROM_REG(int reg, int tpc) return (reg == 0 || reg == 0xffff) ? 0 : 90000 * 60 / reg; } -static inline int FAN_TO_REG(int val, int tpc) +static inline int FAN_TO_REG(long val, int tpc) { if (tpc) { return clamp_val(val / tpc, 0, 0xffff); @@ -379,7 +379,7 @@ static inline int FAN_TYPE_FROM_REG(int reg) return (edge > 0) ? 1 << (edge - 1) : 0; } -static inline int FAN_TYPE_TO_REG(int val, int reg) +static inline int FAN_TYPE_TO_REG(long val, int reg) { int edge = (val == 4) ? 3 : val; @@ -402,7 +402,7 @@ static int FAN_MAX_FROM_REG(int reg) return 1000 + i * 500; } -static int FAN_MAX_TO_REG(int val) +static int FAN_MAX_TO_REG(long val) { int i; @@ -460,7 +460,7 @@ static inline int PWM_ACZ_FROM_REG(int reg) return acz[(reg >> 5) & 0x07]; } -static inline int PWM_ACZ_TO_REG(int val, int reg) +static inline int PWM_ACZ_TO_REG(long val, int reg) { int acz = (val == 4) ? 2 : val - 1; @@ -476,7 +476,7 @@ static inline int PWM_FREQ_FROM_REG(int reg) return PWM_FREQ[reg & 0x0f]; } -static int PWM_FREQ_TO_REG(int val, int reg) +static int PWM_FREQ_TO_REG(long val, int reg) { int i; @@ -510,7 +510,7 @@ static inline int PWM_RR_FROM_REG(int reg, int ix) return (rr & 0x08) ? PWM_RR[rr & 0x07] : 0; } -static int PWM_RR_TO_REG(int val, int ix, int reg) +static int PWM_RR_TO_REG(long val, int ix, int reg) { int i; @@ -528,7 +528,7 @@ static inline int PWM_RR_EN_FROM_REG(int reg, int ix) return PWM_RR_FROM_REG(reg, ix) ? 1 : 0; } -static inline int PWM_RR_EN_TO_REG(int val, int ix, int reg) +static inline int PWM_RR_EN_TO_REG(long val, int ix, int reg) { int en = (ix == 1) ? 
0x80 : 0x08; @@ -1481,13 +1481,16 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct dme1737_data *data = dev_get_drvdata(dev); - long val; + unsigned long val; int err; - err = kstrtol(buf, 10, &val); + err = kstrtoul(buf, 10, &val); if (err) return err; + if (val > 255) + return -EINVAL; + data->vrm = val; return count; } diff --git a/drivers/hwmon/ds1621.c b/drivers/hwmon/ds1621.c index a26ba7a..298e557 100644 --- a/drivers/hwmon/ds1621.c +++ b/drivers/hwmon/ds1621.c @@ -311,6 +311,7 @@ static ssize_t set_convrate(struct device *dev, struct device_attribute *da, data->conf |= (resol << DS1621_REG_CONFIG_RESOL_SHIFT); i2c_smbus_write_byte_data(client, DS1621_REG_CONF, data->conf); data->update_interval = ds1721_convrates[resol]; + data->zbits = 7 - resol; mutex_unlock(&data->update_lock); return count; diff --git a/drivers/hwmon/emc1403.c b/drivers/hwmon/emc1403.c index 142e1cb..361f50b 100644 --- a/drivers/hwmon/emc1403.c +++ b/drivers/hwmon/emc1403.c @@ -162,7 +162,7 @@ static ssize_t store_hyst(struct device *dev, if (retval < 0) goto fail; - hyst = val - retval * 1000; + hyst = retval * 1000 - val; hyst = DIV_ROUND_CLOSEST(hyst, 1000); if (hyst < 0 || hyst > 255) { retval = -ERANGE; @@ -295,7 +295,7 @@ static int emc1403_detect(struct i2c_client *client, } id = i2c_smbus_read_byte_data(client, THERMAL_REVISION_REG); - if (id != 0x01) + if (id < 0x01 || id > 0x04) return -ENODEV; return 0; diff --git a/drivers/hwmon/emc2103.c b/drivers/hwmon/emc2103.c index 2c137b2..5790246 100644 --- a/drivers/hwmon/emc2103.c +++ b/drivers/hwmon/emc2103.c @@ -250,9 +250,7 @@ static ssize_t set_temp_min(struct device *dev, struct device_attribute *da, if (result < 0) return result; - val = DIV_ROUND_CLOSEST(val, 1000); - if ((val < -63) || (val > 127)) - return -EINVAL; + val = clamp_val(DIV_ROUND_CLOSEST(val, 1000), -63, 127); mutex_lock(&data->update_lock); data->temp_min[nr] = val; @@ -274,9 +272,7 @@ static 
ssize_t set_temp_max(struct device *dev, struct device_attribute *da, if (result < 0) return result; - val = DIV_ROUND_CLOSEST(val, 1000); - if ((val < -63) || (val > 127)) - return -EINVAL; + val = clamp_val(DIV_ROUND_CLOSEST(val, 1000), -63, 127); mutex_lock(&data->update_lock); data->temp_max[nr] = val; @@ -390,15 +386,14 @@ static ssize_t set_fan_target(struct device *dev, struct device_attribute *da, { struct emc2103_data *data = emc2103_update_device(dev); struct i2c_client *client = to_i2c_client(dev); - long rpm_target; + unsigned long rpm_target; - int result = kstrtol(buf, 10, &rpm_target); + int result = kstrtoul(buf, 10, &rpm_target); if (result < 0) return result; /* Datasheet states 16384 as maximum RPM target (table 3.2) */ - if ((rpm_target < 0) || (rpm_target > 16384)) - return -EINVAL; + rpm_target = clamp_val(rpm_target, 0, 16384); mutex_lock(&data->update_lock); diff --git a/drivers/hwmon/gpio-fan.c b/drivers/hwmon/gpio-fan.c index b7d6a57..ee21bdd 100644 --- a/drivers/hwmon/gpio-fan.c +++ b/drivers/hwmon/gpio-fan.c @@ -172,7 +172,7 @@ static int get_fan_speed_index(struct gpio_fan_data *fan_data) return -EINVAL; } -static int rpm_to_speed_index(struct gpio_fan_data *fan_data, int rpm) +static int rpm_to_speed_index(struct gpio_fan_data *fan_data, unsigned long rpm) { struct gpio_fan_speed *speed = fan_data->speed; int i; diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c index 70a39a8..554f5c3 100644 --- a/drivers/hwmon/ina2xx.c +++ b/drivers/hwmon/ina2xx.c @@ -148,7 +148,8 @@ static int ina2xx_get_value(struct ina2xx_data *data, u8 reg) switch (reg) { case INA2XX_SHUNT_VOLTAGE: - val = DIV_ROUND_CLOSEST(data->regs[reg], + /* signed register */ + val = DIV_ROUND_CLOSEST((s16)data->regs[reg], data->config->shunt_div); break; case INA2XX_BUS_VOLTAGE: @@ -160,8 +161,8 @@ static int ina2xx_get_value(struct ina2xx_data *data, u8 reg) val = data->regs[reg] * data->config->power_lsb; break; case INA2XX_CURRENT: - /* LSB=1mA (selected). 
Is in mA */ - val = data->regs[reg]; + /* signed register, LSB=1mA (selected), in mA */ + val = (s16)data->regs[reg]; break; default: /* programmer goofed */ diff --git a/drivers/hwmon/lm78.c b/drivers/hwmon/lm78.c index a2f3b4a..b879427 100644 --- a/drivers/hwmon/lm78.c +++ b/drivers/hwmon/lm78.c @@ -108,7 +108,7 @@ static inline int FAN_FROM_REG(u8 val, int div) * TEMP: mC (-128C to +127C) * REG: 1C/bit, two's complement */ -static inline s8 TEMP_TO_REG(int val) +static inline s8 TEMP_TO_REG(long val) { int nval = clamp_val(val, -128000, 127000) ; return nval < 0 ? (nval - 500) / 1000 : (nval + 500) / 1000; diff --git a/drivers/hwmon/lm85.c b/drivers/hwmon/lm85.c index 3894c40..b9d6e7d 100644 --- a/drivers/hwmon/lm85.c +++ b/drivers/hwmon/lm85.c @@ -158,7 +158,7 @@ static inline u16 FAN_TO_REG(unsigned long val) /* Temperature is reported in .001 degC increments */ #define TEMP_TO_REG(val) \ - clamp_val(SCALE(val, 1000, 1), -127, 127) + DIV_ROUND_CLOSEST(clamp_val((val), -127000, 127000), 1000) #define TEMPEXT_FROM_REG(val, ext) \ SCALE(((val) << 4) + (ext), 16, 1000) #define TEMP_FROM_REG(val) ((val) * 1000) @@ -192,7 +192,7 @@ static const int lm85_range_map[] = { 13300, 16000, 20000, 26600, 32000, 40000, 53300, 80000 }; -static int RANGE_TO_REG(int range) +static int RANGE_TO_REG(long range) { int i; @@ -214,7 +214,7 @@ static const int adm1027_freq_map[8] = { /* 1 Hz */ 11, 15, 22, 29, 35, 44, 59, 88 }; -static int FREQ_TO_REG(const int *map, int freq) +static int FREQ_TO_REG(const int *map, unsigned long freq) { int i; @@ -463,6 +463,9 @@ static ssize_t store_vrm_reg(struct device *dev, struct device_attribute *attr, if (err) return err; + if (val > 255) + return -EINVAL; + data->vrm = val; return count; } diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c index 8a17f01..e76feb86 100644 --- a/drivers/hwmon/ntc_thermistor.c +++ b/drivers/hwmon/ntc_thermistor.c @@ -44,6 +44,7 @@ struct ntc_compensation { unsigned int ohm; }; +/* Order 
matters, ntc_match references the entries by index */ static const struct platform_device_id ntc_thermistor_id[] = { { "ncp15wb473", TYPE_NCPXXWB473 }, { "ncp18wb473", TYPE_NCPXXWB473 }, @@ -141,7 +142,7 @@ struct ntc_data { char name[PLATFORM_NAME_SIZE]; }; -#ifdef CONFIG_OF +#if defined(CONFIG_OF) && IS_ENABLED(CONFIG_IIO) static int ntc_adc_iio_read(struct ntc_thermistor_platform_data *pdata) { struct iio_channel *channel = pdata->chan; @@ -163,15 +164,15 @@ static int ntc_adc_iio_read(struct ntc_thermistor_platform_data *pdata) static const struct of_device_id ntc_match[] = { { .compatible = "ntc,ncp15wb473", - .data = &ntc_thermistor_id[TYPE_NCPXXWB473] }, + .data = &ntc_thermistor_id[0] }, { .compatible = "ntc,ncp18wb473", - .data = &ntc_thermistor_id[TYPE_NCPXXWB473] }, + .data = &ntc_thermistor_id[1] }, { .compatible = "ntc,ncp21wb473", - .data = &ntc_thermistor_id[TYPE_NCPXXWB473] }, + .data = &ntc_thermistor_id[2] }, { .compatible = "ntc,ncp03wb473", - .data = &ntc_thermistor_id[TYPE_NCPXXWB473] }, + .data = &ntc_thermistor_id[3] }, { .compatible = "ntc,ncp15wl333", - .data = &ntc_thermistor_id[TYPE_NCPXXWL333] }, + .data = &ntc_thermistor_id[4] }, { }, }; MODULE_DEVICE_TABLE(of, ntc_match); @@ -223,6 +224,8 @@ ntc_thermistor_parse_dt(struct platform_device *pdev) return NULL; } +#define ntc_match NULL + static void ntc_iio_channel_release(struct ntc_thermistor_platform_data *pdata) { } #endif diff --git a/drivers/hwmon/sis5595.c b/drivers/hwmon/sis5595.c index 72a8897..9ec7d2e 100644 --- a/drivers/hwmon/sis5595.c +++ b/drivers/hwmon/sis5595.c @@ -159,7 +159,7 @@ static inline int TEMP_FROM_REG(s8 val) { return val * 830 + 52120; } -static inline s8 TEMP_TO_REG(int val) +static inline s8 TEMP_TO_REG(long val) { int nval = clamp_val(val, -54120, 157530) ; return nval < 0 ? 
(nval - 5212 - 415) / 830 : (nval - 5212 + 415) / 830; diff --git a/drivers/hwmon/smsc47m192.c b/drivers/hwmon/smsc47m192.c index efee4c5..34b9a60 100644 --- a/drivers/hwmon/smsc47m192.c +++ b/drivers/hwmon/smsc47m192.c @@ -86,7 +86,7 @@ static inline u8 IN_TO_REG(unsigned long val, int n) */ static inline s8 TEMP_TO_REG(int val) { - return clamp_val(SCALE(val, 1, 1000), -128000, 127000); + return SCALE(clamp_val(val, -128000, 127000), 1, 1000); } static inline int TEMP_FROM_REG(s8 val) @@ -384,6 +384,8 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr, err = kstrtoul(buf, 10, &val); if (err) return err; + if (val > 255) + return -EINVAL; data->vrm = val; return count; diff --git a/drivers/i2c/busses/i2c-at91.c b/drivers/i2c/busses/i2c-at91.c index fd05930..1744453 100644 --- a/drivers/i2c/busses/i2c-at91.c +++ b/drivers/i2c/busses/i2c-at91.c @@ -101,6 +101,7 @@ struct at91_twi_dev { unsigned twi_cwgr_reg; struct at91_twi_pdata *pdata; bool use_dma; + bool recv_len_abort; struct at91_twi_dma dma; }; @@ -210,7 +211,7 @@ static void at91_twi_write_data_dma_callback(void *data) struct at91_twi_dev *dev = (struct at91_twi_dev *)data; dma_unmap_single(dev->dev, sg_dma_address(&dev->dma.sg), - dev->buf_len, DMA_MEM_TO_DEV); + dev->buf_len, DMA_TO_DEVICE); at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_STOP); } @@ -267,12 +268,24 @@ static void at91_twi_read_next_byte(struct at91_twi_dev *dev) *dev->buf = at91_twi_read(dev, AT91_TWI_RHR) & 0xff; --dev->buf_len; + /* return if aborting, we only needed to read RHR to clear RXRDY*/ + if (dev->recv_len_abort) + return; + /* handle I2C_SMBUS_BLOCK_DATA */ if (unlikely(dev->msg->flags & I2C_M_RECV_LEN)) { - dev->msg->flags &= ~I2C_M_RECV_LEN; - dev->buf_len += *dev->buf; - dev->msg->len = dev->buf_len + 1; - dev_dbg(dev->dev, "received block length %d\n", dev->buf_len); + /* ensure length byte is a valid value */ + if (*dev->buf <= I2C_SMBUS_BLOCK_MAX && *dev->buf > 0) { + dev->msg->flags &= 
~I2C_M_RECV_LEN; + dev->buf_len += *dev->buf; + dev->msg->len = dev->buf_len + 1; + dev_dbg(dev->dev, "received block length %d\n", + dev->buf_len); + } else { + /* abort and send the stop by reading one more byte */ + dev->recv_len_abort = true; + dev->buf_len = 1; + } } /* send stop if second but last byte has been read */ @@ -289,7 +302,7 @@ static void at91_twi_read_data_dma_callback(void *data) struct at91_twi_dev *dev = (struct at91_twi_dev *)data; dma_unmap_single(dev->dev, sg_dma_address(&dev->dma.sg), - dev->buf_len, DMA_DEV_TO_MEM); + dev->buf_len, DMA_FROM_DEVICE); /* The last two bytes have to be read without using dma */ dev->buf += dev->buf_len - 2; @@ -421,8 +434,8 @@ static int at91_do_twi_transfer(struct at91_twi_dev *dev) } } - ret = wait_for_completion_interruptible_timeout(&dev->cmd_complete, - dev->adapter.timeout); + ret = wait_for_completion_timeout(&dev->cmd_complete, + dev->adapter.timeout); if (ret == 0) { dev_err(dev->dev, "controller timed out\n"); at91_init_twi_bus(dev); @@ -444,6 +457,12 @@ static int at91_do_twi_transfer(struct at91_twi_dev *dev) ret = -EIO; goto error; } + if (dev->recv_len_abort) { + dev_err(dev->dev, "invalid smbus block length recvd\n"); + ret = -EPROTO; + goto error; + } + dev_dbg(dev->dev, "transfer complete\n"); return 0; @@ -500,6 +519,7 @@ static int at91_twi_xfer(struct i2c_adapter *adap, struct i2c_msg *msg, int num) dev->buf_len = m_start->len; dev->buf = m_start->buf; dev->msg = m_start; + dev->recv_len_abort = false; ret = at91_do_twi_transfer(dev); diff --git a/drivers/i2c/busses/i2c-davinci.c b/drivers/i2c/busses/i2c-davinci.c index 132369f..4e73f3e 100644 --- a/drivers/i2c/busses/i2c-davinci.c +++ b/drivers/i2c/busses/i2c-davinci.c @@ -411,11 +411,9 @@ i2c_davinci_xfer_msg(struct i2c_adapter *adap, struct i2c_msg *msg, int stop) if (dev->cmd_err & DAVINCI_I2C_STR_NACK) { if (msg->flags & I2C_M_IGNORE_NAK) return msg->len; - if (stop) { - w = davinci_i2c_read_reg(dev, DAVINCI_I2C_MDR_REG); - w |= 
DAVINCI_I2C_MDR_STP; - davinci_i2c_write_reg(dev, DAVINCI_I2C_MDR_REG, w); - } + w = davinci_i2c_read_reg(dev, DAVINCI_I2C_MDR_REG); + w |= DAVINCI_I2C_MDR_STP; + davinci_i2c_write_reg(dev, DAVINCI_I2C_MDR_REG, w); return -EREMOTEIO; } return -EIO; diff --git a/drivers/i2c/busses/i2c-designware-core.c b/drivers/i2c/busses/i2c-designware-core.c index 5888fee..a4dd9bd 100644 --- a/drivers/i2c/busses/i2c-designware-core.c +++ b/drivers/i2c/busses/i2c-designware-core.c @@ -418,6 +418,9 @@ static void i2c_dw_xfer_init(struct dw_i2c_dev *dev) */ dw_writel(dev, msgs[dev->msg_write_idx].addr | ic_tar, DW_IC_TAR); + /* enforce disabled interrupts (due to HW issues) */ + i2c_dw_disable_int(dev); + /* Enable the adapter */ __i2c_dw_enable(dev, true); diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c index 1672eff..303972d 100644 --- a/drivers/i2c/busses/i2c-ismt.c +++ b/drivers/i2c/busses/i2c-ismt.c @@ -497,7 +497,7 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr, desc->wr_len_cmd = dma_size; desc->control |= ISMT_DESC_BLK; priv->dma_buffer[0] = command; - memcpy(&priv->dma_buffer[1], &data->block[1], dma_size); + memcpy(&priv->dma_buffer[1], &data->block[1], dma_size - 1); } else { /* Block Read */ dev_dbg(dev, "I2C_SMBUS_BLOCK_DATA: READ\n"); diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c index 564c1c0..b07d53f 100644 --- a/drivers/i2c/busses/i2c-mv64xxx.c +++ b/drivers/i2c/busses/i2c-mv64xxx.c @@ -748,8 +748,7 @@ mv64xxx_of_config(struct mv64xxx_i2c_data *drv_data, } tclk = clk_get_rate(drv_data->clk); - rc = of_property_read_u32(np, "clock-frequency", &bus_freq); - if (rc) + if (of_property_read_u32(np, "clock-frequency", &bus_freq)) bus_freq = 100000; /* 100kHz by default */ if (!mv64xxx_find_baud_factors(bus_freq, tclk, diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index 2f66478..123dc83 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ 
-923,14 +923,12 @@ omap_i2c_isr_thread(int this_irq, void *dev_id) if (stat & OMAP_I2C_STAT_NACK) { err |= OMAP_I2C_STAT_NACK; omap_i2c_ack_stat(dev, OMAP_I2C_STAT_NACK); - break; } if (stat & OMAP_I2C_STAT_AL) { dev_err(dev->dev, "Arbitration lost\n"); err |= OMAP_I2C_STAT_AL; omap_i2c_ack_stat(dev, OMAP_I2C_STAT_AL); - break; } /* @@ -955,11 +953,13 @@ omap_i2c_isr_thread(int this_irq, void *dev_id) if (dev->fifo_size) num_bytes = dev->buf_len; - omap_i2c_receive_data(dev, num_bytes, true); - - if (dev->errata & I2C_OMAP_ERRATA_I207) + if (dev->errata & I2C_OMAP_ERRATA_I207) { i2c_omap_errata_i207(dev, stat); + num_bytes = (omap_i2c_read_reg(dev, + OMAP_I2C_BUFSTAT_REG) >> 8) & 0x3F; + } + omap_i2c_receive_data(dev, num_bytes, true); omap_i2c_ack_stat(dev, OMAP_I2C_STAT_RDR); continue; } diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c index d2fe11d..c8a4260 100644 --- a/drivers/i2c/busses/i2c-rcar.c +++ b/drivers/i2c/busses/i2c-rcar.c @@ -560,6 +560,12 @@ static int rcar_i2c_master_xfer(struct i2c_adapter *adap, ret = -EINVAL; for (i = 0; i < num; i++) { + /* This HW can't send STOP after address phase */ + if (msgs[i].len == 0) { + ret = -EOPNOTSUPP; + break; + } + /*-------------- spin lock -----------------*/ spin_lock_irqsave(&priv->lock, flags); @@ -624,7 +630,8 @@ static int rcar_i2c_master_xfer(struct i2c_adapter *adap, static u32 rcar_i2c_func(struct i2c_adapter *adap) { - return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; + /* This HW can't do SMBUS_QUICK and NOSTART */ + return I2C_FUNC_I2C | (I2C_FUNC_SMBUS_EMUL & ~I2C_FUNC_SMBUS_QUICK); } static const struct i2c_algorithm rcar_i2c_algo = { diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c index 3747b9b..f7d5723 100644 --- a/drivers/i2c/busses/i2c-s3c2410.c +++ b/drivers/i2c/busses/i2c-s3c2410.c @@ -1200,10 +1200,10 @@ static int s3c24xx_i2c_resume(struct device *dev) struct platform_device *pdev = to_platform_device(dev); struct s3c24xx_i2c *i2c = 
platform_get_drvdata(pdev); - i2c->suspended = 0; clk_prepare_enable(i2c->clk); s3c24xx_i2c_init(i2c); clk_disable_unprepare(i2c->clk); + i2c->suspended = 0; return 0; } diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 5323581..97f4e80 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -329,7 +329,7 @@ static struct cpuidle_state atom_cstates[CPUIDLE_STATE_MAX] = { { .enter = NULL } }; -static struct cpuidle_state avn_cstates[] __initdata = { +static struct cpuidle_state avn_cstates[] = { { .name = "C1-AVN", .desc = "MWAIT 0x00", @@ -344,6 +344,8 @@ static struct cpuidle_state avn_cstates[] __initdata = { .exit_latency = 15, .target_residency = 45, .enter = &intel_idle }, + { + .enter = NULL } }; /** diff --git a/drivers/iio/accel/bma180.c b/drivers/iio/accel/bma180.c index 81e3dc2..4edc95c 100644 --- a/drivers/iio/accel/bma180.c +++ b/drivers/iio/accel/bma180.c @@ -68,13 +68,13 @@ /* Defaults values */ #define BMA180_DEF_PMODE 0 #define BMA180_DEF_BW 20 -#define BMA180_DEF_SCALE 250 +#define BMA180_DEF_SCALE 2452 /* Available values for sysfs */ #define BMA180_FLP_FREQ_AVAILABLE \ "10 20 40 75 150 300" #define BMA180_SCALE_AVAILABLE \ - "0.000130 0.000190 0.000250 0.000380 0.000500 0.000990 0.001980" + "0.001275 0.001863 0.002452 0.003727 0.004903 0.009709 0.019417" struct bma180_data { struct i2c_client *client; @@ -94,7 +94,7 @@ enum bma180_axis { }; static int bw_table[] = { 10, 20, 40, 75, 150, 300 }; /* Hz */ -static int scale_table[] = { 130, 190, 250, 380, 500, 990, 1980 }; +static int scale_table[] = { 1275, 1863, 2452, 3727, 4903, 9709, 19417 }; static int bma180_get_acc_reg(struct bma180_data *data, enum bma180_axis axis) { @@ -376,6 +376,8 @@ static int bma180_write_raw(struct iio_dev *indio_dev, mutex_unlock(&data->mutex); return ret; case IIO_CHAN_INFO_LOW_PASS_FILTER_3DB_FREQUENCY: + if (val2) + return -EINVAL; mutex_lock(&data->mutex); ret = bma180_set_bw(data, val); mutex_unlock(&data->mutex); @@ 
-567,7 +569,7 @@ static int bma180_probe(struct i2c_client *client, trig->ops = &bma180_trigger_ops; iio_trigger_set_drvdata(trig, indio_dev); data->trig = trig; - indio_dev->trig = trig; + indio_dev->trig = iio_trigger_get(trig); ret = iio_trigger_register(trig); if (ret) diff --git a/drivers/iio/accel/hid-sensor-accel-3d.c b/drivers/iio/accel/hid-sensor-accel-3d.c index 46d22f3..7fbe136 100644 --- a/drivers/iio/accel/hid-sensor-accel-3d.c +++ b/drivers/iio/accel/hid-sensor-accel-3d.c @@ -349,7 +349,7 @@ static int hid_accel_3d_probe(struct platform_device *pdev) error_iio_unreg: iio_device_unregister(indio_dev); error_remove_trigger: - hid_sensor_remove_trigger(indio_dev); + hid_sensor_remove_trigger(&accel_state->common_attributes); error_unreg_buffer_funcs: iio_triggered_buffer_cleanup(indio_dev); error_free_dev_mem: @@ -362,10 +362,11 @@ static int hid_accel_3d_remove(struct platform_device *pdev) { struct hid_sensor_hub_device *hsdev = pdev->dev.platform_data; struct iio_dev *indio_dev = platform_get_drvdata(pdev); + struct accel_3d_state *accel_state = iio_priv(indio_dev); sensor_hub_remove_callback(hsdev, HID_USAGE_SENSOR_ACCEL_3D); iio_device_unregister(indio_dev); - hid_sensor_remove_trigger(indio_dev); + hid_sensor_remove_trigger(&accel_state->common_attributes); iio_triggered_buffer_cleanup(indio_dev); kfree(indio_dev->channels); diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c index f0d6335..05d2733 100644 --- a/drivers/iio/adc/ad_sigma_delta.c +++ b/drivers/iio/adc/ad_sigma_delta.c @@ -477,7 +477,7 @@ static int ad_sd_probe_trigger(struct iio_dev *indio_dev) goto error_free_irq; /* select default trigger */ - indio_dev->trig = sigma_delta->trig; + indio_dev->trig = iio_trigger_get(sigma_delta->trig); return 0; diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c index 0f16b55..b023cd3 100644 --- a/drivers/iio/adc/at91_adc.c +++ b/drivers/iio/adc/at91_adc.c @@ -166,12 +166,11 @@ static int 
at91_adc_channel_init(struct iio_dev *idev) return idev->num_channels; } -static u8 at91_adc_get_trigger_value_by_name(struct iio_dev *idev, +static int at91_adc_get_trigger_value_by_name(struct iio_dev *idev, struct at91_adc_trigger *triggers, const char *trigger_name) { struct at91_adc_state *st = iio_priv(idev); - u8 value = 0; int i; for (i = 0; i < st->trigger_number; i++) { @@ -184,15 +183,16 @@ static u8 at91_adc_get_trigger_value_by_name(struct iio_dev *idev, return -ENOMEM; if (strcmp(trigger_name, name) == 0) { - value = triggers[i].value; kfree(name); - break; + if (triggers[i].value == 0) + return -EINVAL; + return triggers[i].value; } kfree(name); } - return value; + return -EINVAL; } static int at91_adc_configure_trigger(struct iio_trigger *trig, bool state) @@ -202,14 +202,14 @@ static int at91_adc_configure_trigger(struct iio_trigger *trig, bool state) struct iio_buffer *buffer = idev->buffer; struct at91_adc_reg_desc *reg = st->registers; u32 status = at91_adc_readl(st, reg->trigger_register); - u8 value; + int value; u8 bit; value = at91_adc_get_trigger_value_by_name(idev, st->trigger_list, idev->trig->name); - if (value == 0) - return -EINVAL; + if (value < 0) + return value; if (state) { st->buffer = kmalloc(idev->scan_bytes, GFP_KERNEL); diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c index a952538..b9ed661 100644 --- a/drivers/iio/adc/ti_am335x_adc.c +++ b/drivers/iio/adc/ti_am335x_adc.c @@ -155,7 +155,7 @@ static int tiadc_read_raw(struct iio_dev *indio_dev, if (time_after(jiffies, timeout)) return -EAGAIN; } - map_val = chan->channel + TOTAL_CHANNELS; + map_val = adc_dev->channel_step[chan->scan_index]; /* * When the sub-system is first enabled, diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c index 87419c4..4129b6b 100644 --- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c +++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c @@ 
-49,11 +49,10 @@ static int hid_sensor_data_rdy_trigger_set_state(struct iio_trigger *trig, return 0; } -void hid_sensor_remove_trigger(struct iio_dev *indio_dev) +void hid_sensor_remove_trigger(struct hid_sensor_common *attrb) { - iio_trigger_unregister(indio_dev->trig); - iio_trigger_free(indio_dev->trig); - indio_dev->trig = NULL; + iio_trigger_unregister(attrb->trigger); + iio_trigger_free(attrb->trigger); } EXPORT_SYMBOL(hid_sensor_remove_trigger); @@ -84,7 +83,8 @@ int hid_sensor_setup_trigger(struct iio_dev *indio_dev, const char *name, dev_err(&indio_dev->dev, "Trigger Register Failed\n"); goto error_free_trig; } - indio_dev->trig = trig; + attrb->trigger = trig; + indio_dev->trig = iio_trigger_get(trig); return ret; diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.h b/drivers/iio/common/hid-sensors/hid-sensor-trigger.h index 9a87314..ca02f78 100644 --- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.h +++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.h @@ -21,6 +21,6 @@ int hid_sensor_setup_trigger(struct iio_dev *indio_dev, const char *name, struct hid_sensor_common *attrb); -void hid_sensor_remove_trigger(struct iio_dev *indio_dev); +void hid_sensor_remove_trigger(struct hid_sensor_common *attrb); #endif diff --git a/drivers/iio/common/st_sensors/st_sensors_buffer.c b/drivers/iio/common/st_sensors/st_sensors_buffer.c index 71a2c5f..af6f257 100644 --- a/drivers/iio/common/st_sensors/st_sensors_buffer.c +++ b/drivers/iio/common/st_sensors/st_sensors_buffer.c @@ -71,7 +71,7 @@ int st_sensors_get_buffer_element(struct iio_dev *indio_dev, u8 *buf) goto st_sensors_free_memory; } - for (i = 0; i < n * num_data_channels; i++) { + for (i = 0; i < n * byte_for_channel; i++) { if (i < n) buf[i] = rx_array[i]; else diff --git a/drivers/iio/common/st_sensors/st_sensors_trigger.c b/drivers/iio/common/st_sensors/st_sensors_trigger.c index 8fc3a97..8d8ca6f 100644 --- a/drivers/iio/common/st_sensors/st_sensors_trigger.c +++ 
b/drivers/iio/common/st_sensors/st_sensors_trigger.c @@ -49,7 +49,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev, dev_err(&indio_dev->dev, "failed to register iio trigger.\n"); goto iio_trigger_register_error; } - indio_dev->trig = sdata->trig; + indio_dev->trig = iio_trigger_get(sdata->trig); return 0; diff --git a/drivers/iio/gyro/hid-sensor-gyro-3d.c b/drivers/iio/gyro/hid-sensor-gyro-3d.c index c688d97..74bbed7 100644 --- a/drivers/iio/gyro/hid-sensor-gyro-3d.c +++ b/drivers/iio/gyro/hid-sensor-gyro-3d.c @@ -347,7 +347,7 @@ static int hid_gyro_3d_probe(struct platform_device *pdev) error_iio_unreg: iio_device_unregister(indio_dev); error_remove_trigger: - hid_sensor_remove_trigger(indio_dev); + hid_sensor_remove_trigger(&gyro_state->common_attributes); error_unreg_buffer_funcs: iio_triggered_buffer_cleanup(indio_dev); error_free_dev_mem: @@ -360,10 +360,11 @@ static int hid_gyro_3d_remove(struct platform_device *pdev) { struct hid_sensor_hub_device *hsdev = pdev->dev.platform_data; struct iio_dev *indio_dev = platform_get_drvdata(pdev); + struct gyro_3d_state *gyro_state = iio_priv(indio_dev); sensor_hub_remove_callback(hsdev, HID_USAGE_SENSOR_GYRO_3D); iio_device_unregister(indio_dev); - hid_sensor_remove_trigger(indio_dev); + hid_sensor_remove_trigger(&gyro_state->common_attributes); iio_triggered_buffer_cleanup(indio_dev); kfree(indio_dev->channels); diff --git a/drivers/iio/gyro/itg3200_buffer.c b/drivers/iio/gyro/itg3200_buffer.c index 6c43af9..14917fa 100644 --- a/drivers/iio/gyro/itg3200_buffer.c +++ b/drivers/iio/gyro/itg3200_buffer.c @@ -135,7 +135,7 @@ int itg3200_probe_trigger(struct iio_dev *indio_dev) goto error_free_irq; /* select default trigger */ - indio_dev->trig = st->trig; + indio_dev->trig = iio_trigger_get(st->trig); return 0; diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c index df7f1e1..27a9176 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c +++ 
b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c @@ -661,6 +661,7 @@ static int inv_mpu_probe(struct i2c_client *client, { struct inv_mpu6050_state *st; struct iio_dev *indio_dev; + struct inv_mpu6050_platform_data *pdata; int result; if (!i2c_check_functionality(client->adapter, @@ -673,8 +674,10 @@ static int inv_mpu_probe(struct i2c_client *client, st = iio_priv(indio_dev); st->client = client; - st->plat_data = *(struct inv_mpu6050_platform_data - *)dev_get_platdata(&client->dev); + pdata = (struct inv_mpu6050_platform_data + *)dev_get_platdata(&client->dev); + if (pdata) + st->plat_data = *pdata; /* power is turned on inside check chip type*/ result = inv_check_and_setup_chip(st, id); if (result) diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c index 03b9372..926fcce 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_trigger.c @@ -135,7 +135,7 @@ int inv_mpu6050_probe_trigger(struct iio_dev *indio_dev) ret = iio_trigger_register(st->trig); if (ret) goto error_free_irq; - indio_dev->trig = st->trig; + indio_dev->trig = iio_trigger_get(st->trig); return 0; diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c index 2db7dcd..ae7ac20 100644 --- a/drivers/iio/industrialio-buffer.c +++ b/drivers/iio/industrialio-buffer.c @@ -119,7 +119,8 @@ static ssize_t iio_scan_el_show(struct device *dev, int ret; struct iio_dev *indio_dev = dev_to_iio_dev(dev); - ret = test_bit(to_iio_dev_attr(attr)->address, + /* Ensure ret is 0 or 1. */ + ret = !!test_bit(to_iio_dev_attr(attr)->address, indio_dev->buffer->scan_mask); return sprintf(buf, "%d\n", ret); @@ -789,7 +790,8 @@ int iio_scan_mask_query(struct iio_dev *indio_dev, if (!buffer->scan_mask) return 0; - return test_bit(bit, buffer->scan_mask); + /* Ensure return value is 0 or 1. 
*/ + return !!test_bit(bit, buffer->scan_mask); }; EXPORT_SYMBOL_GPL(iio_scan_mask_query); @@ -874,7 +876,7 @@ static int iio_buffer_update_demux(struct iio_dev *indio_dev, /* Now we have the two masks, work from least sig and build up sizes */ for_each_set_bit(out_ind, - indio_dev->active_scan_mask, + buffer->scan_mask, indio_dev->masklength) { in_ind = find_next_bit(indio_dev->active_scan_mask, indio_dev->masklength, diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c index 0cf5f8e..4fc88e6 100644 --- a/drivers/iio/inkern.c +++ b/drivers/iio/inkern.c @@ -178,12 +178,12 @@ static struct iio_channel *of_iio_channel_get_by_name(struct device_node *np, index = of_property_match_string(np, "io-channel-names", name); chan = of_iio_channel_get(np, index); - if (!IS_ERR(chan)) + if (!IS_ERR(chan) || PTR_ERR(chan) == -EPROBE_DEFER) break; else if (name && index >= 0) { pr_err("ERROR: could not get IIO channel %s:%s(%i)\n", np->full_name, name ? name : "", index); - return chan; + return NULL; } /* @@ -193,8 +193,9 @@ static struct iio_channel *of_iio_channel_get_by_name(struct device_node *np, */ np = np->parent; if (np && !of_get_property(np, "io-channel-ranges", NULL)) - break; + return NULL; } + return chan; } @@ -317,6 +318,7 @@ struct iio_channel *iio_channel_get(struct device *dev, if (channel != NULL) return channel; } + return iio_channel_get_sys(name, channel_name); } EXPORT_SYMBOL_GPL(iio_channel_get); diff --git a/drivers/iio/light/hid-sensor-als.c b/drivers/iio/light/hid-sensor-als.c index e59d00c..c104bda 100644 --- a/drivers/iio/light/hid-sensor-als.c +++ b/drivers/iio/light/hid-sensor-als.c @@ -313,7 +313,7 @@ static int hid_als_probe(struct platform_device *pdev) error_iio_unreg: iio_device_unregister(indio_dev); error_remove_trigger: - hid_sensor_remove_trigger(indio_dev); + hid_sensor_remove_trigger(&als_state->common_attributes); error_unreg_buffer_funcs: iio_triggered_buffer_cleanup(indio_dev); error_free_dev_mem: @@ -326,10 +326,11 @@ static int 
hid_als_remove(struct platform_device *pdev) { struct hid_sensor_hub_device *hsdev = pdev->dev.platform_data; struct iio_dev *indio_dev = platform_get_drvdata(pdev); + struct als_state *als_state = iio_priv(indio_dev); sensor_hub_remove_callback(hsdev, HID_USAGE_SENSOR_ALS); iio_device_unregister(indio_dev); - hid_sensor_remove_trigger(indio_dev); + hid_sensor_remove_trigger(&als_state->common_attributes); iio_triggered_buffer_cleanup(indio_dev); kfree(indio_dev->channels); diff --git a/drivers/iio/magnetometer/ak8975.c b/drivers/iio/magnetometer/ak8975.c index 9edf4c9..aeba3bb 100644 --- a/drivers/iio/magnetometer/ak8975.c +++ b/drivers/iio/magnetometer/ak8975.c @@ -352,8 +352,6 @@ static int ak8975_read_axis(struct iio_dev *indio_dev, int index, int *val) { struct ak8975_data *data = iio_priv(indio_dev); struct i2c_client *client = data->client; - u16 meas_reg; - s16 raw; int ret; mutex_lock(&data->lock); @@ -401,16 +399,11 @@ static int ak8975_read_axis(struct iio_dev *indio_dev, int index, int *val) dev_err(&client->dev, "Read axis data fails\n"); goto exit; } - meas_reg = ret; mutex_unlock(&data->lock); - /* Endian conversion of the measured values. */ - raw = (s16) (le16_to_cpu(meas_reg)); - /* Clamp to valid range. 
*/ - raw = clamp_t(s16, raw, -4096, 4095); - *val = raw; + *val = clamp_t(s16, ret, -4096, 4095); return IIO_VAL_INT; exit: diff --git a/drivers/iio/magnetometer/hid-sensor-magn-3d.c b/drivers/iio/magnetometer/hid-sensor-magn-3d.c index a98460b..ff7b9da 100644 --- a/drivers/iio/magnetometer/hid-sensor-magn-3d.c +++ b/drivers/iio/magnetometer/hid-sensor-magn-3d.c @@ -350,7 +350,7 @@ static int hid_magn_3d_probe(struct platform_device *pdev) error_iio_unreg: iio_device_unregister(indio_dev); error_remove_trigger: - hid_sensor_remove_trigger(indio_dev); + hid_sensor_remove_trigger(&magn_state->common_attributes); error_unreg_buffer_funcs: iio_triggered_buffer_cleanup(indio_dev); error_free_dev_mem: @@ -363,10 +363,11 @@ static int hid_magn_3d_remove(struct platform_device *pdev) { struct hid_sensor_hub_device *hsdev = pdev->dev.platform_data; struct iio_dev *indio_dev = platform_get_drvdata(pdev); + struct magn_3d_state *magn_state = iio_priv(indio_dev); sensor_hub_remove_callback(hsdev, HID_USAGE_SENSOR_COMPASS_3D); iio_device_unregister(indio_dev); - hid_sensor_remove_trigger(indio_dev); + hid_sensor_remove_trigger(&magn_state->common_attributes); iio_triggered_buffer_cleanup(indio_dev); kfree(indio_dev->channels); diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c index cab3bc7..5888885 100644 --- a/drivers/iio/magnetometer/st_magn_core.c +++ b/drivers/iio/magnetometer/st_magn_core.c @@ -42,7 +42,8 @@ #define ST_MAGN_FS_AVL_5600MG 5600 #define ST_MAGN_FS_AVL_8000MG 8000 #define ST_MAGN_FS_AVL_8100MG 8100 -#define ST_MAGN_FS_AVL_10000MG 10000 +#define ST_MAGN_FS_AVL_12000MG 12000 +#define ST_MAGN_FS_AVL_16000MG 16000 /* CUSTOM VALUES FOR SENSOR 1 */ #define ST_MAGN_1_WAI_EXP 0x3c @@ -69,20 +70,20 @@ #define ST_MAGN_1_FS_AVL_4700_VAL 0x05 #define ST_MAGN_1_FS_AVL_5600_VAL 0x06 #define ST_MAGN_1_FS_AVL_8100_VAL 0x07 -#define ST_MAGN_1_FS_AVL_1300_GAIN_XY 1100 -#define ST_MAGN_1_FS_AVL_1900_GAIN_XY 855 -#define 
ST_MAGN_1_FS_AVL_2500_GAIN_XY 670 -#define ST_MAGN_1_FS_AVL_4000_GAIN_XY 450 -#define ST_MAGN_1_FS_AVL_4700_GAIN_XY 400 -#define ST_MAGN_1_FS_AVL_5600_GAIN_XY 330 -#define ST_MAGN_1_FS_AVL_8100_GAIN_XY 230 -#define ST_MAGN_1_FS_AVL_1300_GAIN_Z 980 -#define ST_MAGN_1_FS_AVL_1900_GAIN_Z 760 -#define ST_MAGN_1_FS_AVL_2500_GAIN_Z 600 -#define ST_MAGN_1_FS_AVL_4000_GAIN_Z 400 -#define ST_MAGN_1_FS_AVL_4700_GAIN_Z 355 -#define ST_MAGN_1_FS_AVL_5600_GAIN_Z 295 -#define ST_MAGN_1_FS_AVL_8100_GAIN_Z 205 +#define ST_MAGN_1_FS_AVL_1300_GAIN_XY 909 +#define ST_MAGN_1_FS_AVL_1900_GAIN_XY 1169 +#define ST_MAGN_1_FS_AVL_2500_GAIN_XY 1492 +#define ST_MAGN_1_FS_AVL_4000_GAIN_XY 2222 +#define ST_MAGN_1_FS_AVL_4700_GAIN_XY 2500 +#define ST_MAGN_1_FS_AVL_5600_GAIN_XY 3030 +#define ST_MAGN_1_FS_AVL_8100_GAIN_XY 4347 +#define ST_MAGN_1_FS_AVL_1300_GAIN_Z 1020 +#define ST_MAGN_1_FS_AVL_1900_GAIN_Z 1315 +#define ST_MAGN_1_FS_AVL_2500_GAIN_Z 1666 +#define ST_MAGN_1_FS_AVL_4000_GAIN_Z 2500 +#define ST_MAGN_1_FS_AVL_4700_GAIN_Z 2816 +#define ST_MAGN_1_FS_AVL_5600_GAIN_Z 3389 +#define ST_MAGN_1_FS_AVL_8100_GAIN_Z 4878 #define ST_MAGN_1_MULTIREAD_BIT false /* CUSTOM VALUES FOR SENSOR 2 */ @@ -105,10 +106,12 @@ #define ST_MAGN_2_FS_MASK 0x60 #define ST_MAGN_2_FS_AVL_4000_VAL 0x00 #define ST_MAGN_2_FS_AVL_8000_VAL 0x01 -#define ST_MAGN_2_FS_AVL_10000_VAL 0x02 -#define ST_MAGN_2_FS_AVL_4000_GAIN 430 -#define ST_MAGN_2_FS_AVL_8000_GAIN 230 -#define ST_MAGN_2_FS_AVL_10000_GAIN 230 +#define ST_MAGN_2_FS_AVL_12000_VAL 0x02 +#define ST_MAGN_2_FS_AVL_16000_VAL 0x03 +#define ST_MAGN_2_FS_AVL_4000_GAIN 146 +#define ST_MAGN_2_FS_AVL_8000_GAIN 292 +#define ST_MAGN_2_FS_AVL_12000_GAIN 438 +#define ST_MAGN_2_FS_AVL_16000_GAIN 584 #define ST_MAGN_2_MULTIREAD_BIT false #define ST_MAGN_2_OUT_X_L_ADDR 0x28 #define ST_MAGN_2_OUT_Y_L_ADDR 0x2a @@ -266,9 +269,14 @@ static const struct st_sensors st_magn_sensors[] = { .gain = ST_MAGN_2_FS_AVL_8000_GAIN, }, [2] = { - .num = ST_MAGN_FS_AVL_10000MG, - .value = 
ST_MAGN_2_FS_AVL_10000_VAL, - .gain = ST_MAGN_2_FS_AVL_10000_GAIN, + .num = ST_MAGN_FS_AVL_12000MG, + .value = ST_MAGN_2_FS_AVL_12000_VAL, + .gain = ST_MAGN_2_FS_AVL_12000_GAIN, + }, + [3] = { + .num = ST_MAGN_FS_AVL_16000MG, + .value = ST_MAGN_2_FS_AVL_16000_VAL, + .gain = ST_MAGN_2_FS_AVL_16000_GAIN, }, }, }, diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index c47c203..4293e89 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -46,6 +46,7 @@ #include <linux/completion.h> #include <linux/slab.h> #include <linux/module.h> +#include <linux/sysctl.h> #include <rdma/iw_cm.h> #include <rdma/ib_addr.h> @@ -65,6 +66,20 @@ struct iwcm_work { struct list_head free_list; }; +static unsigned int default_backlog = 256; + +static struct ctl_table_header *iwcm_ctl_table_hdr; +static struct ctl_table iwcm_ctl_table[] = { + { + .procname = "default_backlog", + .data = &default_backlog, + .maxlen = sizeof(default_backlog), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { } +}; + /* * The following services provide a mechanism for pre-allocating iwcm_work * elements. 
The design pre-allocates them based on the cm_id type: @@ -419,6 +434,9 @@ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + if (!backlog) + backlog = default_backlog; + ret = alloc_work_entries(cm_id_priv, backlog); if (ret) return ret; @@ -1024,11 +1042,20 @@ static int __init iw_cm_init(void) if (!iwcm_wq) return -ENOMEM; + iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm", + iwcm_ctl_table); + if (!iwcm_ctl_table_hdr) { + pr_err("iw_cm: couldn't register sysctl paths\n"); + destroy_workqueue(iwcm_wq); + return -ENOMEM; + } + return 0; } static void __exit iw_cm_cleanup(void) { + unregister_net_sysctl_table(iwcm_ctl_table_hdr); destroy_workqueue(iwcm_wq); } diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index f0d588f..1acb991 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -98,7 +98,7 @@ struct ib_umad_port { struct ib_umad_device { int start_port, end_port; - struct kref ref; + struct kobject kobj; struct ib_umad_port port[0]; }; @@ -134,14 +134,18 @@ static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS); static void ib_umad_add_one(struct ib_device *device); static void ib_umad_remove_one(struct ib_device *device); -static void ib_umad_release_dev(struct kref *ref) +static void ib_umad_release_dev(struct kobject *kobj) { struct ib_umad_device *dev = - container_of(ref, struct ib_umad_device, ref); + container_of(kobj, struct ib_umad_device, kobj); kfree(dev); } +static struct kobj_type ib_umad_dev_ktype = { + .release = ib_umad_release_dev, +}; + static int hdr_size(struct ib_umad_file *file) { return file->use_pkey_index ? 
sizeof (struct ib_user_mad_hdr) : @@ -780,27 +784,19 @@ static int ib_umad_open(struct inode *inode, struct file *filp) { struct ib_umad_port *port; struct ib_umad_file *file; - int ret; + int ret = -ENXIO; port = container_of(inode->i_cdev, struct ib_umad_port, cdev); - if (port) - kref_get(&port->umad_dev->ref); - else - return -ENXIO; mutex_lock(&port->file_mutex); - if (!port->ib_dev) { - ret = -ENXIO; + if (!port->ib_dev) goto out; - } + ret = -ENOMEM; file = kzalloc(sizeof *file, GFP_KERNEL); - if (!file) { - kref_put(&port->umad_dev->ref, ib_umad_release_dev); - ret = -ENOMEM; + if (!file) goto out; - } mutex_init(&file->mutex); spin_lock_init(&file->send_lock); @@ -814,6 +810,13 @@ static int ib_umad_open(struct inode *inode, struct file *filp) list_add_tail(&file->port_list, &port->file_list); ret = nonseekable_open(inode, filp); + if (ret) { + list_del(&file->port_list); + kfree(file); + goto out; + } + + kobject_get(&port->umad_dev->kobj); out: mutex_unlock(&port->file_mutex); @@ -852,7 +855,7 @@ static int ib_umad_close(struct inode *inode, struct file *filp) mutex_unlock(&file->port->file_mutex); kfree(file); - kref_put(&dev->ref, ib_umad_release_dev); + kobject_put(&dev->kobj); return 0; } @@ -880,10 +883,6 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp) int ret; port = container_of(inode->i_cdev, struct ib_umad_port, sm_cdev); - if (port) - kref_get(&port->umad_dev->ref); - else - return -ENXIO; if (filp->f_flags & O_NONBLOCK) { if (down_trylock(&port->sm_sem)) { @@ -898,17 +897,27 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp) } ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); - if (ret) { - up(&port->sm_sem); - goto fail; - } + if (ret) + goto err_up_sem; filp->private_data = port; - return nonseekable_open(inode, filp); + ret = nonseekable_open(inode, filp); + if (ret) + goto err_clr_sm_cap; + + kobject_get(&port->umad_dev->kobj); + + return 0; + +err_clr_sm_cap: + 
swap(props.set_port_cap_mask, props.clr_port_cap_mask); + ib_modify_port(port->ib_dev, port->port_num, 0, &props); + +err_up_sem: + up(&port->sm_sem); fail: - kref_put(&port->umad_dev->ref, ib_umad_release_dev); return ret; } @@ -927,7 +936,7 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp) up(&port->sm_sem); - kref_put(&port->umad_dev->ref, ib_umad_release_dev); + kobject_put(&port->umad_dev->kobj); return ret; } @@ -995,6 +1004,7 @@ static int find_overflow_devnum(void) } static int ib_umad_init_port(struct ib_device *device, int port_num, + struct ib_umad_device *umad_dev, struct ib_umad_port *port) { int devnum; @@ -1027,6 +1037,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, cdev_init(&port->cdev, &umad_fops); port->cdev.owner = THIS_MODULE; + port->cdev.kobj.parent = &umad_dev->kobj; kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num); if (cdev_add(&port->cdev, base, 1)) goto err_cdev; @@ -1045,6 +1056,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, base += IB_UMAD_MAX_PORTS; cdev_init(&port->sm_cdev, &umad_sm_fops); port->sm_cdev.owner = THIS_MODULE; + port->sm_cdev.kobj.parent = &umad_dev->kobj; kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num); if (cdev_add(&port->sm_cdev, base, 1)) goto err_sm_cdev; @@ -1138,7 +1150,7 @@ static void ib_umad_add_one(struct ib_device *device) if (!umad_dev) return; - kref_init(&umad_dev->ref); + kobject_init(&umad_dev->kobj, &ib_umad_dev_ktype); umad_dev->start_port = s; umad_dev->end_port = e; @@ -1146,7 +1158,8 @@ static void ib_umad_add_one(struct ib_device *device) for (i = s; i <= e; ++i) { umad_dev->port[i - s].umad_dev = umad_dev; - if (ib_umad_init_port(device, i, &umad_dev->port[i - s])) + if (ib_umad_init_port(device, i, umad_dev, + &umad_dev->port[i - s])) goto err; } @@ -1158,7 +1171,7 @@ err: while (--i >= s) ib_umad_kill_port(&umad_dev->port[i - s]); - kref_put(&umad_dev->ref, ib_umad_release_dev); + 
kobject_put(&umad_dev->kobj); } static void ib_umad_remove_one(struct ib_device *device) @@ -1172,7 +1185,7 @@ static void ib_umad_remove_one(struct ib_device *device) for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i) ib_umad_kill_port(&umad_dev->port[i]); - kref_put(&umad_dev->ref, ib_umad_release_dev); + kobject_put(&umad_dev->kobj); } static char *umad_devnode(struct device *dev, umode_t *mode) diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 095bb04..cb78b1e 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -418,6 +418,7 @@ static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp) skb->priority = CPL_PRIORITY_DATA; set_arp_failure_handler(skb, abort_arp_failure); req = (struct cpl_abort_req *) skb_put(skb, sizeof(*req)); + memset(req, 0, sizeof(*req)); req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ)); req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid)); diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 212150c..8cc8375 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -283,6 +283,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, (my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1)); if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { ehca_err(device, "Copy to udata failed."); + cq = ERR_PTR(-EFAULT); goto create_cq_exit4; } } diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c index 714293b..45802e9 100644 --- a/drivers/infiniband/hw/ipath/ipath_diag.c +++ b/drivers/infiniband/hw/ipath/ipath_diag.c @@ -326,7 +326,7 @@ static ssize_t ipath_diagpkt_write(struct file *fp, size_t count, loff_t *off) { u32 __iomem *piobuf; - u32 plen, clen, pbufn; + u32 plen, pbufn, maxlen_reserve; struct ipath_diag_pkt 
odp; struct ipath_diag_xpkt dp; u32 *tmpbuf = NULL; @@ -335,42 +335,24 @@ static ssize_t ipath_diagpkt_write(struct file *fp, u64 val; u32 l_state, lt_state; /* LinkState, LinkTrainingState */ - if (count < sizeof(odp)) { - ret = -EINVAL; - goto bail; - } if (count == sizeof(dp)) { if (copy_from_user(&dp, data, sizeof(dp))) { ret = -EFAULT; goto bail; } - } else if (copy_from_user(&odp, data, sizeof(odp))) { - ret = -EFAULT; - goto bail; - } - - /* - * Due to padding/alignment issues (lessened with new struct) - * the old and new structs are the same length. We need to - * disambiguate them, which we can do because odp.len has never - * been less than the total of LRH+BTH+DETH so far, while - * dp.unit (same offset) unit is unlikely to get that high. - * Similarly, dp.data, the pointer to user at the same offset - * as odp.unit, is almost certainly at least one (512byte)page - * "above" NULL. The if-block below can be omitted if compatibility - * between a new driver and older diagnostic code is unimportant. - * compatibility the other direction (new diags, old driver) is - * handled in the diagnostic code, with a warning. - */ - if (dp.unit >= 20 && dp.data < 512) { - /* very probable version mismatch. 
Fix it up */ - memcpy(&odp, &dp, sizeof(odp)); - /* We got a legacy dp, copy elements to dp */ + } else if (count == sizeof(odp)) { + if (copy_from_user(&odp, data, sizeof(odp))) { + ret = -EFAULT; + goto bail; + } + dp.len = odp.len; dp.unit = odp.unit; dp.data = odp.data; - dp.len = odp.len; - dp.pbc_wd = 0; /* Indicate we need to compute PBC wd */ + dp.pbc_wd = 0; + } else { + ret = -EINVAL; + goto bail; } /* send count must be an exact number of dwords */ @@ -379,7 +361,7 @@ static ssize_t ipath_diagpkt_write(struct file *fp, goto bail; } - clen = dp.len >> 2; + plen = dp.len >> 2; dd = ipath_lookup(dp.unit); if (!dd || !(dd->ipath_flags & IPATH_PRESENT) || @@ -422,16 +404,22 @@ static ssize_t ipath_diagpkt_write(struct file *fp, goto bail; } - /* need total length before first word written */ - /* +1 word is for the qword padding */ - plen = sizeof(u32) + dp.len; - - if ((plen + 4) > dd->ipath_ibmaxlen) { + /* + * need total length before first word written, plus 2 Dwords. One Dword + * is for padding so we get the full user data when not aligned on + * a word boundary. The other Dword is to make sure we have room for the + * ICRC which gets tacked on later. 
+ */ + maxlen_reserve = 2 * sizeof(u32); + if (dp.len > dd->ipath_ibmaxlen - maxlen_reserve) { ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n", - plen - 4, dd->ipath_ibmaxlen); + dp.len, dd->ipath_ibmaxlen); ret = -EINVAL; - goto bail; /* before writing pbc */ + goto bail; } + + plen = sizeof(u32) + dp.len; + tmpbuf = vmalloc(plen); if (!tmpbuf) { dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, " @@ -473,11 +461,11 @@ static ssize_t ipath_diagpkt_write(struct file *fp, */ if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) { ipath_flush_wc(); - __iowrite32_copy(piobuf + 2, tmpbuf, clen - 1); + __iowrite32_copy(piobuf + 2, tmpbuf, plen - 1); ipath_flush_wc(); - __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1); + __raw_writel(tmpbuf[plen - 1], piobuf + plen + 1); } else - __iowrite32_copy(piobuf + 2, tmpbuf, clen); + __iowrite32_copy(piobuf + 2, tmpbuf, plen); ipath_flush_wc(); diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 344ab03..706833a 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -32,6 +32,7 @@ #include <linux/kref.h> #include <rdma/ib_umem.h> +#include <rdma/ib_user_verbs.h> #include "mlx5_ib.h" #include "user.h" @@ -518,14 +519,24 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, int *cqe_size, int *index, int *inlen) { struct mlx5_ib_create_cq ucmd; + size_t ucmdlen; int page_shift; int npages; int ncont; int err; - if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) + ucmdlen = + (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) < + sizeof(ucmd)) ? 
(sizeof(ucmd) - + sizeof(ucmd.reserved)) : sizeof(ucmd); + + if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) return -EFAULT; + if (ucmdlen == sizeof(ucmd) && + ucmd.reserved != 0) + return -EINVAL; + if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128) return -EINVAL; diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 0aa478b..47a1f0a 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -35,6 +35,7 @@ #include <linux/mlx5/srq.h> #include <linux/slab.h> #include <rdma/ib_umem.h> +#include <rdma/ib_user_verbs.h> #include "mlx5_ib.h" #include "user.h" @@ -78,16 +79,27 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_create_srq ucmd; + size_t ucmdlen; int err; int npages; int page_shift; int ncont; u32 offset; - if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { + ucmdlen = + (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) < + sizeof(ucmd)) ? 
(sizeof(ucmd) - + sizeof(ucmd.reserved)) : sizeof(ucmd); + + if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) { mlx5_ib_dbg(dev, "failed copy udata\n"); return -EFAULT; } + + if (ucmdlen == sizeof(ucmd) && + ucmd.reserved != 0) + return -EINVAL; + srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE); srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size, diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h index a886de3..84fea5d 100644 --- a/drivers/infiniband/hw/mlx5/user.h +++ b/drivers/infiniband/hw/mlx5/user.h @@ -84,6 +84,7 @@ struct mlx5_ib_create_cq { __u64 buf_addr; __u64 db_addr; __u32 cqe_size; + __u32 reserved; /* explicit padding (optional on i386) */ }; struct mlx5_ib_create_cq_resp { @@ -99,6 +100,7 @@ struct mlx5_ib_create_srq { __u64 buf_addr; __u64 db_addr; __u32 flags; + __u32 reserved; /* explicit padding (optional on i386) */ }; struct mlx5_ib_create_srq_resp { diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 5b71d43..42dde06 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -695,6 +695,7 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) { mthca_free_cq(to_mdev(ibdev), cq); + err = -EFAULT; goto err_free; } diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 5b53ca5..09999cd 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1186,7 +1186,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); kfree(nesqp->allocated_buffer); nes_debug(NES_DBG_QP, "ib_copy_from_udata() Failed \n"); - return NULL; + return ERR_PTR(-EFAULT); } if (req.user_wqe_buffers) { virt_wqs = 1; diff --git a/drivers/infiniband/hw/qib/qib_debugfs.c 
b/drivers/infiniband/hw/qib/qib_debugfs.c index 799a0c3..6abd3ed 100644 --- a/drivers/infiniband/hw/qib/qib_debugfs.c +++ b/drivers/infiniband/hw/qib/qib_debugfs.c @@ -193,6 +193,7 @@ static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos) struct qib_qp_iter *iter; loff_t n = *pos; + rcu_read_lock(); iter = qib_qp_iter_init(s->private); if (!iter) return NULL; @@ -224,7 +225,7 @@ static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr) { - /* nothing for now */ + rcu_read_unlock(); } static int _qp_stats_seq_show(struct seq_file *s, void *iter_ptr) diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 275f247..2023cd6 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -1578,7 +1578,7 @@ static int do_qib_user_sdma_queue_create(struct file *fp) struct qib_ctxtdata *rcd = fd->rcd; struct qib_devdata *dd = rcd->dd; - if (dd->flags & QIB_HAS_SEND_DMA) + if (dd->flags & QIB_HAS_SEND_DMA) { fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev, dd->unit, @@ -1586,6 +1586,7 @@ static int do_qib_user_sdma_queue_create(struct file *fp) fd->subctxt); if (!fd->pq) return -ENOMEM; + } return 0; } diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 24e802f..76c3e17 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -1097,14 +1097,10 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) int ret; dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra); - if (!dd) { - dd = ERR_PTR(-ENOMEM); - goto bail; - } + if (!dd) + return ERR_PTR(-ENOMEM); -#ifdef CONFIG_DEBUG_FS - qib_dbg_ibdev_init(&dd->verbs_dev); -#endif + INIT_LIST_HEAD(&dd->list); idr_preload(GFP_KERNEL); spin_lock_irqsave(&qib_devs_lock, flags); @@ -1121,11 +1117,6 @@ struct qib_devdata *qib_alloc_devdata(struct 
pci_dev *pdev, size_t extra) if (ret < 0) { qib_early_err(&pdev->dev, "Could not allocate unit ID: error %d\n", -ret); -#ifdef CONFIG_DEBUG_FS - qib_dbg_ibdev_exit(&dd->verbs_dev); -#endif - ib_dealloc_device(&dd->verbs_dev.ibdev); - dd = ERR_PTR(ret); goto bail; } @@ -1139,9 +1130,15 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) qib_early_err(&pdev->dev, "Could not alloc cpulist info, cpu affinity might be wrong\n"); } - -bail: +#ifdef CONFIG_DEBUG_FS + qib_dbg_ibdev_init(&dd->verbs_dev); +#endif return dd; +bail: + if (!list_empty(&dd->list)) + list_del_init(&dd->list); + ib_dealloc_device(&dd->verbs_dev.ibdev); + return ERR_PTR(ret);; } /* diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index ccb1191..1dd9fcb 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -1028,7 +1028,7 @@ static int set_pkeys(struct qib_devdata *dd, u8 port, u16 *pkeys) event.event = IB_EVENT_PKEY_CHANGE; event.device = &dd->verbs_dev.ibdev; - event.element.port_num = 1; + event.element.port_num = port; ib_dispatch_event(&event); } return 0; diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 3cca55b..2c018ba 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -1324,7 +1324,6 @@ int qib_qp_iter_next(struct qib_qp_iter *iter) struct qib_qp *pqp = iter->qp; struct qib_qp *qp; - rcu_read_lock(); for (; n < dev->qp_table_size; n++) { if (pqp) qp = rcu_dereference(pqp->next); @@ -1332,18 +1331,11 @@ int qib_qp_iter_next(struct qib_qp_iter *iter) qp = rcu_dereference(dev->qp_table[n]); pqp = qp; if (qp) { - if (iter->qp) - atomic_dec(&iter->qp->refcount); - atomic_inc(&qp->refcount); - rcu_read_unlock(); iter->qp = qp; iter->n = n; return 0; } } - rcu_read_unlock(); - if (iter->qp) - atomic_dec(&iter->qp->refcount); return ret; } diff --git a/drivers/infiniband/ulp/isert/ib_isert.c 
b/drivers/infiniband/ulp/isert/ib_isert.c index 09c7129..60a3ed9 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -27,6 +27,7 @@ #include <target/target_core_base.h> #include <target/target_core_fabric.h> #include <target/iscsi/iscsi_transport.h> +#include <linux/semaphore.h> #include "isert_proto.h" #include "ib_isert.h" @@ -39,6 +40,7 @@ static DEFINE_MUTEX(device_list_mutex); static LIST_HEAD(device_list); static struct workqueue_struct *isert_rx_wq; static struct workqueue_struct *isert_comp_wq; +static struct workqueue_struct *isert_release_wq; static void isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn); @@ -50,6 +52,11 @@ isert_unreg_rdma_frwr(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn static int isert_reg_rdma_frwr(struct iscsi_conn *conn, struct iscsi_cmd *cmd, struct isert_rdma_wr *wr); +static int +isert_rdma_post_recvl(struct isert_conn *isert_conn); +static int +isert_rdma_accept(struct isert_conn *isert_conn); +struct rdma_cm_id *isert_setup_id(struct isert_np *isert_np); static void isert_qp_event_callback(struct ib_event *e, void *context) @@ -110,9 +117,12 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id) attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS; /* * FIXME: Use devattr.max_sge - 2 for max_send_sge as - * work-around for RDMA_READ.. + * work-around for RDMA_READs with ConnectX-2. + * + * Also, still make sure to have at least two SGEs for + * outgoing control PDU responses. 
*/ - attr.cap.max_send_sge = device->dev_attr.max_sge - 2; + attr.cap.max_send_sge = max(2, device->dev_attr.max_sge - 2); isert_conn->max_sge = attr.cap.max_send_sge; attr.cap.max_recv_sge = 1; @@ -127,12 +137,18 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id) ret = rdma_create_qp(cma_id, isert_conn->conn_pd, &attr); if (ret) { pr_err("rdma_create_qp failed for cma_id %d\n", ret); - return ret; + goto err; } isert_conn->conn_qp = cma_id->qp; pr_debug("rdma_create_qp() returned success >>>>>>>>>>>>>>>>>>>>>>>>>.\n"); return 0; +err: + mutex_lock(&device_list_mutex); + device->cq_active_qps[min_index]--; + mutex_unlock(&device_list_mutex); + + return ret; } static void @@ -218,12 +234,16 @@ isert_create_device_ib_res(struct isert_device *device) struct isert_cq_desc *cq_desc; struct ib_device_attr *dev_attr; int ret = 0, i, j; + int max_rx_cqe, max_tx_cqe; dev_attr = &device->dev_attr; ret = isert_query_device(ib_dev, dev_attr); if (ret) return ret; + max_rx_cqe = min(ISER_MAX_RX_CQ_LEN, dev_attr->max_cqe); + max_tx_cqe = min(ISER_MAX_TX_CQ_LEN, dev_attr->max_cqe); + /* asign function handlers */ if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { device->use_frwr = 1; @@ -265,7 +285,7 @@ isert_create_device_ib_res(struct isert_device *device) isert_cq_rx_callback, isert_cq_event_callback, (void *)&cq_desc[i], - ISER_MAX_RX_CQ_LEN, i); + max_rx_cqe, i); if (IS_ERR(device->dev_rx_cq[i])) { ret = PTR_ERR(device->dev_rx_cq[i]); device->dev_rx_cq[i] = NULL; @@ -277,7 +297,7 @@ isert_create_device_ib_res(struct isert_device *device) isert_cq_tx_callback, isert_cq_event_callback, (void *)&cq_desc[i], - ISER_MAX_TX_CQ_LEN, i); + max_tx_cqe, i); if (IS_ERR(device->dev_tx_cq[i])) { ret = PTR_ERR(device->dev_tx_cq[i]); device->dev_tx_cq[i] = NULL; @@ -426,11 +446,18 @@ isert_conn_create_frwr_pool(struct isert_conn *isert_conn) { struct fast_reg_descriptor *fr_desc; struct isert_device *device = isert_conn->conn_device; - int i, 
ret; + struct se_session *se_sess = isert_conn->conn->sess->se_sess; + struct se_node_acl *se_nacl = se_sess->se_node_acl; + int i, ret, tag_num; + /* + * Setup the number of FRMRs based upon the number of tags + * available to session in iscsi_target_locate_portal(). + */ + tag_num = max_t(u32, ISCSIT_MIN_TAGS, se_nacl->queue_depth); + tag_num = (tag_num * 2) + ISCSIT_EXTRA_TAGS; - INIT_LIST_HEAD(&isert_conn->conn_frwr_pool); isert_conn->conn_frwr_pool_size = 0; - for (i = 0; i < ISCSI_DEF_XMIT_CMDS_MAX; i++) { + for (i = 0; i < tag_num; i++) { fr_desc = kzalloc(sizeof(*fr_desc), GFP_KERNEL); if (!fr_desc) { pr_err("Failed to allocate fast_reg descriptor\n"); @@ -478,13 +505,21 @@ err: static int isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { - struct iscsi_np *np = cma_id->context; - struct isert_np *isert_np = np->np_context; + struct isert_np *isert_np = cma_id->context; + struct iscsi_np *np = isert_np->np; struct isert_conn *isert_conn; struct isert_device *device; struct ib_device *ib_dev = cma_id->device; int ret = 0; + spin_lock_bh(&np->np_thread_lock); + if (!np->enabled) { + spin_unlock_bh(&np->np_thread_lock); + pr_debug("iscsi_np is not enabled, reject connect request\n"); + return rdma_reject(cma_id, NULL, 0); + } + spin_unlock_bh(&np->np_thread_lock); + pr_debug("Entering isert_connect_request cma_id: %p, context: %p\n", cma_id, cma_id->context); @@ -496,14 +531,14 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) isert_conn->state = ISER_CONN_INIT; INIT_LIST_HEAD(&isert_conn->conn_accept_node); init_completion(&isert_conn->conn_login_comp); + init_completion(&isert_conn->login_req_comp); init_completion(&isert_conn->conn_wait); init_completion(&isert_conn->conn_wait_comp_err); kref_init(&isert_conn->conn_kref); - kref_get(&isert_conn->conn_kref); mutex_init(&isert_conn->conn_mutex); spin_lock_init(&isert_conn->conn_lock); + INIT_LIST_HEAD(&isert_conn->conn_frwr_pool); - cma_id->context 
= isert_conn; isert_conn->conn_cm_id = cma_id; isert_conn->responder_resources = event->param.conn.responder_resources; isert_conn->initiator_depth = event->param.conn.initiator_depth; @@ -559,30 +594,27 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) isert_conn->conn_pd = device->dev_pd; isert_conn->conn_mr = device->dev_mr; - if (device->use_frwr) { - ret = isert_conn_create_frwr_pool(isert_conn); - if (ret) { - pr_err("Conn: %p failed to create frwr_pool\n", isert_conn); - goto out_frwr; - } - } - ret = isert_conn_setup_qp(isert_conn, cma_id); if (ret) goto out_conn_dev; + ret = isert_rdma_post_recvl(isert_conn); + if (ret) + goto out_conn_dev; + + ret = isert_rdma_accept(isert_conn); + if (ret) + goto out_conn_dev; + mutex_lock(&isert_np->np_accept_mutex); - list_add_tail(&isert_np->np_accept_list, &isert_conn->conn_accept_node); + list_add_tail(&isert_conn->conn_accept_node, &isert_np->np_accept_list); mutex_unlock(&isert_np->np_accept_mutex); - pr_debug("isert_connect_request() waking up np_accept_wq: %p\n", np); - wake_up(&isert_np->np_accept_wq); + pr_debug("isert_connect_request() up np_sem np: %p\n", np); + up(&isert_np->np_sem); return 0; out_conn_dev: - if (device->use_frwr) - isert_conn_free_frwr_pool(isert_conn); -out_frwr: isert_device_try_release(device); out_rsp_dma_map: ib_dma_unmap_single(ib_dev, isert_conn->login_rsp_dma, @@ -594,6 +626,7 @@ out_login_buf: kfree(isert_conn->login_buf); out: kfree(isert_conn); + rdma_reject(cma_id, NULL, 0); return ret; } @@ -609,18 +642,20 @@ isert_connect_release(struct isert_conn *isert_conn) if (device && device->use_frwr) isert_conn_free_frwr_pool(isert_conn); + isert_free_rx_descriptors(isert_conn); + rdma_destroy_id(isert_conn->conn_cm_id); + if (isert_conn->conn_qp) { cq_index = ((struct isert_cq_desc *) isert_conn->conn_qp->recv_cq->cq_context)->cq_index; pr_debug("isert_connect_release: cq_index: %d\n", cq_index); + mutex_lock(&device_list_mutex); 
isert_conn->conn_device->cq_active_qps[cq_index]--; + mutex_unlock(&device_list_mutex); - rdma_destroy_qp(isert_conn->conn_cm_id); + ib_destroy_qp(isert_conn->conn_qp); } - isert_free_rx_descriptors(isert_conn); - rdma_destroy_id(isert_conn->conn_cm_id); - if (isert_conn->login_buf) { ib_dma_unmap_single(ib_dev, isert_conn->login_rsp_dma, ISER_RX_LOGIN_SIZE, DMA_TO_DEVICE); @@ -640,7 +675,19 @@ isert_connect_release(struct isert_conn *isert_conn) static void isert_connected_handler(struct rdma_cm_id *cma_id) { - return; + struct isert_conn *isert_conn = cma_id->qp->qp_context; + + pr_info("conn %p\n", isert_conn); + + if (!kref_get_unless_zero(&isert_conn->conn_kref)) { + pr_warn("conn %p connect_release is running\n", isert_conn); + return; + } + + mutex_lock(&isert_conn->conn_mutex); + if (isert_conn->state != ISER_CONN_FULL_FEATURE) + isert_conn->state = ISER_CONN_UP; + mutex_unlock(&isert_conn->conn_mutex); } static void @@ -661,49 +708,102 @@ isert_put_conn(struct isert_conn *isert_conn) kref_put(&isert_conn->conn_kref, isert_release_conn_kref); } +/** + * isert_conn_terminate() - Initiate connection termination + * @isert_conn: isert connection struct + * + * Notes: + * In case the connection state is FULL_FEATURE, move state + * to TEMINATING and start teardown sequence (rdma_disconnect). + * In case the connection state is UP, complete flush as well. + * + * This routine must be called with conn_mutex held. Thus it is + * safe to call multiple times. 
+ */ static void -isert_disconnect_work(struct work_struct *work) +isert_conn_terminate(struct isert_conn *isert_conn) { - struct isert_conn *isert_conn = container_of(work, - struct isert_conn, conn_logout_work); + int err; - pr_debug("isert_disconnect_work(): >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"); - mutex_lock(&isert_conn->conn_mutex); - if (isert_conn->state == ISER_CONN_UP) + switch (isert_conn->state) { + case ISER_CONN_TERMINATING: + break; + case ISER_CONN_UP: + /* + * No flush completions will occur as we didn't + * get to ISER_CONN_FULL_FEATURE yet, complete + * to allow teardown progress. + */ + complete(&isert_conn->conn_wait_comp_err); + case ISER_CONN_FULL_FEATURE: /* FALLTHRU */ + pr_info("Terminating conn %p state %d\n", + isert_conn, isert_conn->state); isert_conn->state = ISER_CONN_TERMINATING; - - if (isert_conn->post_recv_buf_count == 0 && - atomic_read(&isert_conn->post_send_buf_count) == 0) { - mutex_unlock(&isert_conn->conn_mutex); - goto wake_up; - } - if (!isert_conn->conn_cm_id) { - mutex_unlock(&isert_conn->conn_mutex); - isert_put_conn(isert_conn); - return; + err = rdma_disconnect(isert_conn->conn_cm_id); + if (err) + pr_warn("Failed rdma_disconnect isert_conn %p\n", + isert_conn); + break; + default: + pr_warn("conn %p teminating in state %d\n", + isert_conn, isert_conn->state); } - if (!isert_conn->logout_posted) { - pr_debug("Calling rdma_disconnect for !logout_posted from" - " isert_disconnect_work\n"); - rdma_disconnect(isert_conn->conn_cm_id); - mutex_unlock(&isert_conn->conn_mutex); - iscsit_cause_connection_reinstatement(isert_conn->conn, 0); - goto wake_up; +} + +static int +isert_np_cma_handler(struct isert_np *isert_np, + enum rdma_cm_event_type event) +{ + pr_debug("isert np %p, handling event %d\n", isert_np, event); + + switch (event) { + case RDMA_CM_EVENT_DEVICE_REMOVAL: + isert_np->np_cm_id = NULL; + break; + case RDMA_CM_EVENT_ADDR_CHANGE: + isert_np->np_cm_id = isert_setup_id(isert_np); + if 
(IS_ERR(isert_np->np_cm_id)) { + pr_err("isert np %p setup id failed: %ld\n", + isert_np, PTR_ERR(isert_np->np_cm_id)); + isert_np->np_cm_id = NULL; + } + break; + default: + pr_err("isert np %p Unexpected event %d\n", + isert_np, event); } + + return -1; +} + +static int +isert_disconnected_handler(struct rdma_cm_id *cma_id, + enum rdma_cm_event_type event) +{ + struct isert_np *isert_np = cma_id->context; + struct isert_conn *isert_conn; + + if (isert_np->np_cm_id == cma_id) + return isert_np_cma_handler(cma_id->context, event); + + isert_conn = cma_id->qp->qp_context; + + mutex_lock(&isert_conn->conn_mutex); + isert_conn_terminate(isert_conn); mutex_unlock(&isert_conn->conn_mutex); -wake_up: + pr_info("conn %p completing conn_wait\n", isert_conn); complete(&isert_conn->conn_wait); - isert_put_conn(isert_conn); + + return 0; } static void -isert_disconnected_handler(struct rdma_cm_id *cma_id) +isert_connect_error(struct rdma_cm_id *cma_id) { - struct isert_conn *isert_conn = (struct isert_conn *)cma_id->context; + struct isert_conn *isert_conn = cma_id->qp->qp_context; - INIT_WORK(&isert_conn->conn_logout_work, isert_disconnect_work); - schedule_work(&isert_conn->conn_logout_work); + isert_put_conn(isert_conn); } static int @@ -716,32 +816,30 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) switch (event->event) { case RDMA_CM_EVENT_CONNECT_REQUEST: - pr_debug("RDMA_CM_EVENT_CONNECT_REQUEST: >>>>>>>>>>>>>>>\n"); ret = isert_connect_request(cma_id, event); + if (ret) + pr_err("isert_cma_handler failed RDMA_CM_EVENT: 0x%08x %d\n", + event->event, ret); break; case RDMA_CM_EVENT_ESTABLISHED: - pr_debug("RDMA_CM_EVENT_ESTABLISHED >>>>>>>>>>>>>>\n"); isert_connected_handler(cma_id); break; - case RDMA_CM_EVENT_DISCONNECTED: - pr_debug("RDMA_CM_EVENT_DISCONNECTED: >>>>>>>>>>>>>>\n"); - isert_disconnected_handler(cma_id); - break; - case RDMA_CM_EVENT_DEVICE_REMOVAL: - case RDMA_CM_EVENT_ADDR_CHANGE: + case RDMA_CM_EVENT_ADDR_CHANGE: /* 
FALLTHRU */ + case RDMA_CM_EVENT_DISCONNECTED: /* FALLTHRU */ + case RDMA_CM_EVENT_DEVICE_REMOVAL: /* FALLTHRU */ + case RDMA_CM_EVENT_TIMEWAIT_EXIT: /* FALLTHRU */ + ret = isert_disconnected_handler(cma_id, event->event); break; + case RDMA_CM_EVENT_REJECTED: /* FALLTHRU */ + case RDMA_CM_EVENT_UNREACHABLE: /* FALLTHRU */ case RDMA_CM_EVENT_CONNECT_ERROR: + isert_connect_error(cma_id); + break; default: - pr_err("Unknown RDMA CMA event: %d\n", event->event); + pr_err("Unhandled RDMA CMA event: %d\n", event->event); break; } - if (ret != 0) { - pr_err("isert_cma_handler failed RDMA_CM_EVENT: 0x%08x %d\n", - event->event, ret); - dump_stack(); - } - return ret; } @@ -930,6 +1028,15 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login, } if (!login->login_failed) { if (login->login_complete) { + if (isert_conn->conn_device->use_frwr) { + ret = isert_conn_create_frwr_pool(isert_conn); + if (ret) { + pr_err("Conn: %p failed to create" + " frwr_pool\n", isert_conn); + return ret; + } + } + ret = isert_alloc_rx_descriptors(isert_conn); if (ret) return ret; @@ -938,7 +1045,10 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login, if (ret) return ret; - isert_conn->state = ISER_CONN_UP; + /* Now we are in FULL_FEATURE phase */ + mutex_lock(&isert_conn->conn_mutex); + isert_conn->state = ISER_CONN_FULL_FEATURE; + mutex_unlock(&isert_conn->conn_mutex); goto post_send; } @@ -955,18 +1065,17 @@ post_send: } static void -isert_rx_login_req(struct iser_rx_desc *rx_desc, int rx_buflen, - struct isert_conn *isert_conn) +isert_rx_login_req(struct isert_conn *isert_conn) { + struct iser_rx_desc *rx_desc = (void *)isert_conn->login_req_buf; + int rx_buflen = isert_conn->login_req_len; struct iscsi_conn *conn = isert_conn->conn; struct iscsi_login *login = conn->conn_login; int size; - if (!login) { - pr_err("conn->conn_login is NULL\n"); - dump_stack(); - return; - } + pr_info("conn %p\n", isert_conn); + + WARN_ON_ONCE(!login); if 
(login->first_request) { struct iscsi_login_req *login_req = @@ -1074,6 +1183,8 @@ sequence_cmd: if (!rc && dump_payload == false && unsol_data) iscsit_set_unsoliticed_dataout(cmd); + else if (dump_payload && imm_data) + target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); return 0; } @@ -1327,11 +1438,20 @@ isert_rx_completion(struct iser_rx_desc *desc, struct isert_conn *isert_conn, hdr->opcode, hdr->itt, hdr->flags, (int)(xfer_len - ISER_HEADERS_LEN)); - if ((char *)desc == isert_conn->login_req_buf) - isert_rx_login_req(desc, xfer_len - ISER_HEADERS_LEN, - isert_conn); - else + if ((char *)desc == isert_conn->login_req_buf) { + isert_conn->login_req_len = xfer_len - ISER_HEADERS_LEN; + if (isert_conn->conn) { + struct iscsi_login *login = isert_conn->conn->conn_login; + + if (login && !login->first_request) + isert_rx_login_req(isert_conn); + } + mutex_lock(&isert_conn->conn_mutex); + complete(&isert_conn->login_req_comp); + mutex_unlock(&isert_conn->conn_mutex); + } else { isert_rx_do_work(desc, isert_conn); + } ib_dma_sync_single_for_device(ib_dev, rx_dma, rx_buflen, DMA_FROM_DEVICE); @@ -1414,7 +1534,7 @@ isert_unreg_rdma_frwr(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn } static void -isert_put_cmd(struct isert_cmd *isert_cmd) +isert_put_cmd(struct isert_cmd *isert_cmd, bool comp_err) { struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; struct isert_conn *isert_conn = isert_cmd->conn; @@ -1430,8 +1550,21 @@ isert_put_cmd(struct isert_cmd *isert_cmd) list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); - if (cmd->data_direction == DMA_TO_DEVICE) + if (cmd->data_direction == DMA_TO_DEVICE) { iscsit_stop_dataout_timer(cmd); + /* + * Check for special case during comp_err where + * WRITE_PENDING has been handed off from core, + * but requires an extra target_put_sess_cmd() + * before transport_generic_free_cmd() below. 
+ */ + if (comp_err && + cmd->se_cmd.t_state == TRANSPORT_WRITE_PENDING) { + struct se_cmd *se_cmd = &cmd->se_cmd; + + target_put_sess_cmd(se_cmd->se_sess, se_cmd); + } + } device->unreg_rdma_mem(isert_cmd, isert_conn); transport_generic_free_cmd(&cmd->se_cmd, 0); @@ -1486,7 +1619,7 @@ isert_unmap_tx_desc(struct iser_tx_desc *tx_desc, struct ib_device *ib_dev) static void isert_completion_put(struct iser_tx_desc *tx_desc, struct isert_cmd *isert_cmd, - struct ib_device *ib_dev) + struct ib_device *ib_dev, bool comp_err) { if (isert_cmd->pdu_buf_dma != 0) { pr_debug("Calling ib_dma_unmap_single for isert_cmd->pdu_buf_dma\n"); @@ -1496,7 +1629,7 @@ isert_completion_put(struct iser_tx_desc *tx_desc, struct isert_cmd *isert_cmd, } isert_unmap_tx_desc(tx_desc, ib_dev); - isert_put_cmd(isert_cmd); + isert_put_cmd(isert_cmd, comp_err); } static void @@ -1540,28 +1673,25 @@ isert_do_control_comp(struct work_struct *work) iscsit_tmr_post_handler(cmd, cmd->conn); cmd->i_state = ISTATE_SENT_STATUS; - isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev); + isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev, false); break; case ISTATE_SEND_REJECT: pr_debug("Got isert_do_control_comp ISTATE_SEND_REJECT: >>>\n"); atomic_dec(&isert_conn->post_send_buf_count); cmd->i_state = ISTATE_SENT_STATUS; - isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev); + isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev, false); break; case ISTATE_SEND_LOGOUTRSP: pr_debug("Calling iscsit_logout_post_handler >>>>>>>>>>>>>>\n"); - /* - * Call atomic_dec(&isert_conn->post_send_buf_count) - * from isert_wait_conn() - */ - isert_conn->logout_posted = true; + + atomic_dec(&isert_conn->post_send_buf_count); iscsit_logout_post_handler(cmd, cmd->conn); break; case ISTATE_SEND_TEXTRSP: atomic_dec(&isert_conn->post_send_buf_count); cmd->i_state = ISTATE_SENT_STATUS; - isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev); + isert_completion_put(&isert_cmd->tx_desc, 
isert_cmd, ib_dev, false); break; default: pr_err("Unknown do_control_comp i_state %d\n", cmd->i_state); @@ -1592,7 +1722,7 @@ isert_response_completion(struct iser_tx_desc *tx_desc, atomic_sub(wr->send_wr_num + 1, &isert_conn->post_send_buf_count); cmd->i_state = ISTATE_SENT_STATUS; - isert_completion_put(tx_desc, isert_cmd, ib_dev); + isert_completion_put(tx_desc, isert_cmd, ib_dev, false); } static void @@ -1646,7 +1776,7 @@ isert_cq_tx_comp_err(struct iser_tx_desc *tx_desc, struct isert_conn *isert_conn if (!isert_cmd) isert_unmap_tx_desc(tx_desc, ib_dev); else - isert_completion_put(tx_desc, isert_cmd, ib_dev); + isert_completion_put(tx_desc, isert_cmd, ib_dev, true); } static void @@ -1666,9 +1796,11 @@ isert_cq_rx_comp_err(struct isert_conn *isert_conn) msleep(3000); mutex_lock(&isert_conn->conn_mutex); - isert_conn->state = ISER_CONN_DOWN; + isert_conn_terminate(isert_conn); mutex_unlock(&isert_conn->conn_mutex); + iscsit_cause_connection_reinstatement(isert_conn->conn, 0); + complete(&isert_conn->conn_wait_comp_err); } @@ -1922,7 +2054,7 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) int rc; isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc); - rc = iscsit_build_text_rsp(cmd, conn, hdr); + rc = iscsit_build_text_rsp(cmd, conn, hdr, ISCSI_INFINIBAND); if (rc < 0) return rc; @@ -2444,13 +2576,51 @@ isert_response_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state) return ret; } +struct rdma_cm_id * +isert_setup_id(struct isert_np *isert_np) +{ + struct iscsi_np *np = isert_np->np; + struct rdma_cm_id *id; + struct sockaddr *sa; + int ret; + + sa = (struct sockaddr *)&np->np_sockaddr; + pr_debug("ksockaddr: %p, sa: %p\n", &np->np_sockaddr, sa); + + id = rdma_create_id(isert_cma_handler, isert_np, + RDMA_PS_TCP, IB_QPT_RC); + if (IS_ERR(id)) { + pr_err("rdma_create_id() failed: %ld\n", PTR_ERR(id)); + ret = PTR_ERR(id); + goto out; + } + pr_debug("id %p context %p\n", id, id->context); + + ret = 
rdma_bind_addr(id, sa); + if (ret) { + pr_err("rdma_bind_addr() failed: %d\n", ret); + goto out_id; + } + + ret = rdma_listen(id, ISERT_RDMA_LISTEN_BACKLOG); + if (ret) { + pr_err("rdma_listen() failed: %d\n", ret); + goto out_id; + } + + return id; +out_id: + rdma_destroy_id(id); +out: + return ERR_PTR(ret); +} + static int isert_setup_np(struct iscsi_np *np, struct __kernel_sockaddr_storage *ksockaddr) { struct isert_np *isert_np; struct rdma_cm_id *isert_lid; - struct sockaddr *sa; int ret; isert_np = kzalloc(sizeof(struct isert_np), GFP_KERNEL); @@ -2458,13 +2628,12 @@ isert_setup_np(struct iscsi_np *np, pr_err("Unable to allocate struct isert_np\n"); return -ENOMEM; } - init_waitqueue_head(&isert_np->np_accept_wq); + sema_init(&isert_np->np_sem, 0); mutex_init(&isert_np->np_accept_mutex); INIT_LIST_HEAD(&isert_np->np_accept_list); init_completion(&isert_np->np_login_comp); + isert_np->np = np; - sa = (struct sockaddr *)ksockaddr; - pr_debug("ksockaddr: %p, sa: %p\n", ksockaddr, sa); /* * Setup the np->np_sockaddr from the passed sockaddr setup * in iscsi_target_configfs.c code.. 
@@ -2472,50 +2641,21 @@ isert_setup_np(struct iscsi_np *np, memcpy(&np->np_sockaddr, ksockaddr, sizeof(struct __kernel_sockaddr_storage)); - isert_lid = rdma_create_id(isert_cma_handler, np, RDMA_PS_TCP, - IB_QPT_RC); + isert_lid = isert_setup_id(isert_np); if (IS_ERR(isert_lid)) { - pr_err("rdma_create_id() for isert_listen_handler failed: %ld\n", - PTR_ERR(isert_lid)); ret = PTR_ERR(isert_lid); goto out; } - ret = rdma_bind_addr(isert_lid, sa); - if (ret) { - pr_err("rdma_bind_addr() for isert_lid failed: %d\n", ret); - goto out_lid; - } - - ret = rdma_listen(isert_lid, ISERT_RDMA_LISTEN_BACKLOG); - if (ret) { - pr_err("rdma_listen() for isert_lid failed: %d\n", ret); - goto out_lid; - } - isert_np->np_cm_id = isert_lid; np->np_context = isert_np; - pr_debug("Setup isert_lid->context: %p\n", isert_lid->context); return 0; -out_lid: - rdma_destroy_id(isert_lid); out: kfree(isert_np); - return ret; -} -static int -isert_check_accept_queue(struct isert_np *isert_np) -{ - int empty; - - mutex_lock(&isert_np->np_accept_mutex); - empty = list_empty(&isert_np->np_accept_list); - mutex_unlock(&isert_np->np_accept_mutex); - - return empty; + return ret; } static int @@ -2550,7 +2690,15 @@ isert_get_login_rx(struct iscsi_conn *conn, struct iscsi_login *login) struct isert_conn *isert_conn = (struct isert_conn *)conn->context; int ret; - pr_debug("isert_get_login_rx before conn_login_comp conn: %p\n", conn); + pr_info("before login_req comp conn: %p\n", isert_conn); + ret = wait_for_completion_interruptible(&isert_conn->login_req_comp); + if (ret) { + pr_err("isert_conn %p interrupted before got login req\n", + isert_conn); + return ret; + } + INIT_COMPLETION(isert_conn->login_req_comp); + /* * For login requests after the first PDU, isert_rx_login_req() will * kick schedule_delayed_work(&conn->login_work) as the packet is @@ -2560,11 +2708,15 @@ isert_get_login_rx(struct iscsi_conn *conn, struct iscsi_login *login) if (!login->first_request) return 0; + 
isert_rx_login_req(isert_conn); + + pr_info("before conn_login_comp conn: %p\n", conn); ret = wait_for_completion_interruptible(&isert_conn->conn_login_comp); if (ret) return ret; - pr_debug("isert_get_login_rx processing login->req: %p\n", login->req); + pr_info("processing login->req: %p\n", login->req); + return 0; } @@ -2610,16 +2762,19 @@ isert_accept_np(struct iscsi_np *np, struct iscsi_conn *conn) int max_accept = 0, ret; accept_wait: - ret = wait_event_interruptible(isert_np->np_accept_wq, - !isert_check_accept_queue(isert_np) || - np->np_thread_state == ISCSI_NP_THREAD_RESET); + ret = down_interruptible(&isert_np->np_sem); if (max_accept > 5) return -ENODEV; spin_lock_bh(&np->np_thread_lock); - if (np->np_thread_state == ISCSI_NP_THREAD_RESET) { + if (np->np_thread_state >= ISCSI_NP_THREAD_RESET) { spin_unlock_bh(&np->np_thread_lock); - pr_err("ISCSI_NP_THREAD_RESET for isert_accept_np\n"); + pr_debug("np_thread_state %d for isert_accept_np\n", + np->np_thread_state); + /** + * No point in stalling here when np_thread + * is in state RESET/SHUTDOWN/EXIT - bail + **/ return -ENODEV; } spin_unlock_bh(&np->np_thread_lock); @@ -2639,17 +2794,10 @@ accept_wait: isert_conn->conn = conn; max_accept = 0; - ret = isert_rdma_post_recvl(isert_conn); - if (ret) - return ret; - - ret = isert_rdma_accept(isert_conn); - if (ret) - return ret; - isert_set_conn_info(np, conn, isert_conn); - pr_debug("Processing isert_accept_np: isert_conn: %p\n", isert_conn); + pr_debug("Processing isert_conn: %p\n", isert_conn); + return 0; } @@ -2658,29 +2806,38 @@ isert_free_np(struct iscsi_np *np) { struct isert_np *isert_np = (struct isert_np *)np->np_context; - rdma_destroy_id(isert_np->np_cm_id); + if (isert_np->np_cm_id) + rdma_destroy_id(isert_np->np_cm_id); np->np_context = NULL; kfree(isert_np); } +static void isert_release_work(struct work_struct *work) +{ + struct isert_conn *isert_conn = container_of(work, + struct isert_conn, + release_work); + + pr_info("Starting release 
conn %p\n", isert_conn); + + wait_for_completion(&isert_conn->conn_wait); + + mutex_lock(&isert_conn->conn_mutex); + isert_conn->state = ISER_CONN_DOWN; + mutex_unlock(&isert_conn->conn_mutex); + + pr_info("Destroying conn %p\n", isert_conn); + isert_put_conn(isert_conn); +} + static void isert_wait_conn(struct iscsi_conn *conn) { struct isert_conn *isert_conn = conn->context; pr_debug("isert_wait_conn: Starting \n"); - /* - * Decrement post_send_buf_count for special case when called - * from isert_do_control_comp() -> iscsit_logout_post_handler() - */ - mutex_lock(&isert_conn->conn_mutex); - if (isert_conn->logout_posted) - atomic_dec(&isert_conn->post_send_buf_count); - if (isert_conn->conn_cm_id && isert_conn->state != ISER_CONN_DOWN) { - pr_debug("Calling rdma_disconnect from isert_wait_conn\n"); - rdma_disconnect(isert_conn->conn_cm_id); - } + mutex_lock(&isert_conn->conn_mutex); /* * Only wait for conn_wait_comp_err if the isert_conn made it * into full feature phase.. @@ -2689,13 +2846,13 @@ static void isert_wait_conn(struct iscsi_conn *conn) mutex_unlock(&isert_conn->conn_mutex); return; } - if (isert_conn->state == ISER_CONN_UP) - isert_conn->state = ISER_CONN_TERMINATING; + isert_conn_terminate(isert_conn); mutex_unlock(&isert_conn->conn_mutex); wait_for_completion(&isert_conn->conn_wait_comp_err); - wait_for_completion(&isert_conn->conn_wait); + INIT_WORK(&isert_conn->release_work, isert_release_work); + queue_work(isert_release_wq, &isert_conn->release_work); } static void isert_free_conn(struct iscsi_conn *conn) @@ -2741,10 +2898,21 @@ static int __init isert_init(void) goto destroy_rx_wq; } + isert_release_wq = alloc_workqueue("isert_release_wq", WQ_UNBOUND, + WQ_UNBOUND_MAX_ACTIVE); + if (!isert_release_wq) { + pr_err("Unable to allocate isert_release_wq\n"); + ret = -ENOMEM; + goto destroy_comp_wq; + } + iscsit_register_transport(&iser_target_transport); - pr_debug("iSER_TARGET[0] - Loaded iser_target_transport\n"); + pr_info("iSER_TARGET[0] - 
Loaded iser_target_transport\n"); + return 0; +destroy_comp_wq: + destroy_workqueue(isert_comp_wq); destroy_rx_wq: destroy_workqueue(isert_rx_wq); return ret; @@ -2752,6 +2920,8 @@ destroy_rx_wq: static void __exit isert_exit(void) { + flush_scheduled_work(); + destroy_workqueue(isert_release_wq); destroy_workqueue(isert_comp_wq); destroy_workqueue(isert_rx_wq); iscsit_unregister_transport(&iser_target_transport); diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 52f4bf0..1ea5279 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -23,6 +23,7 @@ enum iser_ib_op_code { enum iser_conn_state { ISER_CONN_INIT, ISER_CONN_UP, + ISER_CONN_FULL_FEATURE, ISER_CONN_TERMINATING, ISER_CONN_DOWN, }; @@ -90,7 +91,6 @@ struct isert_device; struct isert_conn { enum iser_conn_state state; - bool logout_posted; int post_recv_buf_count; atomic_t post_send_buf_count; u32 responder_resources; @@ -100,6 +100,7 @@ struct isert_conn { char *login_req_buf; char *login_rsp_buf; u64 login_req_dma; + int login_req_len; u64 login_rsp_dma; unsigned int conn_rx_desc_head; struct iser_rx_desc *conn_rx_descs; @@ -107,13 +108,13 @@ struct isert_conn { struct iscsi_conn *conn; struct list_head conn_accept_node; struct completion conn_login_comp; + struct completion login_req_comp; struct iser_tx_desc conn_login_tx_desc; struct rdma_cm_id *conn_cm_id; struct ib_pd *conn_pd; struct ib_mr *conn_mr; struct ib_qp *conn_qp; struct isert_device *conn_device; - struct work_struct conn_logout_work; struct mutex conn_mutex; struct completion conn_wait; struct completion conn_wait_comp_err; @@ -122,6 +123,7 @@ struct isert_conn { int conn_frwr_pool_size; /* lock to protect frwr_pool */ spinlock_t conn_lock; + struct work_struct release_work; }; #define ISERT_MAX_CQ 64 @@ -154,7 +156,8 @@ struct isert_device { }; struct isert_np { - wait_queue_head_t np_accept_wq; + struct iscsi_np *np; + struct semaphore 
np_sem; struct rdma_cm_id *np_cm_id; struct mutex np_accept_mutex; struct list_head np_accept_list; diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 17b58f4..15984e1 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -93,6 +93,7 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr); static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event); static struct scsi_transport_template *ib_srp_transport_template; +static struct workqueue_struct *srp_remove_wq; static struct ib_client srp_client = { .name = "srp", @@ -458,7 +459,7 @@ static bool srp_queue_remove_work(struct srp_target_port *target) spin_unlock_irq(&target->lock); if (changed) - queue_work(system_long_wq, &target->remove_work); + queue_work(srp_remove_wq, &target->remove_work); return changed; } @@ -1412,6 +1413,12 @@ err_unmap: err_iu: srp_put_tx_iu(target, iu, SRP_IU_CMD); + /* + * Avoid that the loops that iterate over the request ring can + * encounter a dangling SCSI command pointer. + */ + req->scmnd = NULL; + spin_lock_irqsave(&target->lock, flags); list_add(&req->list, &target->free_reqs); @@ -2596,9 +2603,10 @@ static void srp_remove_one(struct ib_device *device) spin_unlock(&host->target_lock); /* - * Wait for target port removal tasks. + * Wait for tl_err and target port removal tasks. 
*/ flush_workqueue(system_long_wq); + flush_workqueue(srp_remove_wq); kfree(host); } @@ -2643,16 +2651,22 @@ static int __init srp_init_module(void) indirect_sg_entries = cmd_sg_entries; } + srp_remove_wq = create_workqueue("srp_remove"); + if (IS_ERR(srp_remove_wq)) { + ret = PTR_ERR(srp_remove_wq); + goto out; + } + + ret = -ENOMEM; ib_srp_transport_template = srp_attach_transport(&ib_srp_transport_functions); if (!ib_srp_transport_template) - return -ENOMEM; + goto destroy_wq; ret = class_register(&srp_class); if (ret) { pr_err("couldn't register class infiniband_srp\n"); - srp_release_transport(ib_srp_transport_template); - return ret; + goto release_tr; } ib_sa_register_client(&srp_sa_client); @@ -2660,13 +2674,22 @@ static int __init srp_init_module(void) ret = ib_register_client(&srp_client); if (ret) { pr_err("couldn't register IB client\n"); - srp_release_transport(ib_srp_transport_template); - ib_sa_unregister_client(&srp_sa_client); - class_unregister(&srp_class); - return ret; + goto unreg_sa; } - return 0; +out: + return ret; + +unreg_sa: + ib_sa_unregister_client(&srp_sa_client); + class_unregister(&srp_class); + +release_tr: + srp_release_transport(ib_srp_transport_template); + +destroy_wq: + destroy_workqueue(srp_remove_wq); + goto out; } static void __exit srp_cleanup_module(void) @@ -2675,6 +2698,7 @@ static void __exit srp_cleanup_module(void) ib_sa_unregister_client(&srp_sa_client); class_unregister(&srp_class); srp_release_transport(ib_srp_transport_template); + destroy_workqueue(srp_remove_wq); } module_init(srp_init_module); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 6c923c7..1ed08cc 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1078,6 +1078,7 @@ static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch, static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, struct srpt_send_ioctx *ioctx) { + struct ib_device *dev = 
ch->sport->sdev->device; struct se_cmd *cmd; struct scatterlist *sg, *sg_orig; int sg_cnt; @@ -1124,7 +1125,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, db = ioctx->rbufs; tsize = cmd->data_length; - dma_len = sg_dma_len(&sg[0]); + dma_len = ib_sg_dma_len(dev, &sg[0]); riu = ioctx->rdma_ius; /* @@ -1155,7 +1156,8 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, ++j; if (j < count) { sg = sg_next(sg); - dma_len = sg_dma_len(sg); + dma_len = ib_sg_dma_len( + dev, sg); } } } else { @@ -1192,8 +1194,8 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, tsize = cmd->data_length; riu = ioctx->rdma_ius; sg = sg_orig; - dma_len = sg_dma_len(&sg[0]); - dma_addr = sg_dma_address(&sg[0]); + dma_len = ib_sg_dma_len(dev, &sg[0]); + dma_addr = ib_sg_dma_address(dev, &sg[0]); /* this second loop is really mapped sg_addres to rdma_iu->ib_sge */ for (i = 0, j = 0; @@ -1216,8 +1218,10 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, ++j; if (j < count) { sg = sg_next(sg); - dma_len = sg_dma_len(sg); - dma_addr = sg_dma_address(sg); + dma_len = ib_sg_dma_len( + dev, sg); + dma_addr = ib_sg_dma_address( + dev, sg); } } } else { @@ -2097,6 +2101,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) if (!qp_init) goto out; +retry: ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, ch->rq_size + srp_sq_size, 0); if (IS_ERR(ch->cq)) { @@ -2120,6 +2125,13 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) ch->qp = ib_create_qp(sdev->pd, qp_init); if (IS_ERR(ch->qp)) { ret = PTR_ERR(ch->qp); + if (ret == -ENOMEM) { + srp_sq_size /= 2; + if (srp_sq_size >= MIN_SRPT_SQ_SIZE) { + ib_destroy_cq(ch->cq); + goto retry; + } + } printk(KERN_ERR "failed to create_qp ret= %d\n", ret); goto err_destroy_cq; } diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index a06e125..694af49 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -757,20 +757,23 @@ static int evdev_handle_set_keycode_v2(struct input_dev 
*dev, void __user *p) */ static int evdev_handle_get_val(struct evdev_client *client, struct input_dev *dev, unsigned int type, - unsigned long *bits, unsigned int max, - unsigned int size, void __user *p, int compat) + unsigned long *bits, unsigned int maxbit, + unsigned int maxlen, void __user *p, + int compat) { int ret; unsigned long *mem; + size_t len; - mem = kmalloc(sizeof(unsigned long) * max, GFP_KERNEL); + len = BITS_TO_LONGS(maxbit) * sizeof(unsigned long); + mem = kmalloc(len, GFP_KERNEL); if (!mem) return -ENOMEM; spin_lock_irq(&dev->event_lock); spin_lock(&client->buffer_lock); - memcpy(mem, bits, sizeof(unsigned long) * max); + memcpy(mem, bits, len); spin_unlock(&dev->event_lock); @@ -778,7 +781,7 @@ static int evdev_handle_get_val(struct evdev_client *client, spin_unlock_irq(&client->buffer_lock); - ret = bits_to_user(mem, max, size, p, compat); + ret = bits_to_user(mem, maxbit, maxlen, p, compat); if (ret < 0) evdev_queue_syn_dropped(client); diff --git a/drivers/input/input.c b/drivers/input/input.c index 74f4798..fcf77af 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -257,9 +257,10 @@ static int input_handle_abs_event(struct input_dev *dev, } static int input_get_disposition(struct input_dev *dev, - unsigned int type, unsigned int code, int value) + unsigned int type, unsigned int code, int *pval) { int disposition = INPUT_IGNORE_EVENT; + int value = *pval; switch (type) { @@ -357,6 +358,7 @@ static int input_get_disposition(struct input_dev *dev, break; } + *pval = value; return disposition; } @@ -365,7 +367,7 @@ static void input_handle_event(struct input_dev *dev, { int disposition; - disposition = input_get_disposition(dev, type, code, value); + disposition = input_get_disposition(dev, type, code, &value); if ((disposition & INPUT_PASS_TO_DEVICE) && dev->event) dev->event(dev, type, code, value); diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 75e3b10..90c7f97 100644 --- 
a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -125,6 +125,10 @@ static const struct xpad_device { { 0x045e, 0x0291, "Xbox 360 Wireless Receiver (XBOX)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W }, { 0x045e, 0x0719, "Xbox 360 Wireless Receiver", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W }, { 0x044f, 0x0f07, "Thrustmaster, Inc. Controller", 0, XTYPE_XBOX }, + { 0x044f, 0xb326, "Thrustmaster Gamepad GP XID", 0, XTYPE_XBOX360 }, + { 0x046d, 0xc21d, "Logitech Gamepad F310", 0, XTYPE_XBOX360 }, + { 0x046d, 0xc21e, "Logitech Gamepad F510", 0, XTYPE_XBOX360 }, + { 0x046d, 0xc21f, "Logitech Gamepad F710", 0, XTYPE_XBOX360 }, { 0x046d, 0xc242, "Logitech Chillstream Controller", 0, XTYPE_XBOX360 }, { 0x046d, 0xca84, "Logitech Xbox Cordless Controller", 0, XTYPE_XBOX }, { 0x046d, 0xca88, "Logitech Compact Controller for Xbox", 0, XTYPE_XBOX }, @@ -137,10 +141,17 @@ static const struct xpad_device { { 0x0738, 0x4540, "Mad Catz Beat Pad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x0738, 0x4556, "Mad Catz Lynx Wireless Controller", 0, XTYPE_XBOX }, { 0x0738, 0x4716, "Mad Catz Wired Xbox 360 Controller", 0, XTYPE_XBOX360 }, + { 0x0738, 0x4718, "Mad Catz Street Fighter IV FightStick SE", 0, XTYPE_XBOX360 }, + { 0x0738, 0x4726, "Mad Catz Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x0738, 0x4728, "Mad Catz Street Fighter IV FightPad", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0738, 0x4738, "Mad Catz Wired Xbox 360 Controller (SFIV)", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, + { 0x0738, 0x4740, "Mad Catz Beat Pad", 0, XTYPE_XBOX360 }, { 0x0738, 0x6040, "Mad Catz Beat Pad Pro", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, + { 0x0738, 0xb726, "Mad Catz Xbox controller - MW2", 0, XTYPE_XBOX360 }, { 0x0738, 0xbeef, "Mad Catz JOYTECH NEO SE Advanced GamePad", XTYPE_XBOX360 }, + { 0x0738, 0xcb02, "Saitek Cyborg Rumble Pad - PC/Xbox 360", 0, XTYPE_XBOX360 }, + { 0x0738, 0xcb03, "Saitek P3200 Rumble Pad - PC/Xbox 360", 0, XTYPE_XBOX360 }, + { 0x0738, 0xf738, "Super SFIV FightStick TE 
S", 0, XTYPE_XBOX360 }, { 0x0c12, 0x8802, "Zeroplus Xbox Controller", 0, XTYPE_XBOX }, { 0x0c12, 0x8809, "RedOctane Xbox Dance Pad", DANCEPAD_MAP_CONFIG, XTYPE_XBOX }, { 0x0c12, 0x880a, "Pelican Eclipse PL-2023", 0, XTYPE_XBOX }, @@ -153,28 +164,51 @@ static const struct xpad_device { { 0x0e6f, 0x0005, "Eclipse wireless Controller", 0, XTYPE_XBOX }, { 0x0e6f, 0x0006, "Edge wireless Controller", 0, XTYPE_XBOX }, { 0x0e6f, 0x0105, "HSM3 Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, + { 0x0e6f, 0x0113, "Afterglow AX.1 Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0201, "Pelican PL-3601 'TSZ' Wired Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0213, "Afterglow Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, + { 0x0e6f, 0x021f, "Rock Candy Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, + { 0x0e6f, 0x0301, "Logic3 Controller", 0, XTYPE_XBOX360 }, + { 0x0e6f, 0x0401, "Logic3 Controller", 0, XTYPE_XBOX360 }, { 0x0e8f, 0x0201, "SmartJoy Frag Xpad/PS2 adaptor", 0, XTYPE_XBOX }, + { 0x0e8f, 0x3008, "Generic xbox control (dealextreme)", 0, XTYPE_XBOX }, + { 0x0f0d, 0x000a, "Hori Co. 
DOA4 FightStick", 0, XTYPE_XBOX360 }, { 0x0f0d, 0x000d, "Hori Fighting Stick EX2", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0f0d, 0x0016, "Hori Real Arcade Pro.EX", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0f30, 0x0202, "Joytech Advanced Controller", 0, XTYPE_XBOX }, { 0x0f30, 0x8888, "BigBen XBMiniPad Controller", 0, XTYPE_XBOX }, { 0x102c, 0xff0c, "Joytech Wireless Advanced Controller", 0, XTYPE_XBOX }, { 0x12ab, 0x0004, "Honey Bee Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, + { 0x12ab, 0x0301, "PDP AFTERGLOW AX.1", 0, XTYPE_XBOX360 }, { 0x12ab, 0x8809, "Xbox DDR dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x1430, 0x4748, "RedOctane Guitar Hero X-plorer", 0, XTYPE_XBOX360 }, { 0x1430, 0x8888, "TX6500+ Dance Pad (first generation)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x146b, 0x0601, "BigBen Interactive XBOX 360 Controller", 0, XTYPE_XBOX360 }, - { 0x1689, 0xfd00, "Razer Onza Tournament Edition", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, - { 0x1689, 0xfd01, "Razer Onza Classic Edition", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, + { 0x1532, 0x0037, "Razer Sabertooth", 0, XTYPE_XBOX360 }, + { 0x15e4, 0x3f00, "Power A Mini Pro Elite", 0, XTYPE_XBOX360 }, + { 0x15e4, 0x3f0a, "Xbox Airflo wired controller", 0, XTYPE_XBOX360 }, + { 0x15e4, 0x3f10, "Batarang Xbox 360 controller", 0, XTYPE_XBOX360 }, + { 0x162e, 0xbeef, "Joytech Neo-Se Take2", 0, XTYPE_XBOX360 }, + { 0x1689, 0xfd00, "Razer Onza Tournament Edition", 0, XTYPE_XBOX360 }, + { 0x1689, 0xfd01, "Razer Onza Classic Edition", 0, XTYPE_XBOX360 }, + { 0x24c6, 0x5d04, "Razer Sabertooth", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0002, "Harmonix Rock Band Guitar", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf016, "Mad Catz Xbox 360 Controller", 0, XTYPE_XBOX360 }, + { 0x1bad, 0xf023, "MLG Pro Circuit Controller (Xbox)", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf028, "Street Fighter IV FightPad", 0, XTYPE_XBOX360 }, + { 0x1bad, 0xf038, "Street 
Fighter IV FightStick TE", 0, XTYPE_XBOX360 }, + { 0x1bad, 0xf900, "Harmonix Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf901, "Gamestop Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf903, "Tron Xbox 360 controller", 0, XTYPE_XBOX360 }, + { 0x24c6, 0x5000, "Razer Atrox Arcade Stick", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5300, "PowerA MINI PROEX Controller", 0, XTYPE_XBOX360 }, + { 0x24c6, 0x5303, "Xbox Airflo wired controller", 0, XTYPE_XBOX360 }, + { 0x24c6, 0x5500, "Hori XBOX 360 EX 2 with Turbo", 0, XTYPE_XBOX360 }, + { 0x24c6, 0x5501, "Hori Real Arcade Pro VX-SA", 0, XTYPE_XBOX360 }, + { 0x24c6, 0x5506, "Hori SOULCALIBUR V Stick", 0, XTYPE_XBOX360 }, + { 0x24c6, 0x5b02, "Thrustmaster, Inc. GPX Controller", 0, XTYPE_XBOX360 }, + { 0x24c6, 0x5b03, "Thrustmaster Ferrari 458 Racing Wheel", 0, XTYPE_XBOX360 }, { 0xffff, 0xffff, "Chinese-made Xbox Controller", 0, XTYPE_XBOX }, { 0x0000, 0x0000, "Generic X-Box pad", 0, XTYPE_UNKNOWN } }; @@ -258,6 +292,9 @@ static struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x0f0d), /* Hori Controllers */ XPAD_XBOX360_VENDOR(0x1689), /* Razer Onza */ XPAD_XBOX360_VENDOR(0x24c6), /* PowerA Controllers */ + XPAD_XBOX360_VENDOR(0x1532), /* Razer Sabertooth */ + XPAD_XBOX360_VENDOR(0x15e4), /* Numark X-Box 360 controllers */ + XPAD_XBOX360_VENDOR(0x162e), /* Joytech X-Box 360 controllers */ { } }; @@ -1002,9 +1039,19 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id } ep_irq_in = &intf->cur_altsetting->endpoint[1].desc; - usb_fill_bulk_urb(xpad->bulk_out, udev, - usb_sndbulkpipe(udev, ep_irq_in->bEndpointAddress), - xpad->bdata, XPAD_PKT_LEN, xpad_bulk_out, xpad); + if (usb_endpoint_is_bulk_out(ep_irq_in)) { + usb_fill_bulk_urb(xpad->bulk_out, udev, + usb_sndbulkpipe(udev, + ep_irq_in->bEndpointAddress), + xpad->bdata, XPAD_PKT_LEN, + xpad_bulk_out, xpad); + } else { + usb_fill_int_urb(xpad->bulk_out, udev, + usb_sndintpipe(udev, + ep_irq_in->bEndpointAddress), + xpad->bdata, 
XPAD_PKT_LEN, + xpad_bulk_out, xpad, 0); + } /* * Submit the int URB immediately rather than waiting for open diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c index 2626773..6f5d795 100644 --- a/drivers/input/keyboard/atkbd.c +++ b/drivers/input/keyboard/atkbd.c @@ -243,6 +243,12 @@ static void (*atkbd_platform_fixup)(struct atkbd *, const void *data); static void *atkbd_platform_fixup_data; static unsigned int (*atkbd_platform_scancode_fixup)(struct atkbd *, unsigned int); +/* + * Certain keyboards to not like ATKBD_CMD_RESET_DIS and stop responding + * to many commands until full reset (ATKBD_CMD_RESET_BAT) is performed. + */ +static bool atkbd_skip_deactivate; + static ssize_t atkbd_attr_show_helper(struct device *dev, char *buf, ssize_t (*handler)(struct atkbd *, char *)); static ssize_t atkbd_attr_set_helper(struct device *dev, const char *buf, size_t count, @@ -768,7 +774,8 @@ static int atkbd_probe(struct atkbd *atkbd) * Make sure nothing is coming from the keyboard and disturbs our * internal state. 
*/ - atkbd_deactivate(atkbd); + if (!atkbd_skip_deactivate) + atkbd_deactivate(atkbd); return 0; } @@ -1638,6 +1645,12 @@ static int __init atkbd_setup_scancode_fixup(const struct dmi_system_id *id) return 1; } +static int __init atkbd_deactivate_fixup(const struct dmi_system_id *id) +{ + atkbd_skip_deactivate = true; + return 1; +} + static const struct dmi_system_id atkbd_dmi_quirk_table[] __initconst = { { .matches = { @@ -1775,6 +1788,12 @@ static const struct dmi_system_id atkbd_dmi_quirk_table[] __initconst = { .callback = atkbd_setup_scancode_fixup, .driver_data = atkbd_oqo_01plus_scancode_fixup, }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LG Electronics"), + }, + .callback = atkbd_deactivate_fixup, + }, { } }; diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index 7c5d72a..642a42f 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -873,7 +873,13 @@ static psmouse_ret_t alps_process_byte(struct psmouse *psmouse) { struct alps_data *priv = psmouse->private; - if ((psmouse->packet[0] & 0xc8) == 0x08) { /* PS/2 packet */ + /* + * Check if we are dealing with a bare PS/2 packet, presumably from + * a device connected to the external PS/2 port. Because bare PS/2 + * protocol does not have enough constant bits to self-synchronize + * properly we only do this if the device is fully synchronized. + */ + if (!psmouse->out_of_sync_cnt && (psmouse->packet[0] & 0xc8) == 0x08) { if (psmouse->pktcnt == 3) { alps_report_bare_ps2_packet(psmouse, psmouse->packet, true); @@ -903,6 +909,21 @@ static psmouse_ret_t alps_process_byte(struct psmouse *psmouse) psmouse_dbg(psmouse, "refusing packet[%i] = %x\n", psmouse->pktcnt - 1, psmouse->packet[psmouse->pktcnt - 1]); + + if (priv->proto_version == ALPS_PROTO_V3 && + psmouse->pktcnt == psmouse->pktsize) { + /* + * Some Dell boxes, such as Latitude E6440 or E7440 + * with closed lid, quite often smash last byte of + * otherwise valid packet with 0xff. 
Given that the + * next packet is very likely to be valid let's + * report PSMOUSE_FULL_PACKET but not process data, + * rather than reporting PSMOUSE_BAD_DATA and + * filling the logs. + */ + return PSMOUSE_FULL_PACKET; + } + return PSMOUSE_BAD_DATA; } @@ -1816,6 +1837,9 @@ int alps_init(struct psmouse *psmouse) /* We are having trouble resyncing ALPS touchpads so disable it for now */ psmouse->resync_time = 0; + /* Allow 2 invalid packets without resetting device */ + psmouse->resetafter = psmouse->pktsize * 2; + return 0; init_fail: diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index ef1cf52..0b75b57 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -11,6 +11,7 @@ */ #include <linux/delay.h> +#include <linux/dmi.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/input.h> @@ -472,8 +473,15 @@ static void elantech_report_absolute_v3(struct psmouse *psmouse, input_report_key(dev, BTN_TOOL_FINGER, fingers == 1); input_report_key(dev, BTN_TOOL_DOUBLETAP, fingers == 2); input_report_key(dev, BTN_TOOL_TRIPLETAP, fingers == 3); - input_report_key(dev, BTN_LEFT, packet[0] & 0x01); - input_report_key(dev, BTN_RIGHT, packet[0] & 0x02); + + /* For clickpads map both buttons to BTN_LEFT */ + if (etd->fw_version & 0x001000) { + input_report_key(dev, BTN_LEFT, packet[0] & 0x03); + } else { + input_report_key(dev, BTN_LEFT, packet[0] & 0x01); + input_report_key(dev, BTN_RIGHT, packet[0] & 0x02); + } + input_report_abs(dev, ABS_PRESSURE, pres); input_report_abs(dev, ABS_TOOL_WIDTH, width); @@ -483,10 +491,17 @@ static void elantech_report_absolute_v3(struct psmouse *psmouse, static void elantech_input_sync_v4(struct psmouse *psmouse) { struct input_dev *dev = psmouse->dev; + struct elantech_data *etd = psmouse->private; unsigned char *packet = psmouse->packet; - input_report_key(dev, BTN_LEFT, packet[0] & 0x01); - input_report_key(dev, BTN_RIGHT, packet[0] & 0x02); + /* For clickpads map both 
buttons to BTN_LEFT */ + if (etd->fw_version & 0x001000) { + input_report_key(dev, BTN_LEFT, packet[0] & 0x03); + } else { + input_report_key(dev, BTN_LEFT, packet[0] & 0x01); + input_report_key(dev, BTN_RIGHT, packet[0] & 0x02); + } + input_mt_report_pointer_emulation(dev, true); input_sync(dev); } @@ -831,7 +846,11 @@ static int elantech_set_absolute_mode(struct psmouse *psmouse) break; case 3: - etd->reg_10 = 0x0b; + if (etd->set_hw_resolution) + etd->reg_10 = 0x0b; + else + etd->reg_10 = 0x01; + if (elantech_write_reg(psmouse, 0x10, etd->reg_10)) rc = -1; @@ -1234,6 +1253,13 @@ static bool elantech_is_signature_valid(const unsigned char *param) if (param[1] == 0) return true; + /* + * Some models have a revision higher then 20. Meaning param[2] may + * be 10 or 20, skip the rates check for these. + */ + if (param[0] == 0x46 && (param[1] & 0xef) == 0x0f && param[2] < 40) + return true; + for (i = 0; i < ARRAY_SIZE(rates); i++) if (param[2] == rates[i]) return false; @@ -1331,6 +1357,23 @@ static int elantech_reconnect(struct psmouse *psmouse) } /* + * Some hw_version 3 models go into error state when we try to set + * bit 3 and/or bit 1 of r10. + */ +static const struct dmi_system_id no_hw_res_dmi_table[] = { +#if defined(CONFIG_DMI) && defined(CONFIG_X86) + { + /* Gigabyte U2442 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"), + DMI_MATCH(DMI_PRODUCT_NAME, "U2442"), + }, + }, +#endif + { } +}; + +/* * determine hardware version and set some properties according to it. */ static int elantech_set_properties(struct elantech_data *etd) @@ -1389,6 +1432,9 @@ static int elantech_set_properties(struct elantech_data *etd) */ etd->crc_enabled = ((etd->fw_version & 0x4000) == 0x4000); + /* Enable real hardware resolution on hw_version 3 ? 
*/ + etd->set_hw_resolution = !dmi_check_system(no_hw_res_dmi_table); + return 0; } diff --git a/drivers/input/mouse/elantech.h b/drivers/input/mouse/elantech.h index 036a04a..9e0e2a1 100644 --- a/drivers/input/mouse/elantech.h +++ b/drivers/input/mouse/elantech.h @@ -130,6 +130,7 @@ struct elantech_data { bool jumpy_cursor; bool reports_pressure; bool crc_enabled; + bool set_hw_resolution; unsigned char hw_version; unsigned int fw_version; unsigned int single_finger_reports; diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index d8d49d1..a3769cf 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -117,6 +117,87 @@ void synaptics_reset(struct psmouse *psmouse) } #ifdef CONFIG_MOUSE_PS2_SYNAPTICS +struct min_max_quirk { + const char * const *pnp_ids; + int x_min, x_max, y_min, y_max; +}; + +static const struct min_max_quirk min_max_pnpid_table[] = { + { + (const char * const []){"LEN0033", NULL}, + 1024, 5052, 2258, 4832 + }, + { + (const char * const []){"LEN0035", "LEN0042", NULL}, + 1232, 5710, 1156, 4696 + }, + { + (const char * const []){"LEN0034", "LEN0036", "LEN0039", + "LEN2002", "LEN2004", NULL}, + 1024, 5112, 2024, 4832 + }, + { + (const char * const []){"LEN2001", NULL}, + 1024, 5022, 2508, 4832 + }, + { + (const char * const []){"LEN2006", NULL}, + 1264, 5675, 1171, 4688 + }, + { } +}; + +/* This list has been kindly provided by Synaptics. 
*/ +static const char * const topbuttonpad_pnp_ids[] = { + "LEN0017", + "LEN0018", + "LEN0019", + "LEN0023", + "LEN002A", + "LEN002B", + "LEN002C", + "LEN002D", + "LEN002E", + "LEN0033", /* Helix */ + "LEN0034", /* T431s, L440, L540, T540, W540, X1 Carbon 2nd */ + "LEN0035", /* X240 */ + "LEN0036", /* T440 */ + "LEN0037", + "LEN0038", + "LEN0039", /* T440s */ + "LEN0041", + "LEN0042", /* Yoga */ + "LEN0045", + "LEN0046", + "LEN0047", + "LEN0048", + "LEN0049", + "LEN2000", + "LEN2001", /* Edge E431 */ + "LEN2002", /* Edge E531 */ + "LEN2003", + "LEN2004", /* L440 */ + "LEN2005", + "LEN2006", + "LEN2007", + "LEN2008", + "LEN2009", + "LEN200A", + "LEN200B", + NULL +}; + +static bool matches_pnp_id(struct psmouse *psmouse, const char * const ids[]) +{ + int i; + + if (!strncmp(psmouse->ps2dev.serio->firmware_id, "PNP:", 4)) + for (i = 0; ids[i]; i++) + if (strstr(psmouse->ps2dev.serio->firmware_id, ids[i])) + return true; + + return false; +} /***************************************************************************** * Synaptics communications functions @@ -266,20 +347,11 @@ static int synaptics_identify(struct psmouse *psmouse) * Resolution is left zero if touchpad does not support the query */ -static const int *quirk_min_max; - static int synaptics_resolution(struct psmouse *psmouse) { struct synaptics_data *priv = psmouse->private; unsigned char resp[3]; - - if (quirk_min_max) { - priv->x_min = quirk_min_max[0]; - priv->x_max = quirk_min_max[1]; - priv->y_min = quirk_min_max[2]; - priv->y_max = quirk_min_max[3]; - return 0; - } + int i; if (SYN_ID_MAJOR(priv->identity) < 4) return 0; @@ -291,6 +363,16 @@ static int synaptics_resolution(struct psmouse *psmouse) } } + for (i = 0; min_max_pnpid_table[i].pnp_ids; i++) { + if (matches_pnp_id(psmouse, min_max_pnpid_table[i].pnp_ids)) { + priv->x_min = min_max_pnpid_table[i].x_min; + priv->x_max = min_max_pnpid_table[i].x_max; + priv->y_min = min_max_pnpid_table[i].y_min; + priv->y_max = min_max_pnpid_table[i].y_max; + 
return 0; + } + } + if (SYN_EXT_CAP_REQUESTS(priv->capabilities) >= 5 && SYN_CAP_MAX_DIMENSIONS(priv->ext_cap_0c)) { if (synaptics_send_cmd(psmouse, SYN_QUE_EXT_MAX_COORDS, resp)) { @@ -549,10 +631,61 @@ static int synaptics_parse_hw_state(const unsigned char buf[], ((buf[0] & 0x04) >> 1) | ((buf[3] & 0x04) >> 2)); + if ((SYN_CAP_ADV_GESTURE(priv->ext_cap_0c) || + SYN_CAP_IMAGE_SENSOR(priv->ext_cap_0c)) && + hw->w == 2) { + synaptics_parse_agm(buf, priv, hw); + return 1; + } + + hw->x = (((buf[3] & 0x10) << 8) | + ((buf[1] & 0x0f) << 8) | + buf[4]); + hw->y = (((buf[3] & 0x20) << 7) | + ((buf[1] & 0xf0) << 4) | + buf[5]); + hw->z = buf[2]; + hw->left = (buf[0] & 0x01) ? 1 : 0; hw->right = (buf[0] & 0x02) ? 1 : 0; - if (SYN_CAP_CLICKPAD(priv->ext_cap_0c)) { + if (SYN_CAP_FORCEPAD(priv->ext_cap_0c)) { + /* + * ForcePads, like Clickpads, use middle button + * bits to report primary button clicks. + * Unfortunately they report primary button not + * only when user presses on the pad above certain + * threshold, but also when there are more than one + * finger on the touchpad, which interferes with + * out multi-finger gestures. + */ + if (hw->z == 0) { + /* No contacts */ + priv->press = priv->report_press = false; + } else if (hw->w >= 4 && ((buf[0] ^ buf[3]) & 0x01)) { + /* + * Single-finger touch with pressure above + * the threshold. If pressure stays long + * enough, we'll start reporting primary + * button. We rely on the device continuing + * sending data even if finger does not + * move. 
+ */ + if (!priv->press) { + priv->press_start = jiffies; + priv->press = true; + } else if (time_after(jiffies, + priv->press_start + + msecs_to_jiffies(50))) { + priv->report_press = true; + } + } else { + priv->press = false; + } + + hw->left = priv->report_press; + + } else if (SYN_CAP_CLICKPAD(priv->ext_cap_0c)) { /* * Clickpad's button is transmitted as middle button, * however, since it is primary button, we will report @@ -571,21 +704,6 @@ static int synaptics_parse_hw_state(const unsigned char buf[], hw->down = ((buf[0] ^ buf[3]) & 0x02) ? 1 : 0; } - if ((SYN_CAP_ADV_GESTURE(priv->ext_cap_0c) || - SYN_CAP_IMAGE_SENSOR(priv->ext_cap_0c)) && - hw->w == 2) { - synaptics_parse_agm(buf, priv, hw); - return 1; - } - - hw->x = (((buf[3] & 0x10) << 8) | - ((buf[1] & 0x0f) << 8) | - buf[4]); - hw->y = (((buf[3] & 0x20) << 7) | - ((buf[1] & 0xf0) << 4) | - buf[5]); - hw->z = buf[2]; - if (SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap) && ((buf[0] ^ buf[3]) & 0x02)) { switch (SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap) & ~0x01) { @@ -1255,8 +1373,10 @@ static void set_abs_position_params(struct input_dev *dev, input_abs_set_res(dev, y_code, priv->y_res); } -static void set_input_params(struct input_dev *dev, struct synaptics_data *priv) +static void set_input_params(struct psmouse *psmouse, + struct synaptics_data *priv) { + struct input_dev *dev = psmouse->dev; int i; /* Things that apply to both modes */ @@ -1325,6 +1445,8 @@ static void set_input_params(struct input_dev *dev, struct synaptics_data *priv) if (SYN_CAP_CLICKPAD(priv->ext_cap_0c)) { __set_bit(INPUT_PROP_BUTTONPAD, dev->propbit); + if (matches_pnp_id(psmouse, topbuttonpad_pnp_ids)) + __set_bit(INPUT_PROP_TOPBUTTONPAD, dev->propbit); /* Clickpads report only left button */ __clear_bit(BTN_RIGHT, dev->keybit); __clear_bit(BTN_MIDDLE, dev->keybit); @@ -1496,54 +1618,10 @@ static const struct dmi_system_id olpc_dmi_table[] __initconst = { { } }; -static const struct dmi_system_id min_max_dmi_table[] __initconst = { 
-#if defined(CONFIG_DMI) - { - /* Lenovo ThinkPad Helix */ - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad Helix"), - }, - .driver_data = (int []){1024, 5052, 2258, 4832}, - }, - { - /* Lenovo ThinkPad X240 */ - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad X240"), - }, - .driver_data = (int []){1232, 5710, 1156, 4696}, - }, - { - /* Lenovo ThinkPad T440s */ - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T440"), - }, - .driver_data = (int []){1024, 5112, 2024, 4832}, - }, - { - /* Lenovo ThinkPad T540p */ - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T540"), - }, - .driver_data = (int []){1024, 5056, 2058, 4832}, - }, -#endif - { } -}; - void __init synaptics_module_init(void) { - const struct dmi_system_id *min_max_dmi; - impaired_toshiba_kbc = dmi_check_system(toshiba_dmi_table); broken_olpc_ec = dmi_check_system(olpc_dmi_table); - - min_max_dmi = dmi_first_match(min_max_dmi_table); - if (min_max_dmi) - quirk_min_max = min_max_dmi->driver_data; } static int __synaptics_init(struct psmouse *psmouse, bool absolute_mode) @@ -1593,7 +1671,7 @@ static int __synaptics_init(struct psmouse *psmouse, bool absolute_mode) priv->capabilities, priv->ext_cap, priv->ext_cap_0c, priv->board_id, priv->firmware_id); - set_input_params(psmouse->dev, priv); + set_input_params(psmouse, priv); /* * Encode touchpad model so that it can be used to set diff --git a/drivers/input/mouse/synaptics.h b/drivers/input/mouse/synaptics.h index e594af0..fb2e076 100644 --- a/drivers/input/mouse/synaptics.h +++ b/drivers/input/mouse/synaptics.h @@ -78,6 +78,11 @@ * 2 0x08 image sensor image sensor tracks 5 fingers, but only * reports 2. 
* 2 0x20 report min query 0x0f gives min coord reported + * 2 0x80 forcepad forcepad is a variant of clickpad that + * does not have physical buttons but rather + * uses pressure above certain threshold to + * report primary clicks. Forcepads also have + * clickpad bit set. */ #define SYN_CAP_CLICKPAD(ex0c) ((ex0c) & 0x100000) /* 1-button ClickPad */ #define SYN_CAP_CLICKPAD2BTN(ex0c) ((ex0c) & 0x000100) /* 2-button ClickPad */ @@ -86,6 +91,7 @@ #define SYN_CAP_ADV_GESTURE(ex0c) ((ex0c) & 0x080000) #define SYN_CAP_REDUCED_FILTERING(ex0c) ((ex0c) & 0x000400) #define SYN_CAP_IMAGE_SENSOR(ex0c) ((ex0c) & 0x000800) +#define SYN_CAP_FORCEPAD(ex0c) ((ex0c) & 0x008000) /* synaptics modes query bits */ #define SYN_MODE_ABSOLUTE(m) ((m) & (1 << 7)) @@ -177,6 +183,11 @@ struct synaptics_data { */ struct synaptics_hw_state agm; bool agm_pending; /* new AGM packet received */ + + /* ForcePad handling */ + unsigned long press_start; + bool press; + bool report_press; }; void synaptics_module_init(void); diff --git a/drivers/input/serio/altera_ps2.c b/drivers/input/serio/altera_ps2.c index 4777a73..b6d370b 100644 --- a/drivers/input/serio/altera_ps2.c +++ b/drivers/input/serio/altera_ps2.c @@ -75,7 +75,7 @@ static void altera_ps2_close(struct serio *io) { struct ps2if *ps2if = io->port_data; - writel(0, ps2if->base); /* disable rx irq */ + writel(0, ps2if->base + 4); /* disable rx irq */ } /* diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 0ec9abb..c1d156a 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -101,6 +101,12 @@ static const struct dmi_system_id __initconst i8042_dmi_noloop_table[] = { }, { .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "X750LN"), + }, + }, + { + .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Compaq"), DMI_MATCH(DMI_PRODUCT_NAME , "ProLiant"), DMI_MATCH(DMI_PRODUCT_VERSION, "8500"), @@ -402,6 +408,20 @@ static 
const struct dmi_system_id __initconst i8042_dmi_nomux_table[] = { }, }, { + /* Acer Aspire 5710 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5710"), + }, + }, + { + /* Acer Aspire 7738 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 7738"), + }, + }, + { /* Gericom Bellagio */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Gericom"), @@ -458,6 +478,13 @@ static const struct dmi_system_id __initconst i8042_dmi_nomux_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion dv4 Notebook PC"), }, }, + { + /* Avatar AVIU-145A6 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Intel"), + DMI_MATCH(DMI_PRODUCT_NAME, "IC4I"), + }, + }, { } }; @@ -601,6 +628,30 @@ static const struct dmi_system_id __initconst i8042_dmi_notimeout_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion dv4 Notebook PC"), }, }, + { + /* Fujitsu A544 laptop */ + /* https://bugzilla.redhat.com/show_bug.cgi?id=1111138 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK A544"), + }, + }, + { + /* Fujitsu AH544 laptop */ + /* https://bugzilla.kernel.org/show_bug.cgi?id=69731 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK AH544"), + }, + }, + { + /* Fujitsu U574 laptop */ + /* https://bugzilla.kernel.org/show_bug.cgi?id=69731 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK U574"), + }, + }, { } }; @@ -684,6 +735,35 @@ static const struct dmi_system_id __initconst i8042_dmi_dritek_table[] = { { } }; +/* + * Some laptops need keyboard reset before probing for the trackpad to get + * it detected, initialised & finally work. 
+ */ +static const struct dmi_system_id __initconst i8042_dmi_kbdreset_table[] = { + { + /* Gigabyte P35 v2 - Elantech touchpad */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"), + DMI_MATCH(DMI_PRODUCT_NAME, "P35V2"), + }, + }, + { + /* Aorus branded Gigabyte X3 Plus - Elantech touchpad */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"), + DMI_MATCH(DMI_PRODUCT_NAME, "X3"), + }, + }, + { + /* Gigabyte P34 - Elantech touchpad */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"), + DMI_MATCH(DMI_PRODUCT_NAME, "P34"), + }, + }, + { } +}; + #endif /* CONFIG_X86 */ #ifdef CONFIG_PNP @@ -702,6 +782,17 @@ static int i8042_pnp_aux_irq; static char i8042_pnp_kbd_name[32]; static char i8042_pnp_aux_name[32]; +static void i8042_pnp_id_to_string(struct pnp_id *id, char *dst, int dst_size) +{ + strlcpy(dst, "PNP:", dst_size); + + while (id) { + strlcat(dst, " ", dst_size); + strlcat(dst, id->id, dst_size); + id = id->next; + } +} + static int i8042_pnp_kbd_probe(struct pnp_dev *dev, const struct pnp_device_id *did) { if (pnp_port_valid(dev, 0) && pnp_port_len(dev, 0) == 1) @@ -718,6 +809,8 @@ static int i8042_pnp_kbd_probe(struct pnp_dev *dev, const struct pnp_device_id * strlcat(i8042_pnp_kbd_name, ":", sizeof(i8042_pnp_kbd_name)); strlcat(i8042_pnp_kbd_name, pnp_dev_name(dev), sizeof(i8042_pnp_kbd_name)); } + i8042_pnp_id_to_string(dev->id, i8042_kbd_firmware_id, + sizeof(i8042_kbd_firmware_id)); /* Keyboard ports are always supposed to be wakeup-enabled */ device_set_wakeup_enable(&dev->dev, true); @@ -742,6 +835,8 @@ static int i8042_pnp_aux_probe(struct pnp_dev *dev, const struct pnp_device_id * strlcat(i8042_pnp_aux_name, ":", sizeof(i8042_pnp_aux_name)); strlcat(i8042_pnp_aux_name, pnp_dev_name(dev), sizeof(i8042_pnp_aux_name)); } + i8042_pnp_id_to_string(dev->id, i8042_aux_firmware_id, + sizeof(i8042_aux_firmware_id)); i8042_pnp_aux_devices++; return 0; @@ -964,6 +1059,9 @@ static int __init i8042_platform_init(void) if 
(dmi_check_system(i8042_dmi_dritek_table)) i8042_dritek = true; + if (dmi_check_system(i8042_dmi_kbdreset_table)) + i8042_kbdreset = true; + /* * A20 was already enabled during early kernel init. But some buggy * BIOSes (in MSI Laptops) require A20 to be enabled using 8042 to diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index 52c9ebf..e38024c 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -67,6 +67,10 @@ static bool i8042_notimeout; module_param_named(notimeout, i8042_notimeout, bool, 0); MODULE_PARM_DESC(notimeout, "Ignore timeouts signalled by i8042"); +static bool i8042_kbdreset; +module_param_named(kbdreset, i8042_kbdreset, bool, 0); +MODULE_PARM_DESC(kbdreset, "Reset device connected to KBD port"); + #ifdef CONFIG_X86 static bool i8042_dritek; module_param_named(dritek, i8042_dritek, bool, 0); @@ -87,6 +91,8 @@ MODULE_PARM_DESC(debug, "Turn i8042 debugging mode on and off"); #endif static bool i8042_bypass_aux_irq_test; +static char i8042_kbd_firmware_id[128]; +static char i8042_aux_firmware_id[128]; #include "i8042.h" @@ -788,6 +794,16 @@ static int __init i8042_check_aux(void) return -1; /* + * Reset keyboard (needed on some laptops to successfully detect + * touchpad, e.g., some Gigabyte laptop models with Elantech + * touchpads). + */ + if (i8042_kbdreset) { + pr_warn("Attempting to reset device connected to KBD port\n"); + i8042_kbd_write(NULL, (unsigned char) 0xff); + } + +/* * Test AUX IRQ delivery to make sure BIOS did not grab the IRQ and * used it for a PCI card or somethig else. 
*/ @@ -1218,6 +1234,8 @@ static int __init i8042_create_kbd_port(void) serio->dev.parent = &i8042_platform_device->dev; strlcpy(serio->name, "i8042 KBD port", sizeof(serio->name)); strlcpy(serio->phys, I8042_KBD_PHYS_DESC, sizeof(serio->phys)); + strlcpy(serio->firmware_id, i8042_kbd_firmware_id, + sizeof(serio->firmware_id)); port->serio = serio; port->irq = I8042_KBD_IRQ; @@ -1244,6 +1262,8 @@ static int __init i8042_create_aux_port(int idx) if (idx < 0) { strlcpy(serio->name, "i8042 AUX port", sizeof(serio->name)); strlcpy(serio->phys, I8042_AUX_PHYS_DESC, sizeof(serio->phys)); + strlcpy(serio->firmware_id, i8042_aux_firmware_id, + sizeof(serio->firmware_id)); serio->close = i8042_port_close; } else { snprintf(serio->name, sizeof(serio->name), "i8042 AUX%d port", idx); diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c index 2b56855..fffc0a6 100644 --- a/drivers/input/serio/serio.c +++ b/drivers/input/serio/serio.c @@ -474,11 +474,19 @@ static ssize_t serio_set_bind_mode(struct device *dev, struct device_attribute * return retval; } +static ssize_t firmware_id_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct serio *serio = to_serio_port(dev); + + return sprintf(buf, "%s\n", serio->firmware_id); +} + static struct device_attribute serio_device_attrs[] = { __ATTR(description, S_IRUGO, serio_show_description, NULL), __ATTR(modalias, S_IRUGO, serio_show_modalias, NULL), __ATTR(drvctl, S_IWUSR, NULL, serio_rebind_driver), __ATTR(bind_mode, S_IWUSR | S_IRUGO, serio_show_bind_mode, serio_set_bind_mode), + __ATTR(firmware_id, S_IRUGO, firmware_id_show, NULL), __ATTR_NULL }; @@ -506,8 +514,8 @@ static void serio_init_port(struct serio *serio) spin_lock_init(&serio->lock); mutex_init(&serio->drv_mutex); device_initialize(&serio->dev); - dev_set_name(&serio->dev, "serio%ld", - (long)atomic_inc_return(&serio_no) - 1); + dev_set_name(&serio->dev, "serio%lu", + (unsigned long)atomic_inc_return(&serio_no) - 1); serio->dev.bus = 
&serio_bus; serio->dev.release = serio_release_port; serio->dev.groups = serio_device_attr_groups; @@ -912,9 +920,14 @@ static int serio_uevent(struct device *dev, struct kobj_uevent_env *env) SERIO_ADD_UEVENT_VAR("SERIO_PROTO=%02x", serio->id.proto); SERIO_ADD_UEVENT_VAR("SERIO_ID=%02x", serio->id.id); SERIO_ADD_UEVENT_VAR("SERIO_EXTRA=%02x", serio->id.extra); + SERIO_ADD_UEVENT_VAR("MODALIAS=serio:ty%02Xpr%02Xid%02Xex%02X", serio->id.type, serio->id.proto, serio->id.id, serio->id.extra); + if (serio->firmware_id[0]) + SERIO_ADD_UEVENT_VAR("SERIO_FIRMWARE_ID=%s", + serio->firmware_id); + return 0; } #undef SERIO_ADD_UEVENT_VAR diff --git a/drivers/input/serio/serport.c b/drivers/input/serio/serport.c index 8755f5f..e4ecf3b 100644 --- a/drivers/input/serio/serport.c +++ b/drivers/input/serio/serport.c @@ -21,6 +21,7 @@ #include <linux/init.h> #include <linux/serio.h> #include <linux/tty.h> +#include <linux/compat.h> MODULE_AUTHOR("Vojtech Pavlik <vojtech@ucw.cz>"); MODULE_DESCRIPTION("Input device TTY line discipline"); @@ -196,28 +197,55 @@ static ssize_t serport_ldisc_read(struct tty_struct * tty, struct file * file, u return 0; } +static void serport_set_type(struct tty_struct *tty, unsigned long type) +{ + struct serport *serport = tty->disc_data; + + serport->id.proto = type & 0x000000ff; + serport->id.id = (type & 0x0000ff00) >> 8; + serport->id.extra = (type & 0x00ff0000) >> 16; +} + /* * serport_ldisc_ioctl() allows to set the port protocol, and device ID */ -static int serport_ldisc_ioctl(struct tty_struct * tty, struct file * file, unsigned int cmd, unsigned long arg) +static int serport_ldisc_ioctl(struct tty_struct *tty, struct file *file, + unsigned int cmd, unsigned long arg) { - struct serport *serport = (struct serport*) tty->disc_data; - unsigned long type; - if (cmd == SPIOCSTYPE) { + unsigned long type; + if (get_user(type, (unsigned long __user *) arg)) return -EFAULT; - serport->id.proto = type & 0x000000ff; - serport->id.id = (type & 
0x0000ff00) >> 8; - serport->id.extra = (type & 0x00ff0000) >> 16; + serport_set_type(tty, type); + return 0; + } + + return -EINVAL; +} + +#ifdef CONFIG_COMPAT +#define COMPAT_SPIOCSTYPE _IOW('q', 0x01, compat_ulong_t) +static long serport_ldisc_compat_ioctl(struct tty_struct *tty, + struct file *file, + unsigned int cmd, unsigned long arg) +{ + if (cmd == COMPAT_SPIOCSTYPE) { + void __user *uarg = compat_ptr(arg); + compat_ulong_t compat_type; + + if (get_user(compat_type, (compat_ulong_t __user *)uarg)) + return -EFAULT; + serport_set_type(tty, compat_type); return 0; } return -EINVAL; } +#endif static void serport_ldisc_write_wakeup(struct tty_struct * tty) { @@ -241,6 +269,9 @@ static struct tty_ldisc_ops serport_ldisc = { .close = serport_ldisc_close, .read = serport_ldisc_read, .ioctl = serport_ldisc_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = serport_ldisc_compat_ioctl, +#endif .receive_buf = serport_ldisc_receive, .write_wakeup = serport_ldisc_write_wakeup }; diff --git a/drivers/input/touchscreen/wm97xx-core.c b/drivers/input/touchscreen/wm97xx-core.c index 7e45c9f..b08c16b 100644 --- a/drivers/input/touchscreen/wm97xx-core.c +++ b/drivers/input/touchscreen/wm97xx-core.c @@ -70,11 +70,11 @@ * Documentation/input/input-programming.txt for more details. 
*/ -static int abs_x[3] = {350, 3900, 5}; +static int abs_x[3] = {150, 4000, 5}; module_param_array(abs_x, int, NULL, 0); MODULE_PARM_DESC(abs_x, "Touchscreen absolute X min, max, fuzz"); -static int abs_y[3] = {320, 3750, 40}; +static int abs_y[3] = {200, 4000, 40}; module_param_array(abs_y, int, NULL, 0); MODULE_PARM_DESC(abs_y, "Touchscreen absolute Y min, max, fuzz"); diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 5d2edb4..67644e9 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3227,14 +3227,16 @@ free_domains: static void cleanup_domain(struct protection_domain *domain) { - struct iommu_dev_data *dev_data, *next; + struct iommu_dev_data *entry; unsigned long flags; write_lock_irqsave(&amd_iommu_devtable_lock, flags); - list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) { - __detach_device(dev_data); - atomic_set(&dev_data->bind, 0); + while (!list_empty(&domain->dev_list)) { + entry = list_first_entry(&domain->dev_list, + struct iommu_dev_data, list); + __detach_device(entry); + atomic_set(&entry->bind, 0); } write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); @@ -3999,7 +4001,7 @@ static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic) iommu_flush_dte(iommu, devid); if (devid != alias) { irq_lookup_table[alias] = table; - set_dte_irq_entry(devid, table); + set_dte_irq_entry(alias, table); iommu_flush_dte(iommu, alias); } diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 24a60b99..e26905c 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -767,8 +767,11 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain) reg |= TTBCR_EAE | (TTBCR_SH_IS << TTBCR_SH0_SHIFT) | (TTBCR_RGN_WBWA << TTBCR_ORGN0_SHIFT) | - (TTBCR_RGN_WBWA << TTBCR_IRGN0_SHIFT) | - (TTBCR_SL0_LVL_1 << TTBCR_SL0_SHIFT); + (TTBCR_RGN_WBWA << TTBCR_IRGN0_SHIFT); + + if (!stage1) + reg |= (TTBCR_SL0_LVL_1 << TTBCR_SL0_SHIFT); + writel_relaxed(reg, 
cb_base + ARM_SMMU_CB_TTBCR); /* MAIR0 (stage-1 only) */ diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index cae5a08..fd0516c 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1796,7 +1796,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, struct dma_pte *first_pte = NULL, *pte = NULL; phys_addr_t uninitialized_var(pteval); int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; - unsigned long sg_res; + unsigned long sg_res = 0; unsigned int largepage_lvl = 0; unsigned long lvl_pages = 0; @@ -1807,10 +1807,8 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP; - if (sg) - sg_res = 0; - else { - sg_res = nr_pages + 1; + if (!sg) { + sg_res = nr_pages; pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot; } @@ -4117,7 +4115,7 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) { struct dmar_domain *dmar_domain = domain->priv; - int order; + int order, iommu_id; order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT, (iova + size - 1) >> VTD_PAGE_SHIFT); @@ -4125,6 +4123,22 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain, if (dmar_domain->max_addr == iova + size) dmar_domain->max_addr = iova; + for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) { + struct intel_iommu *iommu = g_iommus[iommu_id]; + int num, ndomains; + + /* + * find bit position of dmar_domain + */ + ndomains = cap_ndoms(iommu->cap); + for_each_set_bit(num, iommu->domain_ids, ndomains) { + if (iommu->domains[num] == dmar_domain) + iommu_flush_iotlb_psi(iommu, num, + iova >> VTD_PAGE_SHIFT, + 1 << order, 0); + } + } + return PAGE_SIZE << order; } diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index d0e9480..0fcbf92 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -42,6 +42,7 @@ #include 
<linux/irqchip/chained_irq.h> #include <linux/irqchip/arm-gic.h> +#include <asm/cputype.h> #include <asm/irq.h> #include <asm/exception.h> #include <asm/smp_plat.h> @@ -246,10 +247,14 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, bool force) { void __iomem *reg = gic_dist_base(d) + GIC_DIST_TARGET + (gic_irq(d) & ~3); - unsigned int shift = (gic_irq(d) % 4) * 8; - unsigned int cpu = cpumask_any_and(mask_val, cpu_online_mask); + unsigned int cpu, shift = (gic_irq(d) % 4) * 8; u32 val, mask, bit; + if (!force) + cpu = cpumask_any_and(mask_val, cpu_online_mask); + else + cpu = cpumask_first(mask_val); + if (cpu >= NR_GIC_CPU_IF || cpu >= nr_cpu_ids) return -EINVAL; @@ -756,7 +761,9 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start, } for_each_possible_cpu(cpu) { - unsigned long offset = percpu_offset * cpu_logical_map(cpu); + u32 mpidr = cpu_logical_map(cpu); + u32 core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); + unsigned long offset = percpu_offset * core_id; *per_cpu_ptr(gic->dist_base.percpu_base, cpu) = dist_base + offset; *per_cpu_ptr(gic->cpu_base.percpu_base, cpu) = cpu_base + offset; } @@ -860,6 +867,7 @@ int __init gic_of_init(struct device_node *node, struct device_node *parent) } IRQCHIP_DECLARE(cortex_a15_gic, "arm,cortex-a15-gic", gic_of_init); IRQCHIP_DECLARE(cortex_a9_gic, "arm,cortex-a9-gic", gic_of_init); +IRQCHIP_DECLARE(cortex_a7_gic, "arm,cortex-a7-gic", gic_of_init); IRQCHIP_DECLARE(msm_8660_qgic, "qcom,msm-8660-qgic", gic_of_init); IRQCHIP_DECLARE(msm_qgic2, "qcom,msm-qgic2", gic_of_init); diff --git a/drivers/irqchip/spear-shirq.c b/drivers/irqchip/spear-shirq.c index 8527743..391b9ce 100644 --- a/drivers/irqchip/spear-shirq.c +++ b/drivers/irqchip/spear-shirq.c @@ -125,7 +125,7 @@ static struct spear_shirq spear320_shirq_ras2 = { }; static struct spear_shirq spear320_shirq_ras3 = { - .irq_nr = 3, + .irq_nr = 7, .irq_bit_off = 0, .invalid_irq = 1, .regs = { diff --git a/drivers/leds/leds-pwm.c 
b/drivers/leds/leds-pwm.c index bb6f948..305e88a 100644 --- a/drivers/leds/leds-pwm.c +++ b/drivers/leds/leds-pwm.c @@ -82,6 +82,15 @@ static inline size_t sizeof_pwm_leds_priv(int num_leds) (sizeof(struct led_pwm_data) * num_leds); } +static void led_pwm_cleanup(struct led_pwm_priv *priv) +{ + while (priv->num_leds--) { + led_classdev_unregister(&priv->leds[priv->num_leds].cdev); + if (priv->leds[priv->num_leds].can_sleep) + cancel_work_sync(&priv->leds[priv->num_leds].work); + } +} + static struct led_pwm_priv *led_pwm_create_of(struct platform_device *pdev) { struct device_node *node = pdev->dev.of_node; @@ -139,8 +148,7 @@ static struct led_pwm_priv *led_pwm_create_of(struct platform_device *pdev) return priv; err: - while (priv->num_leds--) - led_classdev_unregister(&priv->leds[priv->num_leds].cdev); + led_pwm_cleanup(priv); return NULL; } @@ -200,8 +208,8 @@ static int led_pwm_probe(struct platform_device *pdev) return 0; err: - while (i--) - led_classdev_unregister(&priv->leds[i].cdev); + priv->num_leds = i; + led_pwm_cleanup(priv); return ret; } @@ -209,13 +217,8 @@ err: static int led_pwm_remove(struct platform_device *pdev) { struct led_pwm_priv *priv = platform_get_drvdata(pdev); - int i; - for (i = 0; i < priv->num_leds; i++) { - led_classdev_unregister(&priv->leds[i].cdev); - if (priv->leds[i].can_sleep) - cancel_work_sync(&priv->leds[i].work); - } + led_pwm_cleanup(priv); return 0; } diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 0f12382..7552207 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -663,9 +663,13 @@ struct gc_stat { * CACHE_SET_STOPPING always gets set first when we're closing down a cache set; * we'll continue to run normally for awhile with CACHE_SET_STOPPING set (i.e. * flushing dirty data). + * + * CACHE_SET_RUNNING means all cache devices have been registered and journal + * replay is complete. 
*/ #define CACHE_SET_UNREGISTERING 0 #define CACHE_SET_STOPPING 1 +#define CACHE_SET_RUNNING 2 struct cache_set { struct closure cl; diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index d1734d9..26ca4db 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -141,7 +141,7 @@ static void bch_btree_node_read_done(struct btree *b) struct bset *i = b->sets[0].data; struct btree_iter *iter; - iter = mempool_alloc(b->c->fill_iter, GFP_NOWAIT); + iter = mempool_alloc(b->c->fill_iter, GFP_NOIO); iter->size = b->c->sb.bucket_size / b->c->sb.block_size; iter->used = 0; diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 8435f81..c494379 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -42,11 +42,11 @@ static int journal_read_bucket(struct cache *ca, struct list_head *list, int ret = 0; sector_t bucket = bucket_to_sector(ca->set, ca->sb.d[bucket_index]); - pr_debug("reading %llu", (uint64_t) bucket); + pr_debug("reading %u", bucket_index); while (offset < ca->sb.bucket_size) { reread: left = ca->sb.bucket_size - offset; - len = min_t(unsigned, left, PAGE_SECTORS * 8); + len = min_t(unsigned, left, PAGE_SECTORS << JSET_BITS); bio_reset(bio); bio->bi_sector = bucket + offset; @@ -72,17 +72,26 @@ reread: left = ca->sb.bucket_size - offset; struct list_head *where; size_t blocks, bytes = set_bytes(j); - if (j->magic != jset_magic(ca->set)) + if (j->magic != jset_magic(ca->set)) { + pr_debug("%u: bad magic", bucket_index); return ret; + } - if (bytes > left << 9) + if (bytes > left << 9 || + bytes > PAGE_SIZE << JSET_BITS) { + pr_info("%u: too big, %zu bytes, offset %u", + bucket_index, bytes, offset); return ret; + } if (bytes > len << 9) goto reread; - if (j->csum != csum_set(j)) + if (j->csum != csum_set(j)) { + pr_info("%u: bad csum, %zu bytes, offset %u", + bucket_index, bytes, offset); return ret; + } blocks = set_blocks(j, ca->set); diff --git a/drivers/md/bcache/super.c 
b/drivers/md/bcache/super.c index 547c4c5..f5004c5 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1235,6 +1235,9 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size) if (test_bit(CACHE_SET_STOPPING, &c->flags)) return -EINTR; + if (!test_bit(CACHE_SET_RUNNING, &c->flags)) + return -EPERM; + u = uuid_find_empty(c); if (!u) { pr_err("Can't create volume, no room for UUID"); @@ -1300,8 +1303,11 @@ static void cache_set_free(struct closure *cl) bch_journal_free(c); for_each_cache(ca, c, i) - if (ca) + if (ca) { + ca->set = NULL; + c->cache[ca->sb.nr_this_dev] = NULL; kobject_put(&ca->kobj); + } free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c))); free_pages((unsigned long) c->sort, ilog2(bucket_pages(c))); @@ -1637,6 +1643,7 @@ static void run_cache_set(struct cache_set *c) flash_devs_run(c); + set_bit(CACHE_SET_RUNNING, &c->flags); return; err_unlock_gc: closure_set_stopped(&c->gc.cl); @@ -1722,8 +1729,10 @@ void bch_cache_release(struct kobject *kobj) { struct cache *ca = container_of(kobj, struct cache, kobj); - if (ca->set) + if (ca->set) { + BUG_ON(ca->set->cache[ca->sb.nr_this_dev] != ca); ca->set->cache[ca->sb.nr_this_dev] = NULL; + } bch_cache_allocator_exit(ca); @@ -1794,7 +1803,7 @@ err: } static void register_cache(struct cache_sb *sb, struct page *sb_page, - struct block_device *bdev, struct cache *ca) + struct block_device *bdev, struct cache *ca) { char name[BDEVNAME_SIZE]; const char *err = "cannot allocate memory"; diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index ea345c6..4dd1e2c 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -417,8 +417,8 @@ do { \ average_frequency, frequency_units); \ __print_time_stat(stats, name, \ average_duration, duration_units); \ - __print_time_stat(stats, name, \ - max_duration, duration_units); \ + sysfs_print(name ## _ ##max_duration ## _ ## duration_units, \ + div_u64((stats)->max_duration, NSEC_PER_ ## duration_units));\ \ 
sysfs_print(name ## _last_ ## frequency_units, (stats)->last \ ? div_s64(local_clock() - (stats)->last, \ diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 54bdd923..93edd89 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -463,6 +463,7 @@ static void __relink_lru(struct dm_buffer *b, int dirty) c->n_buffers[dirty]++; b->list_mode = dirty; list_move(&b->lru_list, &c->lru[dirty]); + b->last_accessed = jiffies; } /*---------------------------------------------------------------- @@ -529,6 +530,19 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t block, end_io(&b->bio, r); } +static void inline_endio(struct bio *bio, int error) +{ + bio_end_io_t *end_fn = bio->bi_private; + + /* + * Reset the bio to free any attached resources + * (e.g. bio integrity profiles). + */ + bio_reset(bio); + + end_fn(bio, error); +} + static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block, bio_end_io_t *end_io) { @@ -540,7 +554,12 @@ static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block, b->bio.bi_max_vecs = DM_BUFIO_INLINE_VECS; b->bio.bi_sector = block << b->c->sectors_per_block_bits; b->bio.bi_bdev = b->c->bdev; - b->bio.bi_end_io = end_io; + b->bio.bi_end_io = inline_endio; + /* + * Use of .bi_private isn't a problem here because + * the dm_buffer's inline bio is local to bufio. + */ + b->bio.bi_private = end_io; /* * We assume that if len >= PAGE_SIZE ptr is page-aligned. @@ -1417,9 +1436,9 @@ static void drop_buffers(struct dm_bufio_client *c) /* * Test if the buffer is unused and too old, and commit it. - * At if noio is set, we must not do any I/O because we hold - * dm_bufio_clients_lock and we would risk deadlock if the I/O gets rerouted to - * different bufio client. + * And if GFP_NOFS is used, we must not do any I/O because we hold + * dm_bufio_clients_lock and we would risk deadlock if the I/O gets + * rerouted to different bufio client. 
*/ static int __cleanup_old_buffer(struct dm_buffer *b, gfp_t gfp, unsigned long max_jiffies) @@ -1427,7 +1446,7 @@ static int __cleanup_old_buffer(struct dm_buffer *b, gfp_t gfp, if (jiffies - b->last_accessed < max_jiffies) return 0; - if (!(gfp & __GFP_IO)) { + if (!(gfp & __GFP_FS)) { if (test_bit(B_READING, &b->state) || test_bit(B_WRITING, &b->state) || test_bit(B_DIRTY, &b->state)) @@ -1455,9 +1474,9 @@ static long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) { freed += __cleanup_old_buffer(b, gfp_mask, 0); if (!--nr_to_scan) - break; + return freed; + dm_bufio_cond_resched(); } - dm_bufio_cond_resched(); } return freed; } @@ -1469,7 +1488,7 @@ dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) unsigned long freed; c = container_of(shrink, struct dm_bufio_client, shrinker); - if (sc->gfp_mask & __GFP_IO) + if (sc->gfp_mask & __GFP_FS) dm_bufio_lock(c); else if (!dm_bufio_trylock(c)) return SHRINK_STOP; @@ -1486,7 +1505,7 @@ dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc) unsigned long count; c = container_of(shrink, struct dm_bufio_client, shrinker); - if (sc->gfp_mask & __GFP_IO) + if (sc->gfp_mask & __GFP_FS) dm_bufio_lock(c); else if (!dm_bufio_trylock(c)) return 0; @@ -1511,7 +1530,7 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign BUG_ON(block_size < 1 << SECTOR_SHIFT || (block_size & (block_size - 1))); - c = kmalloc(sizeof(*c), GFP_KERNEL); + c = kzalloc(sizeof(*c), GFP_KERNEL); if (!c) { r = -ENOMEM; goto bad_client; diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 1af7255..0bfd9c0 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -88,6 +88,9 @@ struct cache_disk_superblock { } __packed; struct dm_cache_metadata { + atomic_t ref_count; + struct list_head list; + struct block_device *bdev; struct dm_block_manager *bm; struct 
dm_space_map *metadata_sm; @@ -114,6 +117,12 @@ struct dm_cache_metadata { unsigned policy_version[CACHE_POLICY_VERSION_SIZE]; size_t policy_hint_size; struct dm_cache_statistics stats; + + /* + * Reading the space map root can fail, so we read it into this + * buffer before the superblock is locked and updated. + */ + __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE]; }; /*------------------------------------------------------------------- @@ -242,11 +251,31 @@ static void __setup_mapping_info(struct dm_cache_metadata *cmd) } } +static int __save_sm_root(struct dm_cache_metadata *cmd) +{ + int r; + size_t metadata_len; + + r = dm_sm_root_size(cmd->metadata_sm, &metadata_len); + if (r < 0) + return r; + + return dm_sm_copy_root(cmd->metadata_sm, &cmd->metadata_space_map_root, + metadata_len); +} + +static void __copy_sm_root(struct dm_cache_metadata *cmd, + struct cache_disk_superblock *disk_super) +{ + memcpy(&disk_super->metadata_space_map_root, + &cmd->metadata_space_map_root, + sizeof(cmd->metadata_space_map_root)); +} + static int __write_initial_superblock(struct dm_cache_metadata *cmd) { int r; struct dm_block *sblock; - size_t metadata_len; struct cache_disk_superblock *disk_super; sector_t bdev_size = i_size_read(cmd->bdev->bd_inode) >> SECTOR_SHIFT; @@ -254,12 +283,16 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd) if (bdev_size > DM_CACHE_METADATA_MAX_SECTORS) bdev_size = DM_CACHE_METADATA_MAX_SECTORS; - r = dm_sm_root_size(cmd->metadata_sm, &metadata_len); + r = dm_tm_pre_commit(cmd->tm); if (r < 0) return r; - r = dm_tm_pre_commit(cmd->tm); - if (r < 0) + /* + * dm_sm_copy_root() can fail. So we need to do it before we start + * updating the superblock. 
+ */ + r = __save_sm_root(cmd); + if (r) return r; r = superblock_lock_zero(cmd, &sblock); @@ -275,10 +308,7 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd) memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version)); disk_super->policy_hint_size = 0; - r = dm_sm_copy_root(cmd->metadata_sm, &disk_super->metadata_space_map_root, - metadata_len); - if (r < 0) - goto bad_locked; + __copy_sm_root(cmd, disk_super); disk_super->mapping_root = cpu_to_le64(cmd->root); disk_super->hint_root = cpu_to_le64(cmd->hint_root); @@ -295,10 +325,6 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd) disk_super->write_misses = cpu_to_le32(0); return dm_tm_commit(cmd->tm, sblock); - -bad_locked: - dm_bm_unlock(sblock); - return r; } static int __format_metadata(struct dm_cache_metadata *cmd) @@ -384,6 +410,15 @@ static int __open_metadata(struct dm_cache_metadata *cmd) disk_super = dm_block_data(sblock); + /* Verify the data block size hasn't changed */ + if (le32_to_cpu(disk_super->data_block_size) != cmd->data_block_size) { + DMERR("changing the data block size (from %u to %llu) is not supported", + le32_to_cpu(disk_super->data_block_size), + (unsigned long long)cmd->data_block_size); + r = -EINVAL; + goto bad; + } + r = __check_incompat_features(disk_super, cmd); if (r < 0) goto bad; @@ -511,8 +546,9 @@ static int __begin_transaction_flags(struct dm_cache_metadata *cmd, disk_super = dm_block_data(sblock); update_flags(disk_super, mutator); read_superblock_fields(cmd, disk_super); + dm_bm_unlock(sblock); - return dm_bm_flush_and_unlock(cmd->bm, sblock); + return dm_bm_flush(cmd->bm); } static int __begin_transaction(struct dm_cache_metadata *cmd) @@ -540,7 +576,6 @@ static int __commit_transaction(struct dm_cache_metadata *cmd, flags_mutator mutator) { int r; - size_t metadata_len; struct cache_disk_superblock *disk_super; struct dm_block *sblock; @@ -558,8 +593,8 @@ static int __commit_transaction(struct dm_cache_metadata 
*cmd, if (r < 0) return r; - r = dm_sm_root_size(cmd->metadata_sm, &metadata_len); - if (r < 0) + r = __save_sm_root(cmd); + if (r) return r; r = superblock_lock(cmd, &sblock); @@ -586,13 +621,7 @@ static int __commit_transaction(struct dm_cache_metadata *cmd, disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses); disk_super->write_hits = cpu_to_le32(cmd->stats.write_hits); disk_super->write_misses = cpu_to_le32(cmd->stats.write_misses); - - r = dm_sm_copy_root(cmd->metadata_sm, &disk_super->metadata_space_map_root, - metadata_len); - if (r < 0) { - dm_bm_unlock(sblock); - return r; - } + __copy_sm_root(cmd, disk_super); return dm_tm_commit(cmd->tm, sblock); } @@ -624,10 +653,10 @@ static void unpack_value(__le64 value_le, dm_oblock_t *block, unsigned *flags) /*----------------------------------------------------------------*/ -struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev, - sector_t data_block_size, - bool may_format_device, - size_t policy_hint_size) +static struct dm_cache_metadata *metadata_open(struct block_device *bdev, + sector_t data_block_size, + bool may_format_device, + size_t policy_hint_size) { int r; struct dm_cache_metadata *cmd; @@ -638,6 +667,7 @@ struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev, return NULL; } + atomic_set(&cmd->ref_count, 1); init_rwsem(&cmd->root_lock); cmd->bdev = bdev; cmd->data_block_size = data_block_size; @@ -660,10 +690,95 @@ struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev, return cmd; } +/* + * We keep a little list of ref counted metadata objects to prevent two + * different target instances creating separate bufio instances. This is + * an issue if a table is reloaded before the suspend. 
+ */ +static DEFINE_MUTEX(table_lock); +static LIST_HEAD(table); + +static struct dm_cache_metadata *lookup(struct block_device *bdev) +{ + struct dm_cache_metadata *cmd; + + list_for_each_entry(cmd, &table, list) + if (cmd->bdev == bdev) { + atomic_inc(&cmd->ref_count); + return cmd; + } + + return NULL; +} + +static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev, + sector_t data_block_size, + bool may_format_device, + size_t policy_hint_size) +{ + struct dm_cache_metadata *cmd, *cmd2; + + mutex_lock(&table_lock); + cmd = lookup(bdev); + mutex_unlock(&table_lock); + + if (cmd) + return cmd; + + cmd = metadata_open(bdev, data_block_size, may_format_device, policy_hint_size); + if (cmd) { + mutex_lock(&table_lock); + cmd2 = lookup(bdev); + if (cmd2) { + mutex_unlock(&table_lock); + __destroy_persistent_data_objects(cmd); + kfree(cmd); + return cmd2; + } + list_add(&cmd->list, &table); + mutex_unlock(&table_lock); + } + + return cmd; +} + +static bool same_params(struct dm_cache_metadata *cmd, sector_t data_block_size) +{ + if (cmd->data_block_size != data_block_size) { + DMERR("data_block_size (%llu) different from that in metadata (%llu)\n", + (unsigned long long) data_block_size, + (unsigned long long) cmd->data_block_size); + return false; + } + + return true; +} + +struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev, + sector_t data_block_size, + bool may_format_device, + size_t policy_hint_size) +{ + struct dm_cache_metadata *cmd = lookup_or_open(bdev, data_block_size, + may_format_device, policy_hint_size); + if (cmd && !same_params(cmd, data_block_size)) { + dm_cache_metadata_close(cmd); + return NULL; + } + + return cmd; +} + void dm_cache_metadata_close(struct dm_cache_metadata *cmd) { - __destroy_persistent_data_objects(cmd); - kfree(cmd); + if (atomic_dec_and_test(&cmd->ref_count)) { + mutex_lock(&table_lock); + list_del(&cmd->list); + mutex_unlock(&table_lock); + + __destroy_persistent_data_objects(cmd); + 
kfree(cmd); + } } int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 6ab68e0..1771845 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -146,7 +146,13 @@ struct cache { struct list_head need_commit_migrations; sector_t migration_threshold; wait_queue_head_t migration_wait; - atomic_t nr_migrations; + atomic_t nr_allocated_migrations; + + /* + * The number of in flight migrations that are performing + * background io. eg, promotion, writeback. + */ + atomic_t nr_io_migrations; wait_queue_head_t quiescing_wait; atomic_t quiescing_ack; @@ -154,7 +160,7 @@ struct cache { /* * cache_size entries, dirty if set */ - dm_cblock_t nr_dirty; + atomic_t nr_dirty; unsigned long *dirty_bitset; /* @@ -162,7 +168,7 @@ struct cache { */ dm_dblock_t discard_nr_blocks; unsigned long *discard_bitset; - uint32_t discard_block_size; /* a power of 2 times sectors per block */ + uint32_t discard_block_size; /* * Rather than reconstructing the table line for the status we just @@ -182,7 +188,6 @@ struct cache { struct dm_deferred_set *all_io_ds; mempool_t *migration_pool; - struct dm_cache_migration *next_migration; struct dm_cache_policy *policy; unsigned policy_nr_args; @@ -265,10 +270,31 @@ static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell *cel dm_bio_prison_free_cell(cache->prison, cell); } +static struct dm_cache_migration *alloc_migration(struct cache *cache) +{ + struct dm_cache_migration *mg; + + mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT); + if (mg) { + mg->cache = cache; + atomic_inc(&mg->cache->nr_allocated_migrations); + } + + return mg; +} + +static void free_migration(struct dm_cache_migration *mg) +{ + if (atomic_dec_and_test(&mg->cache->nr_allocated_migrations)) + wake_up(&mg->cache->migration_wait); + + mempool_free(mg, mg->cache->migration_pool); +} + static int prealloc_data_structs(struct cache 
*cache, struct prealloc *p) { if (!p->mg) { - p->mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT); + p->mg = alloc_migration(cache); if (!p->mg) return -ENOMEM; } @@ -297,7 +323,7 @@ static void prealloc_free_structs(struct cache *cache, struct prealloc *p) free_prison_cell(cache, p->cell1); if (p->mg) - mempool_free(p->mg, cache->migration_pool); + free_migration(p->mg); } static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p) @@ -408,7 +434,7 @@ static bool is_dirty(struct cache *cache, dm_cblock_t b) static void set_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock) { if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) { - cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) + 1); + atomic_inc(&cache->nr_dirty); policy_set_dirty(cache->policy, oblock); } } @@ -417,8 +443,7 @@ static void clear_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cbl { if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) { policy_clear_dirty(cache->policy, oblock); - cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) - 1); - if (!from_cblock(cache->nr_dirty)) + if (atomic_dec_return(&cache->nr_dirty) == 0) dm_table_event(cache->ti->table); } } @@ -709,24 +734,14 @@ static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio, * Migration covers moving data from the origin device to the cache, or * vice versa. *--------------------------------------------------------------*/ -static void free_migration(struct dm_cache_migration *mg) +static void inc_io_migrations(struct cache *cache) { - mempool_free(mg, mg->cache->migration_pool); + atomic_inc(&cache->nr_io_migrations); } -static void inc_nr_migrations(struct cache *cache) +static void dec_io_migrations(struct cache *cache) { - atomic_inc(&cache->nr_migrations); -} - -static void dec_nr_migrations(struct cache *cache) -{ - atomic_dec(&cache->nr_migrations); - - /* - * Wake the worker in case we're suspending the target. 
- */ - wake_up(&cache->migration_wait); + atomic_dec(&cache->nr_io_migrations); } static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, @@ -749,11 +764,10 @@ static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, wake_worker(cache); } -static void cleanup_migration(struct dm_cache_migration *mg) +static void free_io_migration(struct dm_cache_migration *mg) { - struct cache *cache = mg->cache; + dec_io_migrations(mg->cache); free_migration(mg); - dec_nr_migrations(cache); } static void migration_failure(struct dm_cache_migration *mg) @@ -778,7 +792,7 @@ static void migration_failure(struct dm_cache_migration *mg) cell_defer(cache, mg->new_ocell, 1); } - cleanup_migration(mg); + free_io_migration(mg); } static void migration_success_pre_commit(struct dm_cache_migration *mg) @@ -787,9 +801,9 @@ static void migration_success_pre_commit(struct dm_cache_migration *mg) struct cache *cache = mg->cache; if (mg->writeback) { - cell_defer(cache, mg->old_ocell, false); clear_dirty(cache, mg->old_oblock, mg->cblock); - cleanup_migration(mg); + cell_defer(cache, mg->old_ocell, false); + free_io_migration(mg); return; } else if (mg->demote) { @@ -799,14 +813,14 @@ static void migration_success_pre_commit(struct dm_cache_migration *mg) mg->old_oblock); if (mg->promote) cell_defer(cache, mg->new_ocell, true); - cleanup_migration(mg); + free_io_migration(mg); return; } } else { if (dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock)) { DMWARN_LIMIT("promotion failed; couldn't update on disk metadata"); policy_remove_mapping(cache->policy, mg->new_oblock); - cleanup_migration(mg); + free_io_migration(mg); return; } } @@ -837,12 +851,12 @@ static void migration_success_post_commit(struct dm_cache_migration *mg) spin_unlock_irqrestore(&cache->lock, flags); } else - cleanup_migration(mg); + free_io_migration(mg); } else { - cell_defer(cache, mg->new_ocell, true); clear_dirty(cache, mg->new_oblock, mg->cblock); - 
cleanup_migration(mg); + cell_defer(cache, mg->new_ocell, true); + free_io_migration(mg); } } @@ -1003,7 +1017,7 @@ static void promote(struct cache *cache, struct prealloc *structs, mg->new_ocell = cell; mg->start_jiffies = jiffies; - inc_nr_migrations(cache); + inc_io_migrations(cache); quiesce_migration(mg); } @@ -1024,7 +1038,7 @@ static void writeback(struct cache *cache, struct prealloc *structs, mg->new_ocell = NULL; mg->start_jiffies = jiffies; - inc_nr_migrations(cache); + inc_io_migrations(cache); quiesce_migration(mg); } @@ -1048,7 +1062,7 @@ static void demote_then_promote(struct cache *cache, struct prealloc *structs, mg->new_ocell = new_ocell; mg->start_jiffies = jiffies; - inc_nr_migrations(cache); + inc_io_migrations(cache); quiesce_migration(mg); } @@ -1109,7 +1123,7 @@ static void process_discard_bio(struct cache *cache, struct bio *bio) static bool spare_migration_bandwidth(struct cache *cache) { - sector_t current_volume = (atomic_read(&cache->nr_migrations) + 1) * + sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) * cache->sectors_per_block; return current_volume < cache->migration_threshold; } @@ -1400,7 +1414,7 @@ static void stop_quiescing(struct cache *cache) static void wait_for_migrations(struct cache *cache) { - wait_event(cache->migration_wait, !atomic_read(&cache->nr_migrations)); + wait_event(cache->migration_wait, !atomic_read(&cache->nr_allocated_migrations)); } static void stop_worker(struct cache *cache) @@ -1509,9 +1523,6 @@ static void destroy(struct cache *cache) { unsigned i; - if (cache->next_migration) - mempool_free(cache->next_migration, cache->migration_pool); - if (cache->migration_pool) mempool_destroy(cache->migration_pool); @@ -1908,35 +1919,6 @@ static int create_cache_policy(struct cache *cache, struct cache_args *ca, return 0; } -/* - * We want the discard block size to be a power of two, at least the size - * of the cache block size, and have no more than 2^14 discard blocks - * across the 
origin. - */ -#define MAX_DISCARD_BLOCKS (1 << 14) - -static bool too_many_discard_blocks(sector_t discard_block_size, - sector_t origin_size) -{ - (void) sector_div(origin_size, discard_block_size); - - return origin_size > MAX_DISCARD_BLOCKS; -} - -static sector_t calculate_discard_block_size(sector_t cache_block_size, - sector_t origin_size) -{ - sector_t discard_block_size; - - discard_block_size = roundup_pow_of_two(cache_block_size); - - if (origin_size) - while (too_many_discard_blocks(discard_block_size, origin_size)) - discard_block_size *= 2; - - return discard_block_size; -} - #define DEFAULT_MIGRATION_THRESHOLD 2048 static int cache_create(struct cache_args *ca, struct cache **result) @@ -1961,6 +1943,8 @@ static int cache_create(struct cache_args *ca, struct cache **result) ti->num_discard_bios = 1; ti->discards_supported = true; ti->discard_zeroes_data_unsupported = true; + /* Discard bios must be split on a block boundary */ + ti->split_discard_bios = true; cache->features = ca->features; ti->per_bio_data_size = get_per_bio_data_size(cache); @@ -2026,14 +2010,15 @@ static int cache_create(struct cache_args *ca, struct cache **result) INIT_LIST_HEAD(&cache->quiesced_migrations); INIT_LIST_HEAD(&cache->completed_migrations); INIT_LIST_HEAD(&cache->need_commit_migrations); - atomic_set(&cache->nr_migrations, 0); + atomic_set(&cache->nr_allocated_migrations, 0); + atomic_set(&cache->nr_io_migrations, 0); init_waitqueue_head(&cache->migration_wait); init_waitqueue_head(&cache->quiescing_wait); atomic_set(&cache->quiescing_ack, 0); r = -ENOMEM; - cache->nr_dirty = 0; + atomic_set(&cache->nr_dirty, 0); cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size)); if (!cache->dirty_bitset) { *error = "could not allocate dirty bitset"; @@ -2041,9 +2026,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) } clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size)); - cache->discard_block_size = - 
calculate_discard_block_size(cache->sectors_per_block, - cache->origin_sectors); + cache->discard_block_size = cache->sectors_per_block; cache->discard_nr_blocks = oblock_to_dblock(cache, cache->origin_blocks); cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks)); if (!cache->discard_bitset) { @@ -2087,8 +2070,6 @@ static int cache_create(struct cache_args *ca, struct cache **result) goto bad; } - cache->next_migration = NULL; - cache->need_tick_bio = true; cache->sized = false; cache->quiescing = false; @@ -2531,7 +2512,7 @@ static void cache_status(struct dm_target *ti, status_type_t type, residency = policy_residency(cache->policy); - DMEMIT("%llu/%llu %u %u %u %u %u %u %llu %u ", + DMEMIT("%llu/%llu %u %u %u %u %u %u %llu %lu ", (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata), (unsigned long long)nr_blocks_metadata, (unsigned) atomic_read(&cache->stats.read_hit), @@ -2541,7 +2522,7 @@ static void cache_status(struct dm_target *ti, status_type_t type, (unsigned) atomic_read(&cache->stats.demotion), (unsigned) atomic_read(&cache->stats.promotion), (unsigned long long) from_cblock(residency), - cache->nr_dirty); + (unsigned long) atomic_read(&cache->nr_dirty)); if (cache->features.write_through) DMEMIT("1 writethrough "); @@ -2630,7 +2611,7 @@ static void set_discard_limits(struct cache *cache, struct queue_limits *limits) /* * FIXME: these limits may be incompatible with the cache device */ - limits->max_discard_sectors = cache->discard_block_size * 1024; + limits->max_discard_sectors = cache->discard_block_size; limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT; } diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 0fce0bc..0f64dc5 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -18,7 +18,6 @@ #include <linux/crypto.h> #include <linux/workqueue.h> #include <linux/backing-dev.h> -#include <linux/percpu.h> #include <linux/atomic.h> #include <linux/scatterlist.h> #include 
<asm/page.h> @@ -44,6 +43,7 @@ struct convert_context { unsigned int idx_out; sector_t cc_sector; atomic_t cc_pending; + struct ablkcipher_request *req; }; /* @@ -105,15 +105,7 @@ struct iv_lmk_private { enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID }; /* - * Duplicated per-CPU state for cipher. - */ -struct crypt_cpu { - struct ablkcipher_request *req; -}; - -/* - * The fields in here must be read only after initialization, - * changing state should be in crypt_cpu. + * The fields in here must be read only after initialization. */ struct crypt_config { struct dm_dev *dev; @@ -143,12 +135,6 @@ struct crypt_config { sector_t iv_offset; unsigned int iv_size; - /* - * Duplicated per cpu state. Access through - * per_cpu_ptr() only. - */ - struct crypt_cpu __percpu *cpu; - /* ESSIV: struct crypto_cipher *essiv_tfm */ void *iv_private; struct crypto_ablkcipher **tfms; @@ -184,11 +170,6 @@ static void clone_init(struct dm_crypt_io *, struct bio *); static void kcryptd_queue_crypt(struct dm_crypt_io *io); static u8 *iv_of_dmreq(struct crypt_config *cc, struct dm_crypt_request *dmreq); -static struct crypt_cpu *this_crypt_config(struct crypt_config *cc) -{ - return this_cpu_ptr(cc->cpu); -} - /* * Use this to access cipher attributes that are the same for each CPU. 
*/ @@ -738,16 +719,15 @@ static void kcryptd_async_done(struct crypto_async_request *async_req, static void crypt_alloc_req(struct crypt_config *cc, struct convert_context *ctx) { - struct crypt_cpu *this_cc = this_crypt_config(cc); unsigned key_index = ctx->cc_sector & (cc->tfms_count - 1); - if (!this_cc->req) - this_cc->req = mempool_alloc(cc->req_pool, GFP_NOIO); + if (!ctx->req) + ctx->req = mempool_alloc(cc->req_pool, GFP_NOIO); - ablkcipher_request_set_tfm(this_cc->req, cc->tfms[key_index]); - ablkcipher_request_set_callback(this_cc->req, + ablkcipher_request_set_tfm(ctx->req, cc->tfms[key_index]); + ablkcipher_request_set_callback(ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, - kcryptd_async_done, dmreq_of_req(cc, this_cc->req)); + kcryptd_async_done, dmreq_of_req(cc, ctx->req)); } /* @@ -756,7 +736,6 @@ static void crypt_alloc_req(struct crypt_config *cc, static int crypt_convert(struct crypt_config *cc, struct convert_context *ctx) { - struct crypt_cpu *this_cc = this_crypt_config(cc); int r; atomic_set(&ctx->cc_pending, 1); @@ -768,7 +747,7 @@ static int crypt_convert(struct crypt_config *cc, atomic_inc(&ctx->cc_pending); - r = crypt_convert_block(cc, ctx, this_cc->req); + r = crypt_convert_block(cc, ctx, ctx->req); switch (r) { /* async */ @@ -777,7 +756,7 @@ static int crypt_convert(struct crypt_config *cc, INIT_COMPLETION(ctx->restart); /* fall through*/ case -EINPROGRESS: - this_cc->req = NULL; + ctx->req = NULL; ctx->cc_sector++; continue; @@ -876,6 +855,7 @@ static struct dm_crypt_io *crypt_io_alloc(struct crypt_config *cc, io->sector = sector; io->error = 0; io->base_io = NULL; + io->ctx.req = NULL; atomic_set(&io->io_pending, 0); return io; @@ -901,6 +881,8 @@ static void crypt_dec_pending(struct dm_crypt_io *io) if (!atomic_dec_and_test(&io->io_pending)) return; + if (io->ctx.req) + mempool_free(io->ctx.req, cc->req_pool); mempool_free(io, cc->io_pool); if (likely(!base_io)) @@ -1326,8 +1308,6 @@ static int 
crypt_wipe_key(struct crypt_config *cc) static void crypt_dtr(struct dm_target *ti) { struct crypt_config *cc = ti->private; - struct crypt_cpu *cpu_cc; - int cpu; ti->private = NULL; @@ -1339,13 +1319,6 @@ static void crypt_dtr(struct dm_target *ti) if (cc->crypt_queue) destroy_workqueue(cc->crypt_queue); - if (cc->cpu) - for_each_possible_cpu(cpu) { - cpu_cc = per_cpu_ptr(cc->cpu, cpu); - if (cpu_cc->req) - mempool_free(cpu_cc->req, cc->req_pool); - } - crypt_free_tfms(cc); if (cc->bs) @@ -1364,9 +1337,6 @@ static void crypt_dtr(struct dm_target *ti) if (cc->dev) dm_put_device(ti, cc->dev); - if (cc->cpu) - free_percpu(cc->cpu); - kzfree(cc->cipher); kzfree(cc->cipher_string); @@ -1421,13 +1391,6 @@ static int crypt_ctr_cipher(struct dm_target *ti, if (tmp) DMWARN("Ignoring unexpected additional cipher options"); - cc->cpu = __alloc_percpu(sizeof(*(cc->cpu)), - __alignof__(struct crypt_cpu)); - if (!cc->cpu) { - ti->error = "Cannot allocate per cpu state"; - goto bad_mem; - } - /* * For compatibility with the original dm-crypt mapping format, if * only the cipher name is supplied, use cbc-plain. 
@@ -1543,6 +1506,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) unsigned int key_size, opt_params; unsigned long long tmpll; int ret; + size_t iv_size_padding; struct dm_arg_set as; const char *opt_string; char dummy; @@ -1579,12 +1543,23 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) cc->dmreq_start = sizeof(struct ablkcipher_request); cc->dmreq_start += crypto_ablkcipher_reqsize(any_tfm(cc)); - cc->dmreq_start = ALIGN(cc->dmreq_start, crypto_tfm_ctx_alignment()); - cc->dmreq_start += crypto_ablkcipher_alignmask(any_tfm(cc)) & - ~(crypto_tfm_ctx_alignment() - 1); + cc->dmreq_start = ALIGN(cc->dmreq_start, __alignof__(struct dm_crypt_request)); + + if (crypto_ablkcipher_alignmask(any_tfm(cc)) < CRYPTO_MINALIGN) { + /* Allocate the padding exactly */ + iv_size_padding = -(cc->dmreq_start + sizeof(struct dm_crypt_request)) + & crypto_ablkcipher_alignmask(any_tfm(cc)); + } else { + /* + * If the cipher requires greater alignment than kmalloc + * alignment, we don't know the exact position of the + * initialization vector. We must assume worst case. 
+ */ + iv_size_padding = crypto_ablkcipher_alignmask(any_tfm(cc)); + } cc->req_pool = mempool_create_kmalloc_pool(MIN_IOS, cc->dmreq_start + - sizeof(struct dm_crypt_request) + cc->iv_size); + sizeof(struct dm_crypt_request) + iv_size_padding + cc->iv_size); if (!cc->req_pool) { ti->error = "Cannot allocate crypt request mempool"; goto bad; diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 2a20986..e60c2ea 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -10,6 +10,7 @@ #include <linux/device-mapper.h> #include <linux/bio.h> +#include <linux/completion.h> #include <linux/mempool.h> #include <linux/module.h> #include <linux/sched.h> @@ -32,7 +33,7 @@ struct dm_io_client { struct io { unsigned long error_bits; atomic_t count; - struct task_struct *sleeper; + struct completion *wait; struct dm_io_client *client; io_notify_fn callback; void *context; @@ -121,8 +122,8 @@ static void dec_count(struct io *io, unsigned int region, int error) invalidate_kernel_vmap_range(io->vma_invalidate_address, io->vma_invalidate_size); - if (io->sleeper) - wake_up_process(io->sleeper); + if (io->wait) + complete(io->wait); else { unsigned long r = io->error_bits; @@ -385,6 +386,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, */ volatile char io_[sizeof(struct io) + __alignof__(struct io) - 1]; struct io *io = (struct io *)PTR_ALIGN(&io_, __alignof__(struct io)); + DECLARE_COMPLETION_ONSTACK(wait); if (num_regions > 1 && (rw & RW_MASK) != WRITE) { WARN_ON(1); @@ -393,7 +395,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, io->error_bits = 0; atomic_set(&io->count, 1); /* see dispatch_io() */ - io->sleeper = current; + io->wait = &wait; io->client = client; io->vma_invalidate_address = dp->vma_invalidate_address; @@ -401,15 +403,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, dispatch_io(rw, num_regions, where, dp, io, 1); - while (1) { - set_current_state(TASK_UNINTERRUPTIBLE); 
- - if (!atomic_read(&io->count)) - break; - - io_schedule(); - } - set_current_state(TASK_RUNNING); + wait_for_completion_io(&wait); if (error_bits) *error_bits = io->error_bits; @@ -432,7 +426,7 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions, io = mempool_alloc(client->pool, GFP_NOIO); io->error_bits = 0; atomic_set(&io->count, 1); /* see dispatch_io() */ - io->sleeper = NULL; + io->wait = NULL; io->client = client; io->callback = fn; io->context = context; diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c index 08d9a20..c69d0b7 100644 --- a/drivers/md/dm-log-userspace-transfer.c +++ b/drivers/md/dm-log-userspace-transfer.c @@ -272,7 +272,7 @@ int dm_ulog_tfr_init(void) r = cn_add_callback(&ulog_cn_id, "dmlogusr", cn_ulog_callback); if (r) { - cn_del_callback(&ulog_cn_id); + kfree(prealloced_cn_msg); return r; } diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 4880b69..5971538 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -785,8 +785,7 @@ struct dm_raid_superblock { __le32 layout; __le32 stripe_sectors; - __u8 pad[452]; /* Round struct to 512 bytes. */ - /* Always set to 0 when writing. */ + /* Remainder of a logical block is zero-filled when writing (see super_sync()). 
*/ } __packed; static int read_disk_sb(struct md_rdev *rdev, int size) @@ -823,7 +822,7 @@ static void super_sync(struct mddev *mddev, struct md_rdev *rdev) test_bit(Faulty, &(rs->dev[i].rdev.flags))) failed_devices |= (1ULL << i); - memset(sb, 0, sizeof(*sb)); + memset(sb + 1, 0, rdev->sb_size - sizeof(*sb)); sb->magic = cpu_to_le32(DM_RAID_MAGIC); sb->features = cpu_to_le32(0); /* No features yet */ @@ -858,7 +857,11 @@ static int super_load(struct md_rdev *rdev, struct md_rdev *refdev) uint64_t events_sb, events_refsb; rdev->sb_start = 0; - rdev->sb_size = sizeof(*sb); + rdev->sb_size = bdev_logical_block_size(rdev->meta_bdev); + if (rdev->sb_size < sizeof(*sb) || rdev->sb_size > PAGE_SIZE) { + DMERR("superblock size of a logical block is no longer valid"); + return -EINVAL; + } ret = read_disk_sb(rdev, rdev->sb_size); if (ret) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 3bb4506..b63095c 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -192,6 +192,13 @@ struct dm_pool_metadata { * operation possible in this state is the closing of the device. */ bool fail_io:1; + + /* + * Reading the space map roots can fail, so we read it into these + * buffers before the superblock is locked and updated. 
+ */ + __u8 data_space_map_root[SPACE_MAP_ROOT_SIZE]; + __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE]; }; struct dm_thin_device { @@ -431,26 +438,53 @@ static void __setup_btree_details(struct dm_pool_metadata *pmd) pmd->details_info.value_type.equal = NULL; } +static int save_sm_roots(struct dm_pool_metadata *pmd) +{ + int r; + size_t len; + + r = dm_sm_root_size(pmd->metadata_sm, &len); + if (r < 0) + return r; + + r = dm_sm_copy_root(pmd->metadata_sm, &pmd->metadata_space_map_root, len); + if (r < 0) + return r; + + r = dm_sm_root_size(pmd->data_sm, &len); + if (r < 0) + return r; + + return dm_sm_copy_root(pmd->data_sm, &pmd->data_space_map_root, len); +} + +static void copy_sm_roots(struct dm_pool_metadata *pmd, + struct thin_disk_superblock *disk) +{ + memcpy(&disk->metadata_space_map_root, + &pmd->metadata_space_map_root, + sizeof(pmd->metadata_space_map_root)); + + memcpy(&disk->data_space_map_root, + &pmd->data_space_map_root, + sizeof(pmd->data_space_map_root)); +} + static int __write_initial_superblock(struct dm_pool_metadata *pmd) { int r; struct dm_block *sblock; - size_t metadata_len, data_len; struct thin_disk_superblock *disk_super; sector_t bdev_size = i_size_read(pmd->bdev->bd_inode) >> SECTOR_SHIFT; if (bdev_size > THIN_METADATA_MAX_SECTORS) bdev_size = THIN_METADATA_MAX_SECTORS; - r = dm_sm_root_size(pmd->metadata_sm, &metadata_len); - if (r < 0) - return r; - - r = dm_sm_root_size(pmd->data_sm, &data_len); + r = dm_sm_commit(pmd->data_sm); if (r < 0) return r; - r = dm_sm_commit(pmd->data_sm); + r = save_sm_roots(pmd); if (r < 0) return r; @@ -471,15 +505,7 @@ static int __write_initial_superblock(struct dm_pool_metadata *pmd) disk_super->trans_id = 0; disk_super->held_root = 0; - r = dm_sm_copy_root(pmd->metadata_sm, &disk_super->metadata_space_map_root, - metadata_len); - if (r < 0) - goto bad_locked; - - r = dm_sm_copy_root(pmd->data_sm, &disk_super->data_space_map_root, - data_len); - if (r < 0) - goto bad_locked; + copy_sm_roots(pmd, 
disk_super); disk_super->data_mapping_root = cpu_to_le64(pmd->root); disk_super->device_details_root = cpu_to_le64(pmd->details_root); @@ -488,10 +514,6 @@ static int __write_initial_superblock(struct dm_pool_metadata *pmd) disk_super->data_block_size = cpu_to_le32(pmd->data_block_size); return dm_tm_commit(pmd->tm, sblock); - -bad_locked: - dm_bm_unlock(sblock); - return r; } static int __format_metadata(struct dm_pool_metadata *pmd) @@ -591,6 +613,15 @@ static int __open_metadata(struct dm_pool_metadata *pmd) disk_super = dm_block_data(sblock); + /* Verify the data block size hasn't changed */ + if (le32_to_cpu(disk_super->data_block_size) != pmd->data_block_size) { + DMERR("changing the data block size (from %u to %llu) is not supported", + le32_to_cpu(disk_super->data_block_size), + (unsigned long long)pmd->data_block_size); + r = -EINVAL; + goto bad_unlock_sblock; + } + r = __check_incompat_features(disk_super, pmd); if (r < 0) goto bad_unlock_sblock; @@ -769,6 +800,10 @@ static int __commit_transaction(struct dm_pool_metadata *pmd) if (r < 0) return r; + r = save_sm_roots(pmd); + if (r < 0) + return r; + r = superblock_lock(pmd, &sblock); if (r) return r; @@ -780,21 +815,9 @@ static int __commit_transaction(struct dm_pool_metadata *pmd) disk_super->trans_id = cpu_to_le64(pmd->trans_id); disk_super->flags = cpu_to_le32(pmd->flags); - r = dm_sm_copy_root(pmd->metadata_sm, &disk_super->metadata_space_map_root, - metadata_len); - if (r < 0) - goto out_locked; - - r = dm_sm_copy_root(pmd->data_sm, &disk_super->data_space_map_root, - data_len); - if (r < 0) - goto out_locked; + copy_sm_roots(pmd, disk_super); return dm_tm_commit(pmd->tm, sblock); - -out_locked: - dm_bm_unlock(sblock); - return r; } struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index e9587101..0396d7f 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1332,9 +1332,9 @@ static void 
process_deferred_bios(struct pool *pool) */ if (ensure_next_mapping(pool)) { spin_lock_irqsave(&pool->lock, flags); + bio_list_add(&pool->deferred_bios, bio); bio_list_merge(&pool->deferred_bios, &bios); spin_unlock_irqrestore(&pool->lock, flags); - break; } @@ -1504,6 +1504,14 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_SUBMITTED; } + /* + * We must hold the virtual cell before doing the lookup, otherwise + * there's a race with discard. + */ + build_virtual_key(tc->td, block, &key); + if (dm_bio_detain(tc->pool->prison, &key, bio, &cell1, &cell_result)) + return DM_MAPIO_SUBMITTED; + r = dm_thin_find_block(td, block, 0, &result); /* @@ -1527,13 +1535,10 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) * shared flag will be set in their case. */ thin_defer_bio(tc, bio); + cell_defer_no_holder_no_free(tc, &cell1); return DM_MAPIO_SUBMITTED; } - build_virtual_key(tc->td, block, &key); - if (dm_bio_detain(tc->pool->prison, &key, bio, &cell1, &cell_result)) - return DM_MAPIO_SUBMITTED; - build_data_key(tc->td, result.block, &key); if (dm_bio_detain(tc->pool->prison, &key, bio, &cell2, &cell_result)) { cell_defer_no_holder_no_free(tc, &cell1); @@ -1554,6 +1559,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) * of doing so. Just error it. */ bio_io_error(bio); + cell_defer_no_holder_no_free(tc, &cell1); return DM_MAPIO_SUBMITTED; } /* fall through */ @@ -1564,6 +1570,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) * provide the hint to load the metadata into cache. */ thin_defer_bio(tc, bio); + cell_defer_no_holder_no_free(tc, &cell1); return DM_MAPIO_SUBMITTED; default: @@ -1573,6 +1580,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) * pool is switched to fail-io mode. 
*/ bio_io_error(bio); + cell_defer_no_holder_no_free(tc, &cell1); return DM_MAPIO_SUBMITTED; } } @@ -2695,7 +2703,8 @@ static void set_discard_limits(struct pool_c *pt, struct queue_limits *limits) */ if (pt->adjusted_pf.discard_passdown) { data_limits = &bdev_get_queue(pt->data_dev->bdev)->limits; - limits->discard_granularity = data_limits->discard_granularity; + limits->discard_granularity = max(data_limits->discard_granularity, + pool->sectors_per_block << SECTOR_SHIFT); } else limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; } diff --git a/drivers/md/md.c b/drivers/md/md.c index 015bc45..bf030d4 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7371,8 +7371,10 @@ void md_do_sync(struct md_thread *thread) /* just incase thread restarts... */ if (test_bit(MD_RECOVERY_DONE, &mddev->recovery)) return; - if (mddev->ro) /* never try to sync a read-only array */ + if (mddev->ro) {/* never try to sync a read-only array */ + set_bit(MD_RECOVERY_INTR, &mddev->recovery); return; + } if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) { @@ -7482,6 +7484,19 @@ void md_do_sync(struct md_thread *thread) rdev->recovery_offset < j) j = rdev->recovery_offset; rcu_read_unlock(); + + /* If there is a bitmap, we need to make sure all + * writes that started before we added a spare + * complete before we start doing a recovery. + * Otherwise the write might complete and (via + * bitmap_endwrite) set a bit in the bitmap after the + * recovery has checked that bit and skipped that + * region. 
+ */ + if (mddev->bitmap) { + mddev->pers->quiesce(mddev, 1); + mddev->pers->quiesce(mddev, 0); + } } printk(KERN_INFO "md: %s of RAID array %s\n", desc, mdname(mddev)); @@ -7825,6 +7840,7 @@ void md_check_recovery(struct mddev *mddev) /* There is no thread, but we need to call * ->spare_active and clear saved_raid_disk */ + set_bit(MD_RECOVERY_INTR, &mddev->recovery); md_reap_sync_thread(mddev); clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); goto unlock; @@ -8520,7 +8536,8 @@ static int md_notify_reboot(struct notifier_block *this, if (mddev_trylock(mddev)) { if (mddev->pers) __md_stop_writes(mddev); - mddev->safemode = 2; + if (mddev->persistent) + mddev->safemode = 2; mddev_unlock(mddev); } need_delay = 1; diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index 064a3c2..30597f3 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -595,25 +595,14 @@ int dm_bm_unlock(struct dm_block *b) } EXPORT_SYMBOL_GPL(dm_bm_unlock); -int dm_bm_flush_and_unlock(struct dm_block_manager *bm, - struct dm_block *superblock) +int dm_bm_flush(struct dm_block_manager *bm) { - int r; - if (bm->read_only) return -EPERM; - r = dm_bufio_write_dirty_buffers(bm->bufio); - if (unlikely(r)) { - dm_bm_unlock(superblock); - return r; - } - - dm_bm_unlock(superblock); - return dm_bufio_write_dirty_buffers(bm->bufio); } -EXPORT_SYMBOL_GPL(dm_bm_flush_and_unlock); +EXPORT_SYMBOL_GPL(dm_bm_flush); void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b) { diff --git a/drivers/md/persistent-data/dm-block-manager.h b/drivers/md/persistent-data/dm-block-manager.h index 13cd58e..1b95dfc 100644 --- a/drivers/md/persistent-data/dm-block-manager.h +++ b/drivers/md/persistent-data/dm-block-manager.h @@ -105,8 +105,7 @@ int dm_bm_unlock(struct dm_block *b); * * This method always blocks. 
*/ -int dm_bm_flush_and_unlock(struct dm_block_manager *bm, - struct dm_block *superblock); +int dm_bm_flush(struct dm_block_manager *bm); /* * Request data is prefetched into the cache. diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h index 37d367b..bf2b80d 100644 --- a/drivers/md/persistent-data/dm-btree-internal.h +++ b/drivers/md/persistent-data/dm-btree-internal.h @@ -42,6 +42,12 @@ struct btree_node { } __packed; +/* + * Locks a block using the btree node validator. + */ +int bn_read_lock(struct dm_btree_info *info, dm_block_t b, + struct dm_block **result); + void inc_children(struct dm_transaction_manager *tm, struct btree_node *n, struct dm_btree_value_type *vt); diff --git a/drivers/md/persistent-data/dm-btree-spine.c b/drivers/md/persistent-data/dm-btree-spine.c index cf9fd67..1b5e13e 100644 --- a/drivers/md/persistent-data/dm-btree-spine.c +++ b/drivers/md/persistent-data/dm-btree-spine.c @@ -92,7 +92,7 @@ struct dm_block_validator btree_node_validator = { /*----------------------------------------------------------------*/ -static int bn_read_lock(struct dm_btree_info *info, dm_block_t b, +int bn_read_lock(struct dm_btree_info *info, dm_block_t b, struct dm_block **result) { return dm_tm_read_lock(info->tm, b, &btree_node_validator, result); diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index 468e371..9701d29 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -828,22 +828,26 @@ EXPORT_SYMBOL_GPL(dm_btree_find_highest_key); * FIXME: We shouldn't use a recursive algorithm when we have limited stack * space. Also this only works for single level trees. 
*/ -static int walk_node(struct ro_spine *s, dm_block_t block, +static int walk_node(struct dm_btree_info *info, dm_block_t block, int (*fn)(void *context, uint64_t *keys, void *leaf), void *context) { int r; unsigned i, nr; + struct dm_block *node; struct btree_node *n; uint64_t keys; - r = ro_step(s, block); - n = ro_node(s); + r = bn_read_lock(info, block, &node); + if (r) + return r; + + n = dm_block_data(node); nr = le32_to_cpu(n->header.nr_entries); for (i = 0; i < nr; i++) { if (le32_to_cpu(n->header.flags) & INTERNAL_NODE) { - r = walk_node(s, value64(n, i), fn, context); + r = walk_node(info, value64(n, i), fn, context); if (r) goto out; } else { @@ -855,7 +859,7 @@ static int walk_node(struct ro_spine *s, dm_block_t block, } out: - ro_pop(s); + dm_tm_unlock(info->tm, node); return r; } @@ -863,15 +867,7 @@ int dm_btree_walk(struct dm_btree_info *info, dm_block_t root, int (*fn)(void *context, uint64_t *keys, void *leaf), void *context) { - int r; - struct ro_spine spine; - BUG_ON(info->levels > 1); - - init_ro_spine(&spine, info); - r = walk_node(&spine, root, fn, context); - exit_ro_spine(&spine); - - return r; + return walk_node(info, root, fn, context); } EXPORT_SYMBOL_GPL(dm_btree_walk); diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index 579b582..d9a5aa5 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -564,7 +564,9 @@ static int sm_bootstrap_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count { struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); - return smm->ll.nr_blocks; + *count = smm->ll.nr_blocks; + + return 0; } static int sm_bootstrap_get_nr_free(struct dm_space_map *sm, dm_block_t *count) diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c index 81da1a2..3bc30a0 100644 --- 
a/drivers/md/persistent-data/dm-transaction-manager.c +++ b/drivers/md/persistent-data/dm-transaction-manager.c @@ -154,7 +154,7 @@ int dm_tm_pre_commit(struct dm_transaction_manager *tm) if (r < 0) return r; - return 0; + return dm_bm_flush(tm->bm); } EXPORT_SYMBOL_GPL(dm_tm_pre_commit); @@ -164,8 +164,9 @@ int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *root) return -EWOULDBLOCK; wipe_shadow_table(tm); + dm_bm_unlock(root); - return dm_bm_flush_and_unlock(tm->bm, root); + return dm_bm_flush(tm->bm); } EXPORT_SYMBOL_GPL(dm_tm_commit); diff --git a/drivers/md/persistent-data/dm-transaction-manager.h b/drivers/md/persistent-data/dm-transaction-manager.h index b5b1390..2772ed2 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.h +++ b/drivers/md/persistent-data/dm-transaction-manager.h @@ -38,18 +38,17 @@ struct dm_transaction_manager *dm_tm_create_non_blocking_clone(struct dm_transac /* * We use a 2-phase commit here. * - * i) In the first phase the block manager is told to start flushing, and - * the changes to the space map are written to disk. You should interrogate - * your particular space map to get detail of its root node etc. to be - * included in your superblock. + * i) Make all changes for the transaction *except* for the superblock. + * Then call dm_tm_pre_commit() to flush them to disk. * - * ii) @root will be committed last. You shouldn't use more than the - * first 512 bytes of @root if you wish the transaction to survive a power - * failure. You *must* have a write lock held on @root for both stage (i) - * and (ii). The commit will drop the write lock. + * ii) Lock your superblock. Update. Then call dm_tm_commit() which will + * unlock the superblock and flush it. No other blocks should be updated + * during this period. Care should be taken to never unlock a partially + * updated superblock; perform any operations that could fail *before* you + * take the superblock lock. 
*/ int dm_tm_pre_commit(struct dm_transaction_manager *tm); -int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *root); +int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *superblock); /* * These methods are the only way to get hold of a writeable block. diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 6edc2db..6564eeb 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -94,6 +94,7 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) struct pool_info *pi = data; struct r1bio *r1_bio; struct bio *bio; + int need_pages; int i, j; r1_bio = r1bio_pool_alloc(gfp_flags, pi); @@ -116,15 +117,15 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) * RESYNC_PAGES for each bio. */ if (test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) - j = pi->raid_disks; + need_pages = pi->raid_disks; else - j = 1; - while(j--) { + need_pages = 1; + for (j = 0; j < need_pages; j++) { bio = r1_bio->bios[j]; bio->bi_vcnt = RESYNC_PAGES; if (bio_alloc_pages(bio, gfp_flags)) - goto out_free_bio; + goto out_free_pages; } /* If not user-requests, copy the page pointers to all bios */ if (!test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) { @@ -138,6 +139,14 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) return r1_bio; +out_free_pages: + while (--j >= 0) { + struct bio_vec *bv; + + bio_for_each_segment_all(bv, r1_bio->bios[j], i) + __free_page(bv->bv_page); + } + out_free_bio: while (++j < pi->raid_disks) bio_put(r1_bio->bios[j]); @@ -1397,12 +1406,12 @@ static void error(struct mddev *mddev, struct md_rdev *rdev) mddev->degraded++; set_bit(Faulty, &rdev->flags); spin_unlock_irqrestore(&conf->device_lock, flags); - /* - * if recovery is running, make sure it aborts. - */ - set_bit(MD_RECOVERY_INTR, &mddev->recovery); } else set_bit(Faulty, &rdev->flags); + /* + * if recovery is running, make sure it aborts. 
+ */ + set_bit(MD_RECOVERY_INTR, &mddev->recovery); set_bit(MD_CHANGE_DEVS, &mddev->flags); printk(KERN_ALERT "md/raid1:%s: Disk failure on %s, disabling device.\n" @@ -2043,7 +2052,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk, d--; rdev = conf->mirrors[d].rdev; if (rdev && - test_bit(In_sync, &rdev->flags)) + !test_bit(Faulty, &rdev->flags)) r1_sync_page_io(rdev, sect, s, conf->tmppage, WRITE); } @@ -2055,7 +2064,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk, d--; rdev = conf->mirrors[d].rdev; if (rdev && - test_bit(In_sync, &rdev->flags)) { + !test_bit(Faulty, &rdev->flags)) { if (r1_sync_page_io(rdev, sect, s, conf->tmppage, READ)) { atomic_add(s, &rdev->corrected_errors); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 308575d..9ccb107 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1698,13 +1698,12 @@ static void error(struct mddev *mddev, struct md_rdev *rdev) spin_unlock_irqrestore(&conf->device_lock, flags); return; } - if (test_and_clear_bit(In_sync, &rdev->flags)) { + if (test_and_clear_bit(In_sync, &rdev->flags)) mddev->degraded++; - /* - * if recovery is running, make sure it aborts. - */ - set_bit(MD_RECOVERY_INTR, &mddev->recovery); - } + /* + * If recovery is running, make sure it aborts. 
+ */ + set_bit(MD_RECOVERY_INTR, &mddev->recovery); set_bit(Blocked, &rdev->flags); set_bit(Faulty, &rdev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags); @@ -2970,6 +2969,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, */ if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) { end_reshape(conf); + close_sync(conf); return 0; } @@ -4420,7 +4420,7 @@ read_more: read_bio->bi_private = r10_bio; read_bio->bi_end_io = end_sync_read; read_bio->bi_rw = READ; - read_bio->bi_flags &= ~(BIO_POOL_MASK - 1); + read_bio->bi_flags &= (~0UL << BIO_RESET_BITS); read_bio->bi_flags |= 1 << BIO_UPTODATE; read_bio->bi_vcnt = 0; read_bio->bi_size = 0; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index ab00c1e..44d6dcf 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -64,6 +64,10 @@ #define cpu_to_group(cpu) cpu_to_node(cpu) #define ANY_GROUP NUMA_NO_NODE +static bool devices_handle_discard_safely = false; +module_param(devices_handle_discard_safely, bool, 0644); +MODULE_PARM_DESC(devices_handle_discard_safely, + "Set to Y if all devices in each array reliably return zeroes on reads from discarded regions"); static struct workqueue_struct *raid5_wq; /* * Stripe cache @@ -2787,7 +2791,8 @@ static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s, (s->failed >= 2 && fdev[1]->toread) || (sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite && !test_bit(R5_OVERWRITE, &fdev[0]->flags)) || - (sh->raid_conf->level == 6 && s->failed && s->to_write))) { + ((sh->raid_conf->level == 6 || sh->sector >= sh->raid_conf->mddev->recovery_cp) + && s->failed && s->to_write))) { /* we would like to get this block, possibly by computing it, * otherwise read it if the backing disk is insync */ @@ -3674,6 +3679,8 @@ static void handle_stripe(struct stripe_head *sh) set_bit(R5_Wantwrite, &dev->flags); if (prexor) continue; + if (s.failed > 1) + continue; if (!test_bit(R5_Insync, &dev->flags) || ((i == sh->pd_idx || i == sh->qd_idx) && 
s.failed == 0)) @@ -5939,7 +5946,7 @@ static int run(struct mddev *mddev) mddev->queue->limits.discard_granularity = stripe; /* * unaligned part of discard request will be ignored, so can't - * guarantee discard_zerors_data + * guarantee discard_zeroes_data */ mddev->queue->limits.discard_zeroes_data = 0; @@ -5964,6 +5971,18 @@ static int run(struct mddev *mddev) !bdev_get_queue(rdev->bdev)-> limits.discard_zeroes_data) discard_supported = false; + /* Unfortunately, discard_zeroes_data is not currently + * a guarantee - just a hint. So we only allow DISCARD + * if the sysadmin has confirmed that only safe devices + * are in use by setting a module parameter. + */ + if (!devices_handle_discard_safely) { + if (discard_supported) { + pr_info("md/raid456: discard support disabled due to uncertainty.\n"); + pr_info("Set raid456.devices_handle_discard_safely=Y to override.\n"); + } + discard_supported = false; + } } if (discard_supported && diff --git a/drivers/media/common/siano/Kconfig b/drivers/media/common/siano/Kconfig index f953d33..4bfbd5f 100644 --- a/drivers/media/common/siano/Kconfig +++ b/drivers/media/common/siano/Kconfig @@ -22,8 +22,7 @@ config SMS_SIANO_DEBUGFS bool "Enable debugfs for smsdvb" depends on SMS_SIANO_MDTV depends on DEBUG_FS - depends on SMS_USB_DRV - depends on CONFIG_SMS_USB_DRV = CONFIG_SMS_SDIO_DRV + depends on SMS_USB_DRV = SMS_SDIO_DRV ---help--- Choose Y to enable visualizing a dump of the frontend diff --git a/drivers/media/dvb-core/dvb-usb-ids.h b/drivers/media/dvb-core/dvb-usb-ids.h index 7e0f619..d58fad3 100644 --- a/drivers/media/dvb-core/dvb-usb-ids.h +++ b/drivers/media/dvb-core/dvb-usb-ids.h @@ -257,6 +257,7 @@ #define USB_PID_TERRATEC_T5 0x10a1 #define USB_PID_NOXON_DAB_STICK 0x00b3 #define USB_PID_NOXON_DAB_STICK_REV2 0x00e0 +#define USB_PID_NOXON_DAB_STICK_REV3 0x00b4 #define USB_PID_PINNACLE_EXPRESSCARD_320CX 0x022e #define USB_PID_PINNACLE_PCTV2000E 0x022c #define USB_PID_PINNACLE_PCTV_DVB_T_FLASH 0x0228 @@ -318,6 +319,7 
@@ #define USB_PID_WINFAST_DTV_DONGLE_H 0x60f6 #define USB_PID_WINFAST_DTV_DONGLE_STK7700P_2 0x6f01 #define USB_PID_WINFAST_DTV_DONGLE_GOLD 0x6029 +#define USB_PID_WINFAST_DTV_DONGLE_MINID 0x6f0f #define USB_PID_GENPIX_8PSK_REV_1_COLD 0x0200 #define USB_PID_GENPIX_8PSK_REV_1_WARM 0x0201 #define USB_PID_GENPIX_8PSK_REV_2 0x0202 @@ -359,6 +361,7 @@ #define USB_PID_FRIIO_WHITE 0x0001 #define USB_PID_TVWAY_PLUS 0x0002 #define USB_PID_SVEON_STV20 0xe39d +#define USB_PID_SVEON_STV20_RTL2832U 0xd39d #define USB_PID_SVEON_STV22 0xe401 #define USB_PID_SVEON_STV22_IT9137 0xe411 #define USB_PID_AZUREWAVE_AZ6027 0x3275 @@ -372,4 +375,5 @@ #define USB_PID_CTVDIGDUAL_V2 0xe410 #define USB_PID_PCTV_2002E 0x025c #define USB_PID_PCTV_2002E_SE 0x025d +#define USB_PID_SVEON_STV27 0xd3af #endif diff --git a/drivers/media/dvb-frontends/ds3000.c b/drivers/media/dvb-frontends/ds3000.c index 1e344b0..22e8c20 100644 --- a/drivers/media/dvb-frontends/ds3000.c +++ b/drivers/media/dvb-frontends/ds3000.c @@ -864,6 +864,13 @@ struct dvb_frontend *ds3000_attach(const struct ds3000_config *config, memcpy(&state->frontend.ops, &ds3000_ops, sizeof(struct dvb_frontend_ops)); state->frontend.demodulator_priv = state; + + /* + * Some devices like T480 starts with voltage on. Be sure + * to turn voltage off during init, as this can otherwise + * interfere with Unicable SCR systems. 
+ */ + ds3000_set_voltage(&state->frontend, SEC_VOLTAGE_OFF); return &state->frontend; error3: diff --git a/drivers/media/dvb-frontends/m88rs2000.c b/drivers/media/dvb-frontends/m88rs2000.c index 02699c1..c7a1c8e 100644 --- a/drivers/media/dvb-frontends/m88rs2000.c +++ b/drivers/media/dvb-frontends/m88rs2000.c @@ -712,6 +712,22 @@ static int m88rs2000_get_frontend(struct dvb_frontend *fe) return 0; } +static int m88rs2000_get_tune_settings(struct dvb_frontend *fe, + struct dvb_frontend_tune_settings *tune) +{ + struct dtv_frontend_properties *c = &fe->dtv_property_cache; + + if (c->symbol_rate > 3000000) + tune->min_delay_ms = 2000; + else + tune->min_delay_ms = 3000; + + tune->step_size = c->symbol_rate / 16000; + tune->max_drift = c->symbol_rate / 2000; + + return 0; +} + static int m88rs2000_i2c_gate_ctrl(struct dvb_frontend *fe, int enable) { struct m88rs2000_state *state = fe->demodulator_priv; @@ -743,7 +759,7 @@ static struct dvb_frontend_ops m88rs2000_ops = { .symbol_rate_tolerance = 500, /* ppm */ .caps = FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 | FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | - FE_CAN_QPSK | + FE_CAN_QPSK | FE_CAN_INVERSION_AUTO | FE_CAN_FEC_AUTO }, @@ -763,6 +779,7 @@ static struct dvb_frontend_ops m88rs2000_ops = { .set_frontend = m88rs2000_set_frontend, .get_frontend = m88rs2000_get_frontend, + .get_tune_settings = m88rs2000_get_tune_settings, }; struct dvb_frontend *m88rs2000_attach(const struct m88rs2000_config *config, diff --git a/drivers/media/dvb-frontends/tda10071.c b/drivers/media/dvb-frontends/tda10071.c index 8ad3a57..287b977 100644 --- a/drivers/media/dvb-frontends/tda10071.c +++ b/drivers/media/dvb-frontends/tda10071.c @@ -667,6 +667,7 @@ static int tda10071_set_frontend(struct dvb_frontend *fe) struct dtv_frontend_properties *c = &fe->dtv_property_cache; int ret, i; u8 mode, rolloff, pilot, inversion, div; + fe_modulation_t modulation; dev_dbg(&priv->i2c->dev, "%s: delivery_system=%d modulation=%d " \ "frequency=%d 
symbol_rate=%d inversion=%d pilot=%d " \ @@ -701,10 +702,13 @@ static int tda10071_set_frontend(struct dvb_frontend *fe) switch (c->delivery_system) { case SYS_DVBS: + modulation = QPSK; rolloff = 0; pilot = 2; break; case SYS_DVBS2: + modulation = c->modulation; + switch (c->rolloff) { case ROLLOFF_20: rolloff = 2; @@ -749,7 +753,7 @@ static int tda10071_set_frontend(struct dvb_frontend *fe) for (i = 0, mode = 0xff; i < ARRAY_SIZE(TDA10071_MODCOD); i++) { if (c->delivery_system == TDA10071_MODCOD[i].delivery_system && - c->modulation == TDA10071_MODCOD[i].modulation && + modulation == TDA10071_MODCOD[i].modulation && c->fec_inner == TDA10071_MODCOD[i].fec) { mode = TDA10071_MODCOD[i].val; dev_dbg(&priv->i2c->dev, "%s: mode found=%02x\n", diff --git a/drivers/media/i2c/adv7604.c b/drivers/media/i2c/adv7604.c index fbfdd2f..afbc0d7 100644 --- a/drivers/media/i2c/adv7604.c +++ b/drivers/media/i2c/adv7604.c @@ -1752,7 +1752,7 @@ static int adv7604_log_status(struct v4l2_subdev *sd) v4l2_info(sd, "HDCP keys read: %s%s\n", (hdmi_read(sd, 0x04) & 0x20) ? "yes" : "no", (hdmi_read(sd, 0x04) & 0x10) ? "ERROR" : ""); - if (!is_hdmi(sd)) { + if (is_hdmi(sd)) { bool audio_pll_locked = hdmi_read(sd, 0x04) & 0x01; bool audio_sample_packet_detect = hdmi_read(sd, 0x18) & 0x01; bool audio_mute = io_read(sd, 0x65) & 0x40; diff --git a/drivers/media/i2c/ov7670.c b/drivers/media/i2c/ov7670.c index e8a1ce2..cdd7c1b 100644 --- a/drivers/media/i2c/ov7670.c +++ b/drivers/media/i2c/ov7670.c @@ -1109,7 +1109,7 @@ static int ov7670_enum_framesizes(struct v4l2_subdev *sd, * windows that fall outside that. 
*/ for (i = 0; i < n_win_sizes; i++) { - struct ov7670_win_size *win = &info->devtype->win_sizes[index]; + struct ov7670_win_size *win = &info->devtype->win_sizes[i]; if (info->min_width && win->width < info->min_width) continue; if (info->min_height && win->height < info->min_height) diff --git a/drivers/media/i2c/smiapp-pll.c b/drivers/media/i2c/smiapp-pll.c index 2335529..ab5d9a3 100644 --- a/drivers/media/i2c/smiapp-pll.c +++ b/drivers/media/i2c/smiapp-pll.c @@ -67,7 +67,7 @@ static void print_pll(struct device *dev, struct smiapp_pll *pll) { dev_dbg(dev, "pre_pll_clk_div\t%d\n", pll->pre_pll_clk_div); dev_dbg(dev, "pll_multiplier \t%d\n", pll->pll_multiplier); - if (pll->flags != SMIAPP_PLL_FLAG_NO_OP_CLOCKS) { + if (!(pll->flags & SMIAPP_PLL_FLAG_NO_OP_CLOCKS)) { dev_dbg(dev, "op_sys_clk_div \t%d\n", pll->op_sys_clk_div); dev_dbg(dev, "op_pix_clk_div \t%d\n", pll->op_pix_clk_div); } @@ -77,7 +77,7 @@ static void print_pll(struct device *dev, struct smiapp_pll *pll) dev_dbg(dev, "ext_clk_freq_hz \t%d\n", pll->ext_clk_freq_hz); dev_dbg(dev, "pll_ip_clk_freq_hz \t%d\n", pll->pll_ip_clk_freq_hz); dev_dbg(dev, "pll_op_clk_freq_hz \t%d\n", pll->pll_op_clk_freq_hz); - if (pll->flags & SMIAPP_PLL_FLAG_NO_OP_CLOCKS) { + if (!(pll->flags & SMIAPP_PLL_FLAG_NO_OP_CLOCKS)) { dev_dbg(dev, "op_sys_clk_freq_hz \t%d\n", pll->op_sys_clk_freq_hz); dev_dbg(dev, "op_pix_clk_freq_hz \t%d\n", diff --git a/drivers/media/i2c/smiapp/smiapp-core.c b/drivers/media/i2c/smiapp/smiapp-core.c index ae66d91..4bfe83f 100644 --- a/drivers/media/i2c/smiapp/smiapp-core.c +++ b/drivers/media/i2c/smiapp/smiapp-core.c @@ -2139,7 +2139,7 @@ static int smiapp_set_selection(struct v4l2_subdev *subdev, ret = smiapp_set_compose(subdev, fh, sel); break; default: - BUG(); + ret = -EINVAL; } mutex_unlock(&sensor->mutex); @@ -2625,7 +2625,9 @@ static int smiapp_registered(struct v4l2_subdev *subdev) pll->flags |= SMIAPP_PLL_FLAG_OP_PIX_CLOCK_PER_LANE; pll->scale_n = 
sensor->limits[SMIAPP_LIMIT_SCALER_N_MIN]; + mutex_lock(&sensor->mutex); rval = smiapp_update_mode(sensor); + mutex_unlock(&sensor->mutex); if (rval) { dev_err(&client->dev, "update mode failed\n"); goto out_nvm_release; diff --git a/drivers/media/i2c/tda7432.c b/drivers/media/i2c/tda7432.c index 72af644..cf93021 100644 --- a/drivers/media/i2c/tda7432.c +++ b/drivers/media/i2c/tda7432.c @@ -293,7 +293,7 @@ static int tda7432_s_ctrl(struct v4l2_ctrl *ctrl) if (t->mute->val) { lf |= TDA7432_MUTE; lr |= TDA7432_MUTE; - lf |= TDA7432_MUTE; + rf |= TDA7432_MUTE; rr |= TDA7432_MUTE; } /* Mute & update balance*/ diff --git a/drivers/media/media-device.c b/drivers/media/media-device.c index d5a7a13..88c1606 100644 --- a/drivers/media/media-device.c +++ b/drivers/media/media-device.c @@ -93,6 +93,7 @@ static long media_device_enum_entities(struct media_device *mdev, struct media_entity *ent; struct media_entity_desc u_ent; + memset(&u_ent, 0, sizeof(u_ent)); if (copy_from_user(&u_ent.id, &uent->id, sizeof(u_ent.id))) return -EFAULT; @@ -105,8 +106,6 @@ static long media_device_enum_entities(struct media_device *mdev, if (ent->name) { strncpy(u_ent.name, ent->name, sizeof(u_ent.name)); u_ent.name[sizeof(u_ent.name) - 1] = '\0'; - } else { - memset(u_ent.name, 0, sizeof(u_ent.name)); } u_ent.type = ent->type; u_ent.revision = ent->revision; diff --git a/drivers/media/pci/cx18/cx18-driver.c b/drivers/media/pci/cx18/cx18-driver.c index 91c694b..1aa7ecd 100644 --- a/drivers/media/pci/cx18/cx18-driver.c +++ b/drivers/media/pci/cx18/cx18-driver.c @@ -1092,6 +1092,7 @@ static int cx18_probe(struct pci_dev *pci_dev, setup.addr = ADDR_UNSET; setup.type = cx->options.tuner; setup.mode_mask = T_ANALOG_TV; /* matches TV tuners */ + setup.config = NULL; if (cx->options.radio > 0) setup.mode_mask |= T_RADIO; setup.tuner_callback = (setup.type == TUNER_XC2028) ? 
diff --git a/drivers/media/pci/ivtv/ivtv-alsa-pcm.c b/drivers/media/pci/ivtv/ivtv-alsa-pcm.c index e1863db..7a9b98b 100644 --- a/drivers/media/pci/ivtv/ivtv-alsa-pcm.c +++ b/drivers/media/pci/ivtv/ivtv-alsa-pcm.c @@ -159,6 +159,12 @@ static int snd_ivtv_pcm_capture_open(struct snd_pcm_substream *substream) /* Instruct the CX2341[56] to start sending packets */ snd_ivtv_lock(itvsc); + + if (ivtv_init_on_first_open(itv)) { + snd_ivtv_unlock(itvsc); + return -ENXIO; + } + s = &itv->streams[IVTV_ENC_STREAM_TYPE_PCM]; v4l2_fh_init(&item.fh, s->vdev); diff --git a/drivers/media/pci/saa7134/saa7134-cards.c b/drivers/media/pci/saa7134/saa7134-cards.c index d45e7f6..e87a734 100644 --- a/drivers/media/pci/saa7134/saa7134-cards.c +++ b/drivers/media/pci/saa7134/saa7134-cards.c @@ -8045,8 +8045,8 @@ int saa7134_board_init2(struct saa7134_dev *dev) break; } /* switch() */ - /* initialize tuner */ - if (TUNER_ABSENT != dev->tuner_type) { + /* initialize tuner (don't do this when resuming) */ + if (!dev->insuspend && TUNER_ABSENT != dev->tuner_type) { int has_demod = (dev->tda9887_conf & TDA9887_PRESENT); /* Note: radio tuner address is always filled in, diff --git a/drivers/media/platform/omap3isp/isppreview.c b/drivers/media/platform/omap3isp/isppreview.c index cd8831a..e2e4610 100644 --- a/drivers/media/platform/omap3isp/isppreview.c +++ b/drivers/media/platform/omap3isp/isppreview.c @@ -1079,6 +1079,7 @@ static void preview_config_input_format(struct isp_prev_device *prev, */ static void preview_config_input_size(struct isp_prev_device *prev, u32 active) { + const struct v4l2_mbus_framefmt *format = &prev->formats[PREV_PAD_SINK]; struct isp_device *isp = to_isp_device(prev); unsigned int sph = prev->crop.left; unsigned int eph = prev->crop.left + prev->crop.width - 1; @@ -1086,6 +1087,14 @@ static void preview_config_input_size(struct isp_prev_device *prev, u32 active) unsigned int elv = prev->crop.top + prev->crop.height - 1; u32 features; + if (format->code != 
V4L2_MBUS_FMT_Y8_1X8 && + format->code != V4L2_MBUS_FMT_Y10_1X10) { + sph -= 2; + eph += 2; + slv -= 2; + elv += 2; + } + features = (prev->params.params[0].features & active) | (prev->params.params[1].features & ~active); diff --git a/drivers/media/platform/vsp1/vsp1_video.c b/drivers/media/platform/vsp1/vsp1_video.c index 714c53e..2960ff1 100644 --- a/drivers/media/platform/vsp1/vsp1_video.c +++ b/drivers/media/platform/vsp1/vsp1_video.c @@ -622,8 +622,6 @@ static int vsp1_video_buffer_prepare(struct vb2_buffer *vb) if (vb->num_planes < format->num_planes) return -EINVAL; - buf->video = video; - for (i = 0; i < vb->num_planes; ++i) { buf->addr[i] = vb2_dma_contig_plane_dma_addr(vb, i); buf->length[i] = vb2_plane_size(vb, i); diff --git a/drivers/media/platform/vsp1/vsp1_video.h b/drivers/media/platform/vsp1/vsp1_video.h index d8612a3..47b7a8a 100644 --- a/drivers/media/platform/vsp1/vsp1_video.h +++ b/drivers/media/platform/vsp1/vsp1_video.h @@ -89,7 +89,6 @@ static inline struct vsp1_pipeline *to_vsp1_pipeline(struct media_entity *e) } struct vsp1_video_buffer { - struct vsp1_video *video; struct vb2_buffer buf; struct list_head queue; diff --git a/drivers/media/tuners/fc2580.c b/drivers/media/tuners/fc2580.c index 3aecaf4..f0c9c42 100644 --- a/drivers/media/tuners/fc2580.c +++ b/drivers/media/tuners/fc2580.c @@ -195,7 +195,7 @@ static int fc2580_set_params(struct dvb_frontend *fe) f_ref = 2UL * priv->cfg->clock / r_val; n_val = div_u64_rem(f_vco, f_ref, &k_val); - k_val_reg = 1UL * k_val * (1 << 20) / f_ref; + k_val_reg = div_u64(1ULL * k_val * (1 << 20), f_ref); ret = fc2580_wr_reg(priv, 0x18, r18_val | ((k_val_reg >> 16) & 0xff)); if (ret < 0) @@ -348,8 +348,8 @@ static int fc2580_set_params(struct dvb_frontend *fe) if (ret < 0) goto err; - ret = fc2580_wr_reg(priv, 0x37, 1UL * priv->cfg->clock * \ - fc2580_if_filter_lut[i].mul / 1000000000); + ret = fc2580_wr_reg(priv, 0x37, div_u64(1ULL * priv->cfg->clock * + fc2580_if_filter_lut[i].mul, 1000000000)); if 
(ret < 0) goto err; diff --git a/drivers/media/tuners/fc2580_priv.h b/drivers/media/tuners/fc2580_priv.h index be38a9e..646c994 100644 --- a/drivers/media/tuners/fc2580_priv.h +++ b/drivers/media/tuners/fc2580_priv.h @@ -22,6 +22,7 @@ #define FC2580_PRIV_H #include "fc2580.h" +#include <linux/math64.h> struct fc2580_reg_val { u8 reg; diff --git a/drivers/media/tuners/xc4000.c b/drivers/media/tuners/xc4000.c index 2018bef..e71decb 100644 --- a/drivers/media/tuners/xc4000.c +++ b/drivers/media/tuners/xc4000.c @@ -93,7 +93,7 @@ struct xc4000_priv { struct firmware_description *firm; int firm_size; u32 if_khz; - u32 freq_hz; + u32 freq_hz, freq_offset; u32 bandwidth; u8 video_standard; u8 rf_mode; @@ -1157,14 +1157,14 @@ static int xc4000_set_params(struct dvb_frontend *fe) case SYS_ATSC: dprintk(1, "%s() VSB modulation\n", __func__); priv->rf_mode = XC_RF_MODE_AIR; - priv->freq_hz = c->frequency - 1750000; + priv->freq_offset = 1750000; priv->video_standard = XC4000_DTV6; type = DTV6; break; case SYS_DVBC_ANNEX_B: dprintk(1, "%s() QAM modulation\n", __func__); priv->rf_mode = XC_RF_MODE_CABLE; - priv->freq_hz = c->frequency - 1750000; + priv->freq_offset = 1750000; priv->video_standard = XC4000_DTV6; type = DTV6; break; @@ -1173,23 +1173,23 @@ static int xc4000_set_params(struct dvb_frontend *fe) dprintk(1, "%s() OFDM\n", __func__); if (bw == 0) { if (c->frequency < 400000000) { - priv->freq_hz = c->frequency - 2250000; + priv->freq_offset = 2250000; } else { - priv->freq_hz = c->frequency - 2750000; + priv->freq_offset = 2750000; } priv->video_standard = XC4000_DTV7_8; type = DTV78; } else if (bw <= 6000000) { priv->video_standard = XC4000_DTV6; - priv->freq_hz = c->frequency - 1750000; + priv->freq_offset = 1750000; type = DTV6; } else if (bw <= 7000000) { priv->video_standard = XC4000_DTV7; - priv->freq_hz = c->frequency - 2250000; + priv->freq_offset = 2250000; type = DTV7; } else { priv->video_standard = XC4000_DTV8; - priv->freq_hz = c->frequency - 2750000; + 
priv->freq_offset = 2750000; type = DTV8; } priv->rf_mode = XC_RF_MODE_AIR; @@ -1200,6 +1200,8 @@ static int xc4000_set_params(struct dvb_frontend *fe) goto fail; } + priv->freq_hz = c->frequency - priv->freq_offset; + dprintk(1, "%s() frequency=%d (compensated)\n", __func__, priv->freq_hz); @@ -1520,7 +1522,7 @@ static int xc4000_get_frequency(struct dvb_frontend *fe, u32 *freq) { struct xc4000_priv *priv = fe->tuner_priv; - *freq = priv->freq_hz; + *freq = priv->freq_hz + priv->freq_offset; if (debug) { mutex_lock(&priv->lock); diff --git a/drivers/media/tuners/xc5000.c b/drivers/media/tuners/xc5000.c index 5cd09a6..b2d9e9c 100644 --- a/drivers/media/tuners/xc5000.c +++ b/drivers/media/tuners/xc5000.c @@ -55,7 +55,7 @@ struct xc5000_priv { u32 if_khz; u16 xtal_khz; - u32 freq_hz; + u32 freq_hz, freq_offset; u32 bandwidth; u8 video_standard; u8 rf_mode; @@ -755,13 +755,13 @@ static int xc5000_set_params(struct dvb_frontend *fe) case SYS_ATSC: dprintk(1, "%s() VSB modulation\n", __func__); priv->rf_mode = XC_RF_MODE_AIR; - priv->freq_hz = freq - 1750000; + priv->freq_offset = 1750000; priv->video_standard = DTV6; break; case SYS_DVBC_ANNEX_B: dprintk(1, "%s() QAM modulation\n", __func__); priv->rf_mode = XC_RF_MODE_CABLE; - priv->freq_hz = freq - 1750000; + priv->freq_offset = 1750000; priv->video_standard = DTV6; break; case SYS_ISDBT: @@ -776,15 +776,15 @@ static int xc5000_set_params(struct dvb_frontend *fe) switch (bw) { case 6000000: priv->video_standard = DTV6; - priv->freq_hz = freq - 1750000; + priv->freq_offset = 1750000; break; case 7000000: priv->video_standard = DTV7; - priv->freq_hz = freq - 2250000; + priv->freq_offset = 2250000; break; case 8000000: priv->video_standard = DTV8; - priv->freq_hz = freq - 2750000; + priv->freq_offset = 2750000; break; default: printk(KERN_ERR "xc5000 bandwidth not set!\n"); @@ -798,15 +798,15 @@ static int xc5000_set_params(struct dvb_frontend *fe) priv->rf_mode = XC_RF_MODE_CABLE; if (bw <= 6000000) { 
priv->video_standard = DTV6; - priv->freq_hz = freq - 1750000; + priv->freq_offset = 1750000; b = 6; } else if (bw <= 7000000) { priv->video_standard = DTV7; - priv->freq_hz = freq - 2250000; + priv->freq_offset = 2250000; b = 7; } else { priv->video_standard = DTV7_8; - priv->freq_hz = freq - 2750000; + priv->freq_offset = 2750000; b = 8; } dprintk(1, "%s() Bandwidth %dMHz (%d)\n", __func__, @@ -817,6 +817,8 @@ static int xc5000_set_params(struct dvb_frontend *fe) return -EINVAL; } + priv->freq_hz = freq - priv->freq_offset; + dprintk(1, "%s() frequency=%d (compensated to %d)\n", __func__, freq, priv->freq_hz); @@ -1067,7 +1069,7 @@ static int xc5000_get_frequency(struct dvb_frontend *fe, u32 *freq) { struct xc5000_priv *priv = fe->tuner_priv; dprintk(1, "%s()\n", __func__); - *freq = priv->freq_hz; + *freq = priv->freq_hz + priv->freq_offset; return 0; } diff --git a/drivers/media/usb/au0828/au0828-cards.c b/drivers/media/usb/au0828/au0828-cards.c index dd32dec..1d4b110 100644 --- a/drivers/media/usb/au0828/au0828-cards.c +++ b/drivers/media/usb/au0828/au0828-cards.c @@ -36,6 +36,11 @@ static void hvr950q_cs5340_audio(void *priv, int enable) au0828_clear(dev, REG_000, 0x10); } +/* + * WARNING: There's a quirks table at sound/usb/quirks-table.h + * that should also be updated every time a new device with V4L2 support + * is added here. 
+ */ struct au0828_board au0828_boards[] = { [AU0828_BOARD_UNKNOWN] = { .name = "Unknown board", diff --git a/drivers/media/usb/au0828/au0828-video.c b/drivers/media/usb/au0828/au0828-video.c index f615454..7ed75ef 100644 --- a/drivers/media/usb/au0828/au0828-video.c +++ b/drivers/media/usb/au0828/au0828-video.c @@ -787,11 +787,27 @@ static int au0828_i2s_init(struct au0828_dev *dev) /* * Auvitek au0828 analog stream enable - * Please set interface0 to AS5 before enable the stream */ static int au0828_analog_stream_enable(struct au0828_dev *d) { + struct usb_interface *iface; + int ret; + dprintk(1, "au0828_analog_stream_enable called\n"); + + iface = usb_ifnum_to_if(d->usbdev, 0); + if (iface && iface->cur_altsetting->desc.bAlternateSetting != 5) { + dprintk(1, "Changing intf#0 to alt 5\n"); + /* set au0828 interface0 to AS5 here again */ + ret = usb_set_interface(d->usbdev, 0, 5); + if (ret < 0) { + printk(KERN_INFO "Au0828 can't set alt setting to 5!\n"); + return -EBUSY; + } + } + + /* FIXME: size should be calculated using d->width, d->height */ + au0828_writereg(d, AU0828_SENSORCTRL_VBI_103, 0x00); au0828_writereg(d, 0x106, 0x00); /* set x position */ @@ -1002,15 +1018,6 @@ static int au0828_v4l2_open(struct file *filp) return -ERESTARTSYS; } if (dev->users == 0) { - /* set au0828 interface0 to AS5 here again */ - ret = usb_set_interface(dev->usbdev, 0, 5); - if (ret < 0) { - mutex_unlock(&dev->lock); - printk(KERN_INFO "Au0828 can't set alternate to 5!\n"); - kfree(fh); - return -EBUSY; - } - au0828_analog_stream_enable(dev); au0828_analog_stream_reset(dev); @@ -1252,13 +1259,6 @@ static int au0828_set_format(struct au0828_dev *dev, unsigned int cmd, } } - /* set au0828 interface0 to AS5 here again */ - ret = usb_set_interface(dev->usbdev, 0, 5); - if (ret < 0) { - printk(KERN_INFO "Au0828 can't set alt setting to 5!\n"); - return -EBUSY; - } - au0828_analog_stream_enable(dev); return 0; diff --git a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c 
b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c index c0cd084..481dd24 100644 --- a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c +++ b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c @@ -1343,6 +1343,7 @@ static const struct dvb_usb_device_properties rtl2832u_props = { }; static const struct usb_device_id rtl28xxu_id_table[] = { + /* RTL2831U devices: */ { DVB_USB_DEVICE(USB_VID_REALTEK, USB_PID_REALTEK_RTL2831U, &rtl2831u_props, "Realtek RTL2831U reference design", NULL) }, { DVB_USB_DEVICE(USB_VID_WIDEVIEW, USB_PID_FREECOM_DVBT, @@ -1350,6 +1351,7 @@ static const struct usb_device_id rtl28xxu_id_table[] = { { DVB_USB_DEVICE(USB_VID_WIDEVIEW, USB_PID_FREECOM_DVBT_2, &rtl2831u_props, "Freecom USB2.0 DVB-T", NULL) }, + /* RTL2832U devices: */ { DVB_USB_DEVICE(USB_VID_REALTEK, 0x2832, &rtl2832u_props, "Realtek RTL2832U reference design", NULL) }, { DVB_USB_DEVICE(USB_VID_REALTEK, 0x2838, @@ -1362,12 +1364,16 @@ static const struct usb_device_id rtl28xxu_id_table[] = { &rtl2832u_props, "TerraTec NOXON DAB Stick", NULL) }, { DVB_USB_DEVICE(USB_VID_TERRATEC, USB_PID_NOXON_DAB_STICK_REV2, &rtl2832u_props, "TerraTec NOXON DAB Stick (rev 2)", NULL) }, + { DVB_USB_DEVICE(USB_VID_TERRATEC, USB_PID_NOXON_DAB_STICK_REV3, + &rtl2832u_props, "TerraTec NOXON DAB Stick (rev 3)", NULL) }, { DVB_USB_DEVICE(USB_VID_GTEK, USB_PID_TREKSTOR_TERRES_2_0, &rtl2832u_props, "Trekstor DVB-T Stick Terres 2.0", NULL) }, { DVB_USB_DEVICE(USB_VID_DEXATEK, 0x1101, &rtl2832u_props, "Dexatek DK DVB-T Dongle", NULL) }, { DVB_USB_DEVICE(USB_VID_LEADTEK, 0x6680, &rtl2832u_props, "DigitalNow Quad DVB-T Receiver", NULL) }, + { DVB_USB_DEVICE(USB_VID_LEADTEK, USB_PID_WINFAST_DTV_DONGLE_MINID, + &rtl2832u_props, "Leadtek Winfast DTV Dongle Mini D", NULL) }, { DVB_USB_DEVICE(USB_VID_TERRATEC, 0x00d3, &rtl2832u_props, "TerraTec Cinergy T Stick RC (Rev. 
3)", NULL) }, { DVB_USB_DEVICE(USB_VID_DEXATEK, 0x1102, @@ -1388,6 +1394,18 @@ static const struct usb_device_id rtl28xxu_id_table[] = { &rtl2832u_props, "Leadtek WinFast DTV Dongle mini", NULL) }, { DVB_USB_DEVICE(USB_VID_GTEK, USB_PID_CPYTO_REDI_PC50A, &rtl2832u_props, "Crypto ReDi PC 50 A", NULL) }, + { DVB_USB_DEVICE(USB_VID_KYE, 0x707f, + &rtl2832u_props, "Genius TVGo DVB-T03", NULL) }, + { DVB_USB_DEVICE(USB_VID_KWORLD_2, 0xd395, + &rtl2832u_props, "Peak DVB-T USB", NULL) }, + { DVB_USB_DEVICE(USB_VID_KWORLD_2, USB_PID_SVEON_STV20_RTL2832U, + &rtl2832u_props, "Sveon STV20", NULL) }, + { DVB_USB_DEVICE(USB_VID_KWORLD_2, USB_PID_SVEON_STV27, + &rtl2832u_props, "Sveon STV27", NULL) }, + + /* RTL2832P devices: */ + { DVB_USB_DEVICE(USB_VID_HANFTEK, 0x0131, + &rtl2832u_props, "Astrometa DVB-T2", NULL) }, { } }; MODULE_DEVICE_TABLE(usb, rtl28xxu_id_table); diff --git a/drivers/media/usb/dvb-usb/af9005.c b/drivers/media/usb/dvb-usb/af9005.c index af176b6..e6d3561 100644 --- a/drivers/media/usb/dvb-usb/af9005.c +++ b/drivers/media/usb/dvb-usb/af9005.c @@ -1081,9 +1081,12 @@ static int __init af9005_usb_module_init(void) err("usb_register failed. 
(%d)", result); return result; } +#if IS_MODULE(CONFIG_DVB_USB_AF9005) || defined(CONFIG_DVB_USB_AF9005_REMOTE) + /* FIXME: convert to todays kernel IR infrastructure */ rc_decode = symbol_request(af9005_rc_decode); rc_keys = symbol_request(rc_map_af9005_table); rc_keys_size = symbol_request(rc_map_af9005_table_size); +#endif if (rc_decode == NULL || rc_keys == NULL || rc_keys_size == NULL) { err("af9005_rc_decode function not found, disabling remote"); af9005_properties.rc.legacy.rc_query = NULL; diff --git a/drivers/media/usb/em28xx/em28xx-dvb.c b/drivers/media/usb/em28xx/em28xx-dvb.c index bb1e8dc..069b7f0 100644 --- a/drivers/media/usb/em28xx/em28xx-dvb.c +++ b/drivers/media/usb/em28xx/em28xx-dvb.c @@ -673,7 +673,8 @@ static void pctv_520e_init(struct em28xx *dev) static int em28xx_pctv_290e_set_lna(struct dvb_frontend *fe) { struct dtv_frontend_properties *c = &fe->dtv_property_cache; - struct em28xx *dev = fe->dvb->priv; + struct em28xx_i2c_bus *i2c_bus = fe->dvb->priv; + struct em28xx *dev = i2c_bus->dev; #ifdef CONFIG_GPIOLIB struct em28xx_dvb *dvb = dev->dvb; int ret; diff --git a/drivers/media/usb/em28xx/em28xx-video.c b/drivers/media/usb/em28xx/em28xx-video.c index 9d10334..81b21d9 100644 --- a/drivers/media/usb/em28xx/em28xx-video.c +++ b/drivers/media/usb/em28xx/em28xx-video.c @@ -695,13 +695,16 @@ static int em28xx_stop_streaming(struct vb2_queue *vq) } spin_lock_irqsave(&dev->slock, flags); + if (dev->usb_ctl.vid_buf != NULL) { + vb2_buffer_done(&dev->usb_ctl.vid_buf->vb, VB2_BUF_STATE_ERROR); + dev->usb_ctl.vid_buf = NULL; + } while (!list_empty(&vidq->active)) { struct em28xx_buffer *buf; buf = list_entry(vidq->active.next, struct em28xx_buffer, list); list_del(&buf->list); vb2_buffer_done(&buf->vb, VB2_BUF_STATE_ERROR); } - dev->usb_ctl.vid_buf = NULL; spin_unlock_irqrestore(&dev->slock, flags); return 0; @@ -723,13 +726,16 @@ int em28xx_stop_vbi_streaming(struct vb2_queue *vq) } spin_lock_irqsave(&dev->slock, flags); + if (dev->usb_ctl.vbi_buf != 
NULL) { + vb2_buffer_done(&dev->usb_ctl.vbi_buf->vb, VB2_BUF_STATE_ERROR); + dev->usb_ctl.vbi_buf = NULL; + } while (!list_empty(&vbiq->active)) { struct em28xx_buffer *buf; buf = list_entry(vbiq->active.next, struct em28xx_buffer, list); list_del(&buf->list); vb2_buffer_done(&buf->vb, VB2_BUF_STATE_ERROR); } - dev->usb_ctl.vbi_buf = NULL; spin_unlock_irqrestore(&dev->slock, flags); return 0; diff --git a/drivers/media/usb/gspca/pac7302.c b/drivers/media/usb/gspca/pac7302.c index a915096..0d4be1d 100644 --- a/drivers/media/usb/gspca/pac7302.c +++ b/drivers/media/usb/gspca/pac7302.c @@ -928,6 +928,7 @@ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x093a, 0x2620)}, {USB_DEVICE(0x093a, 0x2621)}, {USB_DEVICE(0x093a, 0x2622), .driver_info = FL_VFLIP}, + {USB_DEVICE(0x093a, 0x2623), .driver_info = FL_VFLIP}, {USB_DEVICE(0x093a, 0x2624), .driver_info = FL_VFLIP}, {USB_DEVICE(0x093a, 0x2625)}, {USB_DEVICE(0x093a, 0x2626)}, diff --git a/drivers/media/usb/gspca/sn9c20x.c b/drivers/media/usb/gspca/sn9c20x.c index f4453d5..ceb5404 100644 --- a/drivers/media/usb/gspca/sn9c20x.c +++ b/drivers/media/usb/gspca/sn9c20x.c @@ -2359,6 +2359,7 @@ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x045e, 0x00f4), SN9C20X(OV9650, 0x30, 0)}, {USB_DEVICE(0x145f, 0x013d), SN9C20X(OV7660, 0x21, 0)}, {USB_DEVICE(0x0458, 0x7029), SN9C20X(HV7131R, 0x11, 0)}, + {USB_DEVICE(0x0458, 0x7045), SN9C20X(MT9M112, 0x5d, LED_REVERSE)}, {USB_DEVICE(0x0458, 0x704a), SN9C20X(MT9M112, 0x5d, 0)}, {USB_DEVICE(0x0458, 0x704c), SN9C20X(MT9M112, 0x5d, 0)}, {USB_DEVICE(0xa168, 0x0610), SN9C20X(HV7131R, 0x11, 0)}, diff --git a/drivers/media/usb/hdpvr/hdpvr-video.c b/drivers/media/usb/hdpvr/hdpvr-video.c index 0500c417..6bce01a 100644 --- a/drivers/media/usb/hdpvr/hdpvr-video.c +++ b/drivers/media/usb/hdpvr/hdpvr-video.c @@ -82,7 +82,7 @@ static void hdpvr_read_bulk_callback(struct urb *urb) } /*=========================================================================*/ -/* bufffer 
bits */ +/* buffer bits */ /* function expects dev->io_mutex to be hold by caller */ int hdpvr_cancel_queue(struct hdpvr_device *dev) @@ -926,7 +926,7 @@ static int hdpvr_s_ctrl(struct v4l2_ctrl *ctrl) case V4L2_CID_MPEG_AUDIO_ENCODING: if (dev->flags & HDPVR_FLAG_AC3_CAP) { opt->audio_codec = ctrl->val; - return hdpvr_set_audio(dev, opt->audio_input, + return hdpvr_set_audio(dev, opt->audio_input + 1, opt->audio_codec); } return 0; @@ -1198,7 +1198,7 @@ int hdpvr_register_videodev(struct hdpvr_device *dev, struct device *parent, v4l2_ctrl_new_std_menu(hdl, &hdpvr_ctrl_ops, V4L2_CID_MPEG_AUDIO_ENCODING, ac3 ? V4L2_MPEG_AUDIO_ENCODING_AC3 : V4L2_MPEG_AUDIO_ENCODING_AAC, - 0x7, V4L2_MPEG_AUDIO_ENCODING_AAC); + 0x7, ac3 ? dev->options.audio_codec : V4L2_MPEG_AUDIO_ENCODING_AAC); v4l2_ctrl_new_std_menu(hdl, &hdpvr_ctrl_ops, V4L2_CID_MPEG_VIDEO_ENCODING, V4L2_MPEG_VIDEO_ENCODING_MPEG_4_AVC, 0x3, diff --git a/drivers/media/usb/stk1160/stk1160-core.c b/drivers/media/usb/stk1160/stk1160-core.c index 34a26e0..03504dc 100644 --- a/drivers/media/usb/stk1160/stk1160-core.c +++ b/drivers/media/usb/stk1160/stk1160-core.c @@ -67,17 +67,25 @@ int stk1160_read_reg(struct stk1160 *dev, u16 reg, u8 *value) { int ret; int pipe = usb_rcvctrlpipe(dev->udev, 0); + u8 *buf; *value = 0; + + buf = kmalloc(sizeof(u8), GFP_KERNEL); + if (!buf) + return -ENOMEM; ret = usb_control_msg(dev->udev, pipe, 0x00, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 0x00, reg, value, sizeof(u8), HZ); + 0x00, reg, buf, sizeof(u8), HZ); if (ret < 0) { stk1160_err("read failed on reg 0x%x (%d)\n", reg, ret); + kfree(buf); return ret; } + *value = *buf; + kfree(buf); return 0; } diff --git a/drivers/media/usb/stk1160/stk1160.h b/drivers/media/usb/stk1160/stk1160.h index 05b05b1..abdea48 100644 --- a/drivers/media/usb/stk1160/stk1160.h +++ b/drivers/media/usb/stk1160/stk1160.h @@ -143,7 +143,6 @@ struct stk1160 { int num_alt; struct stk1160_isoc_ctl isoc_ctl; - char urb_buf[255]; /* urb control msg buffer */ 
/* frame properties */ int width; /* current frame width */ diff --git a/drivers/media/usb/ttusb-dec/ttusbdecfe.c b/drivers/media/usb/ttusb-dec/ttusbdecfe.c index 5c45c9d..9c29552 100644 --- a/drivers/media/usb/ttusb-dec/ttusbdecfe.c +++ b/drivers/media/usb/ttusb-dec/ttusbdecfe.c @@ -156,6 +156,9 @@ static int ttusbdecfe_dvbs_diseqc_send_master_cmd(struct dvb_frontend* fe, struc 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + if (cmd->msg_len > sizeof(b) - 4) + return -EINVAL; + memcpy(&b[4], cmd->msg, cmd->msg_len); state->config->send_command(fe, 0x72, diff --git a/drivers/media/usb/usbvision/usbvision-video.c b/drivers/media/usb/usbvision/usbvision-video.c index 5c9e312..661f7f2 100644 --- a/drivers/media/usb/usbvision/usbvision-video.c +++ b/drivers/media/usb/usbvision/usbvision-video.c @@ -446,6 +446,7 @@ static int usbvision_v4l2_close(struct file *file) if (usbvision->remove_pending) { printk(KERN_INFO "%s: Final disconnect\n", __func__); usbvision_release(usbvision); + return 0; } mutex_unlock(&usbvision->v4l2_lock); @@ -1221,6 +1222,7 @@ static int usbvision_radio_close(struct file *file) if (usbvision->remove_pending) { printk(KERN_INFO "%s: Final disconnect\n", __func__); usbvision_release(usbvision); + return err_code; } mutex_unlock(&usbvision->v4l2_lock); diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index c3bb250..4531441 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -1603,12 +1603,12 @@ static void uvc_delete(struct uvc_device *dev) { struct list_head *p, *n; - usb_put_intf(dev->intf); - usb_put_dev(dev->udev); - uvc_status_cleanup(dev); uvc_ctrl_cleanup_device(dev); + usb_put_intf(dev->intf); + usb_put_dev(dev->udev); + if (dev->vdev.dev) v4l2_device_unregister(&dev->vdev); #ifdef CONFIG_MEDIA_CONTROLLER @@ -2210,6 +2210,15 @@ static struct usb_device_id uvc_ids[] = { .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = UVC_QUIRK_PROBE_DEF }, + /* Dell XPS 
M1330 (OmniVision OV7670 webcam) */ + { .match_flags = USB_DEVICE_ID_MATCH_DEVICE + | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x05a9, + .idProduct = 0x7670, + .bInterfaceClass = USB_CLASS_VIDEO, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = 0, + .driver_info = UVC_QUIRK_PROBE_DEF }, /* Apple Built-In iSight */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, diff --git a/drivers/media/usb/uvc/uvc_video.c b/drivers/media/usb/uvc/uvc_video.c index 3394c34..c081812 100644 --- a/drivers/media/usb/uvc/uvc_video.c +++ b/drivers/media/usb/uvc/uvc_video.c @@ -361,6 +361,14 @@ static int uvc_commit_video(struct uvc_streaming *stream, * Clocks and timestamps */ +static inline void uvc_video_get_ts(struct timespec *ts) +{ + if (uvc_clock_param == CLOCK_MONOTONIC) + ktime_get_ts(ts); + else + ktime_get_real_ts(ts); +} + static void uvc_video_clock_decode(struct uvc_streaming *stream, struct uvc_buffer *buf, const __u8 *data, int len) @@ -420,7 +428,7 @@ uvc_video_clock_decode(struct uvc_streaming *stream, struct uvc_buffer *buf, stream->clock.last_sof = dev_sof; host_sof = usb_get_current_frame_number(stream->dev->udev); - ktime_get_ts(&ts); + uvc_video_get_ts(&ts); /* The UVC specification allows device implementations that can't obtain * the USB frame number to keep their own frame counters as long as they @@ -1010,10 +1018,7 @@ static int uvc_video_decode_start(struct uvc_streaming *stream, return -ENODATA; } - if (uvc_clock_param == CLOCK_MONOTONIC) - ktime_get_ts(&ts); - else - ktime_get_real_ts(&ts); + uvc_video_get_ts(&ts); buf->buf.v4l2_buf.sequence = stream->sequence; buf->buf.v4l2_buf.timestamp.tv_sec = ts.tv_sec; @@ -1846,7 +1851,25 @@ int uvc_video_enable(struct uvc_streaming *stream, int enable) if (!enable) { uvc_uninit_video(stream, 1); - usb_set_interface(stream->dev->udev, stream->intfnum, 0); + if (stream->intf->num_altsetting > 1) { + usb_set_interface(stream->dev->udev, + stream->intfnum, 0); + } else { + /* UVC doesn't 
specify how to inform a bulk-based device + * when the video stream is stopped. Windows sends a + * CLEAR_FEATURE(HALT) request to the video streaming + * bulk endpoint, mimic the same behaviour. + */ + unsigned int epnum = stream->header.bEndpointAddress + & USB_ENDPOINT_NUMBER_MASK; + unsigned int dir = stream->header.bEndpointAddress + & USB_ENDPOINT_DIR_MASK; + unsigned int pipe; + + pipe = usb_sndbulkpipe(stream->dev->udev, epnum) | dir; + usb_clear_halt(stream->dev->udev, pipe); + } + uvc_queue_enable(&stream->queue, 0); uvc_video_clock_cleanup(stream); return 0; diff --git a/drivers/media/v4l2-core/v4l2-common.c b/drivers/media/v4l2-core/v4l2-common.c index 037d7a5..767abc9 100644 --- a/drivers/media/v4l2-core/v4l2-common.c +++ b/drivers/media/v4l2-core/v4l2-common.c @@ -431,16 +431,13 @@ static unsigned int clamp_align(unsigned int x, unsigned int min, /* Bits that must be zero to be aligned */ unsigned int mask = ~((1 << align) - 1); + /* Clamp to aligned min and max */ + x = clamp(x, (min + ~mask) & mask, max & mask); + /* Round to nearest aligned value */ if (align) x = (x + (1 << (align - 1))) & mask; - /* Clamp to aligned value of min and max */ - if (x < min) - x = (min + ~mask) & mask; - else if (x > max) - x = max & mask; - return x; } diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index 8f7a6a4..fca336b 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -178,6 +178,9 @@ struct v4l2_create_buffers32 { static int __get_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user *up) { + if (get_user(kp->type, &up->type)) + return -EFAULT; + switch (kp->type) { case V4L2_BUF_TYPE_VIDEO_CAPTURE: case V4L2_BUF_TYPE_VIDEO_OUTPUT: @@ -204,17 +207,16 @@ static int __get_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __us static int get_v4l2_format32(struct v4l2_format *kp, struct v4l2_format32 __user *up) { - if 
(!access_ok(VERIFY_READ, up, sizeof(struct v4l2_format32)) || - get_user(kp->type, &up->type)) - return -EFAULT; + if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_format32))) + return -EFAULT; return __get_v4l2_format32(kp, up); } static int get_v4l2_create32(struct v4l2_create_buffers *kp, struct v4l2_create_buffers32 __user *up) { if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_create_buffers32)) || - copy_from_user(kp, up, offsetof(struct v4l2_create_buffers32, format.fmt))) - return -EFAULT; + copy_from_user(kp, up, offsetof(struct v4l2_create_buffers32, format))) + return -EFAULT; return __get_v4l2_format32(&kp->format, &up->format); } @@ -787,8 +789,8 @@ static int put_v4l2_subdev_edid32(struct v4l2_subdev_edid *kp, struct v4l2_subde #define VIDIOC_DQBUF32 _IOWR('V', 17, struct v4l2_buffer32) #define VIDIOC_ENUMSTD32 _IOWR('V', 25, struct v4l2_standard32) #define VIDIOC_ENUMINPUT32 _IOWR('V', 26, struct v4l2_input32) -#define VIDIOC_SUBDEV_G_EDID32 _IOWR('V', 63, struct v4l2_subdev_edid32) -#define VIDIOC_SUBDEV_S_EDID32 _IOWR('V', 64, struct v4l2_subdev_edid32) +#define VIDIOC_SUBDEV_G_EDID32 _IOWR('V', 40, struct v4l2_subdev_edid32) +#define VIDIOC_SUBDEV_S_EDID32 _IOWR('V', 41, struct v4l2_subdev_edid32) #define VIDIOC_TRY_FMT32 _IOWR('V', 64, struct v4l2_format32) #define VIDIOC_G_EXT_CTRLS32 _IOWR('V', 71, struct v4l2_ext_controls32) #define VIDIOC_S_EXT_CTRLS32 _IOWR('V', 72, struct v4l2_ext_controls32) diff --git a/drivers/media/v4l2-core/v4l2-dv-timings.c b/drivers/media/v4l2-core/v4l2-dv-timings.c index ee52b9f4..9f2ac58 100644 --- a/drivers/media/v4l2-core/v4l2-dv-timings.c +++ b/drivers/media/v4l2-core/v4l2-dv-timings.c @@ -26,6 +26,10 @@ #include <linux/v4l2-dv-timings.h> #include <media/v4l2-dv-timings.h> +MODULE_AUTHOR("Hans Verkuil"); +MODULE_DESCRIPTION("V4L2 DV Timings Helper Functions"); +MODULE_LICENSE("GPL"); + const struct v4l2_dv_timings v4l2_dv_timings_presets[] = { V4L2_DV_BT_CEA_640X480P59_94, V4L2_DV_BT_CEA_720X480I59_94, @@ 
-590,10 +594,10 @@ struct v4l2_fract v4l2_calc_aspect_ratio(u8 hor_landscape, u8 vert_portrait) aspect.denominator = 9; } else if (ratio == 34) { aspect.numerator = 4; - aspect.numerator = 3; + aspect.denominator = 3; } else if (ratio == 68) { aspect.numerator = 15; - aspect.numerator = 9; + aspect.denominator = 9; } else { aspect.numerator = hor_landscape + 99; aspect.denominator = 100; diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c index de0e87f..c96bf946 100644 --- a/drivers/media/v4l2-core/videobuf2-core.c +++ b/drivers/media/v4l2-core/videobuf2-core.c @@ -703,6 +703,7 @@ static int __reqbufs(struct vb2_queue *q, struct v4l2_requestbuffers *req) * to the userspace. */ req->count = allocated_buffers; + q->waiting_for_buffers = !V4L2_TYPE_IS_OUTPUT(q->type); return 0; } @@ -751,6 +752,7 @@ static int __create_bufs(struct vb2_queue *q, struct v4l2_create_buffers *create memset(q->plane_sizes, 0, sizeof(q->plane_sizes)); memset(q->alloc_ctx, 0, sizeof(q->alloc_ctx)); q->memory = create->memory; + q->waiting_for_buffers = !V4L2_TYPE_IS_OUTPUT(q->type); } num_buffers = min(create->count, VIDEO_MAX_FRAME - q->num_buffers); @@ -1371,6 +1373,7 @@ static int __vb2_qbuf(struct vb2_queue *q, struct v4l2_buffer *b, * dequeued in dqbuf. */ list_add_tail(&vb->queued_entry, &q->queued_list); + q->waiting_for_buffers = false; vb->state = VB2_BUF_STATE_QUEUED; /* @@ -1755,6 +1758,7 @@ int vb2_streamoff(struct vb2_queue *q, enum v4l2_buf_type type) * and videobuf, effectively returning control over them to userspace. */ __vb2_queue_cancel(q); + q->waiting_for_buffers = !V4L2_TYPE_IS_OUTPUT(q->type); dprintk(3, "Streamoff successful\n"); return 0; @@ -2040,9 +2044,16 @@ unsigned int vb2_poll(struct vb2_queue *q, struct file *file, poll_table *wait) } /* - * There is nothing to wait for if no buffers have already been queued. + * There is nothing to wait for if the queue isn't streaming. 
*/ - if (list_empty(&q->queued_list)) + if (!vb2_is_streaming(q)) + return res | POLLERR; + /* + * For compatibility with vb1: if QBUF hasn't been called yet, then + * return POLLERR as well. This only affects capture queues, output + * queues will always initialize waiting_for_buffers to false. + */ + if (q->waiting_for_buffers) return res | POLLERR; if (list_empty(&q->done_list)) diff --git a/drivers/memory/mvebu-devbus.c b/drivers/memory/mvebu-devbus.c index 110c036..b59a17f 100644 --- a/drivers/memory/mvebu-devbus.c +++ b/drivers/memory/mvebu-devbus.c @@ -108,8 +108,19 @@ static int devbus_set_timing_params(struct devbus *devbus, node->full_name); return err; } - /* Convert bit width to byte width */ - r.bus_width /= 8; + + /* + * The bus width is encoded into the register as 0 for 8 bits, + * and 1 for 16 bits, so we do the necessary conversion here. + */ + if (r.bus_width == 8) + r.bus_width = 0; + else if (r.bus_width == 16) + r.bus_width = 1; + else { + dev_err(devbus->dev, "invalid bus width %d\n", r.bus_width); + return -EINVAL; + } err = get_timing_param_ps(devbus, node, "devbus,badr-skew-ps", &r.badr_skew); diff --git a/drivers/memstick/host/rtsx_pci_ms.c b/drivers/memstick/host/rtsx_pci_ms.c index 25f8f93..8d70fcf 100644 --- a/drivers/memstick/host/rtsx_pci_ms.c +++ b/drivers/memstick/host/rtsx_pci_ms.c @@ -591,6 +591,7 @@ static int rtsx_pci_ms_drv_remove(struct platform_device *pdev) pcr->slots[RTSX_MS_CARD].card_event = NULL; msh = host->msh; host->eject = true; + cancel_work_sync(&host->handle_req); mutex_lock(&host->host_mutex); if (host->req) { diff --git a/drivers/message/fusion/mptspi.c b/drivers/message/fusion/mptspi.c index 5653e50..424f51d 100644 --- a/drivers/message/fusion/mptspi.c +++ b/drivers/message/fusion/mptspi.c @@ -1422,6 +1422,11 @@ mptspi_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto out_mptspi_probe; } + /* VMWare emulation doesn't properly implement WRITE_SAME + */ + if (pdev->subsystem_vendor == 0x15AD) + 
sh->no_write_same = 1; + spin_lock_irqsave(&ioc->FreeQlock, flags); /* Attach the SCSI Host to the IOC structure diff --git a/drivers/mfd/88pm800.c b/drivers/mfd/88pm800.c index a65447d..da1ef32 100644 --- a/drivers/mfd/88pm800.c +++ b/drivers/mfd/88pm800.c @@ -571,7 +571,7 @@ static int pm800_probe(struct i2c_client *client, ret = pm800_pages_init(chip); if (ret) { dev_err(&client->dev, "pm800_pages_init failed!\n"); - goto err_page_init; + goto err_device_init; } ret = device_800_init(chip, pdata); @@ -587,7 +587,6 @@ static int pm800_probe(struct i2c_client *client, err_device_init: pm800_pages_exit(chip); -err_page_init: err_subchip_alloc: pm80x_deinit(); out_init: diff --git a/drivers/mfd/88pm860x-core.c b/drivers/mfd/88pm860x-core.c index 7ebe9ef..a141b46 100644 --- a/drivers/mfd/88pm860x-core.c +++ b/drivers/mfd/88pm860x-core.c @@ -1179,12 +1179,18 @@ static int pm860x_probe(struct i2c_client *client, chip->companion_addr = pdata->companion_addr; chip->companion = i2c_new_dummy(chip->client->adapter, chip->companion_addr); + if (!chip->companion) { + dev_err(&client->dev, + "Failed to allocate I2C companion device\n"); + return -ENODEV; + } chip->regmap_companion = regmap_init_i2c(chip->companion, &pm860x_regmap_config); if (IS_ERR(chip->regmap_companion)) { ret = PTR_ERR(chip->regmap_companion); dev_err(&chip->companion->dev, "Failed to allocate register map: %d\n", ret); + i2c_unregister_device(chip->companion); return ret; } i2c_set_clientdata(chip->companion, chip); diff --git a/drivers/mfd/kempld-core.c b/drivers/mfd/kempld-core.c index d3e2327..38917a8 100644 --- a/drivers/mfd/kempld-core.c +++ b/drivers/mfd/kempld-core.c @@ -322,9 +322,12 @@ static int kempld_detect_device(struct kempld_device_data *pld) return -ENODEV; } - /* Release hardware mutex if aquired */ - if (!(index_reg & KEMPLD_MUTEX_KEY)) + /* Release hardware mutex if acquired */ + if (!(index_reg & KEMPLD_MUTEX_KEY)) { iowrite8(KEMPLD_MUTEX_KEY, pld->io_index); + /* PXT and COMe-cPC2 
boards may require a second release */ + iowrite8(KEMPLD_MUTEX_KEY, pld->io_index); + } mutex_unlock(&pld->lock); diff --git a/drivers/mfd/max77686.c b/drivers/mfd/max77686.c index 108453b..fc2e0b9 100644 --- a/drivers/mfd/max77686.c +++ b/drivers/mfd/max77686.c @@ -120,6 +120,10 @@ static int max77686_i2c_probe(struct i2c_client *i2c, dev_info(max77686->dev, "device found\n"); max77686->rtc = i2c_new_dummy(i2c->adapter, I2C_ADDR_RTC); + if (!max77686->rtc) { + dev_err(max77686->dev, "Failed to allocate I2C device for RTC\n"); + return -ENODEV; + } i2c_set_clientdata(max77686->rtc, max77686); max77686_irq_init(max77686); diff --git a/drivers/mfd/max77693.c b/drivers/mfd/max77693.c index c04723e..8abfd3f 100644 --- a/drivers/mfd/max77693.c +++ b/drivers/mfd/max77693.c @@ -149,9 +149,18 @@ static int max77693_i2c_probe(struct i2c_client *i2c, dev_info(max77693->dev, "device ID: 0x%x\n", reg_data); max77693->muic = i2c_new_dummy(i2c->adapter, I2C_ADDR_MUIC); + if (!max77693->muic) { + dev_err(max77693->dev, "Failed to allocate I2C device for MUIC\n"); + return -ENODEV; + } i2c_set_clientdata(max77693->muic, max77693); max77693->haptic = i2c_new_dummy(i2c->adapter, I2C_ADDR_HAPTIC); + if (!max77693->haptic) { + dev_err(max77693->dev, "Failed to allocate I2C device for Haptic\n"); + ret = -ENODEV; + goto err_i2c_haptic; + } i2c_set_clientdata(max77693->haptic, max77693); /* @@ -187,8 +196,9 @@ err_mfd: max77693_irq_exit(max77693); err_irq: err_regmap_muic: - i2c_unregister_device(max77693->muic); i2c_unregister_device(max77693->haptic); +err_i2c_haptic: + i2c_unregister_device(max77693->muic); return ret; } diff --git a/drivers/mfd/max8925-i2c.c b/drivers/mfd/max8925-i2c.c index de7fb80..afd0771 100644 --- a/drivers/mfd/max8925-i2c.c +++ b/drivers/mfd/max8925-i2c.c @@ -181,9 +181,18 @@ static int max8925_probe(struct i2c_client *client, mutex_init(&chip->io_lock); chip->rtc = i2c_new_dummy(chip->i2c->adapter, RTC_I2C_ADDR); + if (!chip->rtc) { + dev_err(chip->dev, 
"Failed to allocate I2C device for RTC\n"); + return -ENODEV; + } i2c_set_clientdata(chip->rtc, chip); chip->adc = i2c_new_dummy(chip->i2c->adapter, ADC_I2C_ADDR); + if (!chip->adc) { + dev_err(chip->dev, "Failed to allocate I2C device for ADC\n"); + i2c_unregister_device(chip->rtc); + return -ENODEV; + } i2c_set_clientdata(chip->adc, chip); device_init_wakeup(&client->dev, 1); diff --git a/drivers/mfd/max8997.c b/drivers/mfd/max8997.c index cee098c..20a2005 100644 --- a/drivers/mfd/max8997.c +++ b/drivers/mfd/max8997.c @@ -217,10 +217,26 @@ static int max8997_i2c_probe(struct i2c_client *i2c, mutex_init(&max8997->iolock); max8997->rtc = i2c_new_dummy(i2c->adapter, I2C_ADDR_RTC); + if (!max8997->rtc) { + dev_err(max8997->dev, "Failed to allocate I2C device for RTC\n"); + return -ENODEV; + } i2c_set_clientdata(max8997->rtc, max8997); + max8997->haptic = i2c_new_dummy(i2c->adapter, I2C_ADDR_HAPTIC); + if (!max8997->haptic) { + dev_err(max8997->dev, "Failed to allocate I2C device for Haptic\n"); + ret = -ENODEV; + goto err_i2c_haptic; + } i2c_set_clientdata(max8997->haptic, max8997); + max8997->muic = i2c_new_dummy(i2c->adapter, I2C_ADDR_MUIC); + if (!max8997->muic) { + dev_err(max8997->dev, "Failed to allocate I2C device for MUIC\n"); + ret = -ENODEV; + goto err_i2c_muic; + } i2c_set_clientdata(max8997->muic, max8997); pm_runtime_set_active(max8997->dev); @@ -247,7 +263,9 @@ static int max8997_i2c_probe(struct i2c_client *i2c, err_mfd: mfd_remove_devices(max8997->dev); i2c_unregister_device(max8997->muic); +err_i2c_muic: i2c_unregister_device(max8997->haptic); +err_i2c_haptic: i2c_unregister_device(max8997->rtc); return ret; } diff --git a/drivers/mfd/max8998.c b/drivers/mfd/max8998.c index fe6332d..25e131a 100644 --- a/drivers/mfd/max8998.c +++ b/drivers/mfd/max8998.c @@ -215,6 +215,10 @@ static int max8998_i2c_probe(struct i2c_client *i2c, mutex_init(&max8998->iolock); max8998->rtc = i2c_new_dummy(i2c->adapter, RTC_I2C_ADDR); + if (!max8998->rtc) { + 
dev_err(&i2c->dev, "Failed to allocate I2C device for RTC\n"); + return -ENODEV; + } i2c_set_clientdata(max8998->rtc, max8998); max8998_irq_init(max8998); diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c index 29ee54d..5dd653f 100644 --- a/drivers/mfd/omap-usb-host.c +++ b/drivers/mfd/omap-usb-host.c @@ -445,7 +445,7 @@ static unsigned omap_usbhs_rev1_hostconfig(struct usbhs_hcd_omap *omap, for (i = 0; i < omap->nports; i++) { if (is_ehci_phy_mode(pdata->port_mode[i])) { - reg &= OMAP_UHH_HOSTCONFIG_ULPI_BYPASS; + reg &= ~OMAP_UHH_HOSTCONFIG_ULPI_BYPASS; break; } } diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c index 6ed83fe..c2a7804 100644 --- a/drivers/mfd/rtsx_pcr.c +++ b/drivers/mfd/rtsx_pcr.c @@ -1172,7 +1172,7 @@ static int rtsx_pci_probe(struct pci_dev *pcidev, pcr->msi_en = msi_en; if (pcr->msi_en) { ret = pci_enable_msi(pcidev); - if (ret < 0) + if (ret) pcr->msi_en = false; } diff --git a/drivers/mfd/sec-core.c b/drivers/mfd/sec-core.c index f530e4b..d02546b 100644 --- a/drivers/mfd/sec-core.c +++ b/drivers/mfd/sec-core.c @@ -274,6 +274,10 @@ static int sec_pmic_probe(struct i2c_client *i2c, } sec_pmic->rtc = i2c_new_dummy(i2c->adapter, RTC_I2C_ADDR); + if (!sec_pmic->rtc) { + dev_err(&i2c->dev, "Failed to allocate I2C for RTC\n"); + return -ENODEV; + } i2c_set_clientdata(sec_pmic->rtc, sec_pmic); if (pdata && pdata->cfg_pmic_irq) diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c index 33f040c..3799a3d 100644 --- a/drivers/mfd/sm501.c +++ b/drivers/mfd/sm501.c @@ -1232,7 +1232,7 @@ static ssize_t sm501_dbg_regs(struct device *dev, } -static DEVICE_ATTR(dbg_regs, 0666, sm501_dbg_regs, NULL); +static DEVICE_ATTR(dbg_regs, 0444, sm501_dbg_regs, NULL); /* sm501_init_reg * diff --git a/drivers/mfd/stmpe.h b/drivers/mfd/stmpe.h index ff2b09b..50a5c86 100644 --- a/drivers/mfd/stmpe.h +++ b/drivers/mfd/stmpe.h @@ -269,7 +269,7 @@ int stmpe_remove(struct stmpe *stmpe); #define STMPE24XX_REG_CHIP_ID 0x80 #define 
STMPE24XX_REG_IEGPIOR_LSB 0x18 #define STMPE24XX_REG_ISGPIOR_MSB 0x19 -#define STMPE24XX_REG_GPMR_LSB 0xA5 +#define STMPE24XX_REG_GPMR_LSB 0xA4 #define STMPE24XX_REG_GPSR_LSB 0x85 #define STMPE24XX_REG_GPCR_LSB 0x88 #define STMPE24XX_REG_GPDR_LSB 0x8B diff --git a/drivers/mfd/tc6393xb.c b/drivers/mfd/tc6393xb.c index 11c19e5..48579e5 100644 --- a/drivers/mfd/tc6393xb.c +++ b/drivers/mfd/tc6393xb.c @@ -263,6 +263,17 @@ static int tc6393xb_ohci_disable(struct platform_device *dev) return 0; } +static int tc6393xb_ohci_suspend(struct platform_device *dev) +{ + struct tc6393xb_platform_data *tcpd = dev_get_platdata(dev->dev.parent); + + /* We can't properly store/restore OHCI state, so fail here */ + if (tcpd->resume_restore) + return -EBUSY; + + return tc6393xb_ohci_disable(dev); +} + static int tc6393xb_fb_enable(struct platform_device *dev) { struct tc6393xb *tc6393xb = dev_get_drvdata(dev->dev.parent); @@ -403,7 +414,7 @@ static struct mfd_cell tc6393xb_cells[] = { .num_resources = ARRAY_SIZE(tc6393xb_ohci_resources), .resources = tc6393xb_ohci_resources, .enable = tc6393xb_ohci_enable, - .suspend = tc6393xb_ohci_disable, + .suspend = tc6393xb_ohci_suspend, .resume = tc6393xb_ohci_enable, .disable = tc6393xb_ohci_disable, }, diff --git a/drivers/mfd/tps65910.c b/drivers/mfd/tps65910.c index d792772..de87eaf 100644 --- a/drivers/mfd/tps65910.c +++ b/drivers/mfd/tps65910.c @@ -254,8 +254,10 @@ static int tps65910_irq_init(struct tps65910 *tps65910, int irq, ret = regmap_add_irq_chip(tps65910->regmap, tps65910->chip_irq, IRQF_ONESHOT, pdata->irq_base, tps6591x_irqs_chip, &tps65910->irq_data); - if (ret < 0) + if (ret < 0) { dev_warn(tps65910->dev, "Failed to add irq_chip %d\n", ret); + tps65910->chip_irq = 0; + } return ret; } diff --git a/drivers/mfd/viperboard.c b/drivers/mfd/viperboard.c index af2a670..7bf6dd9 100644 --- a/drivers/mfd/viperboard.c +++ b/drivers/mfd/viperboard.c @@ -93,8 +93,9 @@ static int vprbrd_probe(struct usb_interface *interface, version >> 8, 
version & 0xff, vb->usb_dev->bus->busnum, vb->usb_dev->devnum); - ret = mfd_add_devices(&interface->dev, -1, vprbrd_devs, - ARRAY_SIZE(vprbrd_devs), NULL, 0, NULL); + ret = mfd_add_devices(&interface->dev, PLATFORM_DEVID_AUTO, + vprbrd_devs, ARRAY_SIZE(vprbrd_devs), NULL, 0, + NULL); if (ret != 0) { dev_err(&interface->dev, "Failed to add mfd devices to core."); goto error; diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index cd2033c..72b6823 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -71,7 +71,7 @@ static int mei_cl_device_probe(struct device *dev) dev_dbg(dev, "Device probe\n"); - strncpy(id.name, dev_name(dev), sizeof(id.name)); + strlcpy(id.name, dev_name(dev), sizeof(id.name)); return driver->probe(device, &id); } diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c index b66cec9..e9ea08d 100644 --- a/drivers/misc/mei/client.c +++ b/drivers/misc/mei/client.c @@ -74,23 +74,69 @@ int mei_me_cl_by_id(struct mei_device *dev, u8 client_id) /** - * mei_io_list_flush - removes list entry belonging to cl. + * mei_cl_cmp_id - tells if the clients are the same * - * @list: An instance of our list structure - * @cl: host client + * @cl1: host client 1 + * @cl2: host client 2 + * + * returns true - if the clients has same host and me ids + * false - otherwise + */ +static inline bool mei_cl_cmp_id(const struct mei_cl *cl1, + const struct mei_cl *cl2) +{ + return cl1 && cl2 && + (cl1->host_client_id == cl2->host_client_id) && + (cl1->me_client_id == cl2->me_client_id); +} + +/** + * mei_io_list_flush - removes cbs belonging to cl. 
+ * + * @list: an instance of our list structure + * @cl: host client, can be NULL for flushing the whole list + * @free: whether to free the cbs */ -void mei_io_list_flush(struct mei_cl_cb *list, struct mei_cl *cl) +static void __mei_io_list_flush(struct mei_cl_cb *list, + struct mei_cl *cl, bool free) { struct mei_cl_cb *cb; struct mei_cl_cb *next; + /* enable removing everything if no cl is specified */ list_for_each_entry_safe(cb, next, &list->list, list) { - if (cb->cl && mei_cl_cmp_id(cl, cb->cl)) + if (!cl || (cb->cl && mei_cl_cmp_id(cl, cb->cl))) { list_del(&cb->list); + if (free) + mei_io_cb_free(cb); + } } } /** + * mei_io_list_flush - removes list entry belonging to cl. + * + * @list: An instance of our list structure + * @cl: host client + */ +static inline void mei_io_list_flush(struct mei_cl_cb *list, struct mei_cl *cl) +{ + __mei_io_list_flush(list, cl, false); +} + + +/** + * mei_io_list_free - removes cb belonging to cl and free them + * + * @list: An instance of our list structure + * @cl: host client + */ +static inline void mei_io_list_free(struct mei_cl_cb *list, struct mei_cl *cl) +{ + __mei_io_list_flush(list, cl, true); +} + +/** * mei_io_cb_free - free mei_cb_private related memory * * @cb: mei callback struct @@ -192,8 +238,8 @@ int mei_cl_flush_queues(struct mei_cl *cl) dev_dbg(&cl->dev->pdev->dev, "remove list entry belonging to cl\n"); mei_io_list_flush(&cl->dev->read_list, cl); - mei_io_list_flush(&cl->dev->write_list, cl); - mei_io_list_flush(&cl->dev->write_waiting_list, cl); + mei_io_list_free(&cl->dev->write_list, cl); + mei_io_list_free(&cl->dev->write_waiting_list, cl); mei_io_list_flush(&cl->dev->ctrl_wr_list, cl); mei_io_list_flush(&cl->dev->ctrl_rd_list, cl); mei_io_list_flush(&cl->dev->amthif_cmd_list, cl); @@ -405,6 +451,7 @@ int mei_cl_disconnect(struct mei_cl *cl) dev_err(&dev->pdev->dev, "failed to disconnect.\n"); goto free; } + cl->timer_count = MEI_CONNECT_TIMEOUT; mdelay(10); /* Wait for hardware disconnection ready 
*/ list_add_tail(&cb->list, &dev->ctrl_rd_list.list); } else { @@ -916,20 +963,8 @@ void mei_cl_all_wakeup(struct mei_device *dev) */ void mei_cl_all_write_clear(struct mei_device *dev) { - struct mei_cl_cb *cb, *next; - struct list_head *list; - - list = &dev->write_list.list; - list_for_each_entry_safe(cb, next, list, list) { - list_del(&cb->list); - mei_io_cb_free(cb); - } - - list = &dev->write_waiting_list.list; - list_for_each_entry_safe(cb, next, list, list) { - list_del(&cb->list); - mei_io_cb_free(cb); - } + mei_io_list_free(&dev->write_list, NULL); + mei_io_list_free(&dev->write_waiting_list, NULL); } diff --git a/drivers/misc/mei/client.h b/drivers/misc/mei/client.h index 892cc42..5d75ab5 100644 --- a/drivers/misc/mei/client.h +++ b/drivers/misc/mei/client.h @@ -45,8 +45,6 @@ static inline void mei_io_list_init(struct mei_cl_cb *list) { INIT_LIST_HEAD(&list->list); } -void mei_io_list_flush(struct mei_cl_cb *list, struct mei_cl *cl); - /* * MEI Host Client Functions */ @@ -61,22 +59,6 @@ int mei_cl_unlink(struct mei_cl *cl); int mei_cl_flush_queues(struct mei_cl *cl); struct mei_cl_cb *mei_cl_find_read_cb(struct mei_cl *cl); -/** - * mei_cl_cmp_id - tells if file private data have same id - * - * @fe1: private data of 1. file object - * @fe2: private data of 2. 
file object - * - * returns true - if ids are the same and not NULL - */ -static inline bool mei_cl_cmp_id(const struct mei_cl *cl1, - const struct mei_cl *cl2) -{ - return cl1 && cl2 && - (cl1->host_client_id == cl2->host_client_id) && - (cl1->me_client_id == cl2->me_client_id); -} - int mei_cl_flow_ctrl_creds(struct mei_cl *cl); diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index 66f411a..cabc043 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -115,6 +115,11 @@ #define MEI_DEV_ID_LPT_HR 0x8CBA /* Lynx Point H Refresh */ #define MEI_DEV_ID_WPT_LP 0x9CBA /* Wildcat Point LP */ + +/* Host Firmware Status Registers in PCI Config Space */ +#define PCI_CFG_HFS_1 0x40 +#define PCI_CFG_HFS_2 0x48 + /* * MEI HW Section */ diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index 3412adc..e513354 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -164,6 +164,9 @@ static void mei_me_hw_reset_release(struct mei_device *dev) hcsr |= H_IG; hcsr &= ~H_RST; mei_hcsr_set(hw, hcsr); + + /* complete this write before we set host ready on another CPU */ + mmiowb(); } /** * mei_me_hw_reset - resets fw via mei csr register. @@ -183,9 +186,22 @@ static int mei_me_hw_reset(struct mei_device *dev, bool intr_enable) else hcsr &= ~H_IE; + dev->recvd_hw_ready = false; mei_me_reg_write(hw, H_CSR, hcsr); - if (dev->dev_state == MEI_DEV_POWER_DOWN) + /* + * Host reads the H_CSR once to ensure that the + * posted write to H_CSR completes. 
+ */ + hcsr = mei_hcsr_read(hw); + + if ((hcsr & H_RST) == 0) + dev_warn(&dev->pdev->dev, "H_RST is not set = 0x%08X", hcsr); + + if ((hcsr & H_RDY) == H_RDY) + dev_warn(&dev->pdev->dev, "H_RDY is not cleared 0x%08X", hcsr); + + if (intr_enable == false) mei_me_hw_reset_release(dev); return 0; @@ -201,6 +217,7 @@ static int mei_me_hw_reset(struct mei_device *dev, bool intr_enable) static void mei_me_host_set_ready(struct mei_device *dev) { struct mei_me_hw *hw = to_me_hw(dev); + hw->host_hw_state = mei_hcsr_read(hw); hw->host_hw_state |= H_IE | H_IG | H_RDY; mei_hcsr_set(hw, hw->host_hw_state); } @@ -233,10 +250,7 @@ static bool mei_me_hw_is_ready(struct mei_device *dev) static int mei_me_hw_ready_wait(struct mei_device *dev) { int err; - if (mei_me_hw_is_ready(dev)) - return 0; - dev->recvd_hw_ready = false; mutex_unlock(&dev->device_lock); err = wait_event_interruptible_timeout(dev->wait_hw_ready, dev->recvd_hw_ready, @@ -496,19 +510,15 @@ irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id) /* check if we need to start the dev */ if (!mei_host_is_ready(dev)) { if (mei_hw_is_ready(dev)) { + mei_me_hw_reset_release(dev); dev_dbg(&dev->pdev->dev, "we need to start the dev.\n"); dev->recvd_hw_ready = true; wake_up_interruptible(&dev->wait_hw_ready); - - mutex_unlock(&dev->device_lock); - return IRQ_HANDLED; } else { - dev_dbg(&dev->pdev->dev, "Reset Completed.\n"); - mei_me_hw_reset_release(dev); - mutex_unlock(&dev->device_lock); - return IRQ_HANDLED; + dev_dbg(&dev->pdev->dev, "Spurious Interrupt\n"); } + goto end; } /* check slots available for reading */ slots = mei_count_full_read_slots(dev); diff --git a/drivers/misc/mei/interrupt.c b/drivers/misc/mei/interrupt.c index 1b922e9..41b4b48 100644 --- a/drivers/misc/mei/interrupt.c +++ b/drivers/misc/mei/interrupt.c @@ -420,8 +420,7 @@ int mei_irq_write_handler(struct mei_device *dev, struct mei_cl_cb *cmpl_list) cl->status = 0; list_del(&cb->list); - if (MEI_WRITING == cl->writing_state && - cb->fop_type 
== MEI_FOP_WRITE && + if (cb->fop_type == MEI_FOP_WRITE && cl != &dev->iamthif_cl) { dev_dbg(&dev->pdev->dev, "MEI WRITE COMPLETE\n"); cl->writing_state = MEI_WRITE_COMPLETE; diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c index cabeddd..9558bef 100644 --- a/drivers/misc/mei/main.c +++ b/drivers/misc/mei/main.c @@ -648,8 +648,7 @@ static unsigned int mei_poll(struct file *file, poll_table *wait) goto out; } - if (MEI_WRITE_COMPLETE == cl->writing_state) - mask |= (POLLIN | POLLRDNORM); + mask |= (POLLIN | POLLRDNORM); out: mutex_unlock(&dev->device_lock); diff --git a/drivers/misc/mei/nfc.c b/drivers/misc/mei/nfc.c index 994ca4a..4b7ea3f 100644 --- a/drivers/misc/mei/nfc.c +++ b/drivers/misc/mei/nfc.c @@ -342,9 +342,10 @@ static int mei_nfc_send(struct mei_cl_device *cldev, u8 *buf, size_t length) ndev = (struct mei_nfc_dev *) cldev->priv_data; dev = ndev->cl->dev; + err = -ENOMEM; mei_buf = kzalloc(length + MEI_NFC_HEADER_SIZE, GFP_KERNEL); if (!mei_buf) - return -ENOMEM; + goto out; hdr = (struct mei_nfc_hci_hdr *) mei_buf; hdr->cmd = MEI_NFC_CMD_HCI_SEND; @@ -354,12 +355,9 @@ static int mei_nfc_send(struct mei_cl_device *cldev, u8 *buf, size_t length) hdr->data_size = length; memcpy(mei_buf + MEI_NFC_HEADER_SIZE, buf, length); - err = __mei_cl_send(ndev->cl, mei_buf, length + MEI_NFC_HEADER_SIZE); if (err < 0) - return err; - - kfree(mei_buf); + goto out; if (!wait_event_interruptible_timeout(ndev->send_wq, ndev->recv_req_id == ndev->req_id, HZ)) { @@ -368,7 +366,8 @@ static int mei_nfc_send(struct mei_cl_device *cldev, u8 *buf, size_t length) } else { ndev->req_id++; } - +out: + kfree(mei_buf); return err; } diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index e637318..20fb058 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -100,15 +100,31 @@ static bool mei_me_quirk_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { u32 reg; - if (ent->device == MEI_DEV_ID_PBG_1) { - 
pci_read_config_dword(pdev, 0x48, ®); - /* make sure that bit 9 is up and bit 10 is down */ - if ((reg & 0x600) == 0x200) { - dev_info(&pdev->dev, "Device doesn't have valid ME Interface\n"); - return false; - } + /* Cougar Point || Patsburg */ + if (ent->device == MEI_DEV_ID_CPT_1 || + ent->device == MEI_DEV_ID_PBG_1) { + pci_read_config_dword(pdev, PCI_CFG_HFS_2, ®); + /* make sure that bit 9 (NM) is up and bit 10 (DM) is down */ + if ((reg & 0x600) == 0x200) + goto no_mei; } + + /* Lynx Point */ + if (ent->device == MEI_DEV_ID_LPT_H || + ent->device == MEI_DEV_ID_LPT_W || + ent->device == MEI_DEV_ID_LPT_HR) { + /* Read ME FW Status check for SPS Firmware */ + pci_read_config_dword(pdev, PCI_CFG_HFS_1, ®); + /* if bits [19:16] = 15, running SPS Firmware */ + if ((reg & 0xf0000) == 0xf0000) + goto no_mei; + } + return true; + +no_mei: + dev_info(&pdev->dev, "Device doesn't have valid ME Interface\n"); + return false; } /** * mei_probe - Device Initialization Routine diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 3241cb7..f25ed05 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -263,7 +263,7 @@ static ssize_t force_ro_show(struct device *dev, struct device_attribute *attr, int ret; struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev)); - ret = snprintf(buf, PAGE_SIZE, "%d", + ret = snprintf(buf, PAGE_SIZE, "%d\n", get_disk_ro(dev_to_disk(dev)) ^ md->read_only); mmc_blk_put(md); diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c index 375a880e..2985efd 100644 --- a/drivers/mmc/host/rtsx_pci_sdmmc.c +++ b/drivers/mmc/host/rtsx_pci_sdmmc.c @@ -246,6 +246,9 @@ static void sd_send_cmd_get_rsp(struct realtek_pci_sdmmc *host, case MMC_RSP_R1: rsp_type = SD_RSP_TYPE_R1; break; + case MMC_RSP_R1 & ~MMC_RSP_CRC: + rsp_type = SD_RSP_TYPE_R1 | SD_NO_CHECK_CRC7; + break; case MMC_RSP_R1B: rsp_type = SD_RSP_TYPE_R1b; break; @@ -339,6 +342,13 @@ static void sd_send_cmd_get_rsp(struct 
realtek_pci_sdmmc *host, } if (rsp_type == SD_RSP_TYPE_R2) { + /* + * The controller offloads the last byte {CRC-7, end bit 1'b1} + * of response type R2. Assign dummy CRC, 0, and end bit to the + * byte(ptr[16], goes into the LSB of resp[3] later). + */ + ptr[16] = 1; + for (i = 0; i < 4; i++) { cmd->resp[i] = get_unaligned_be32(ptr + 1 + i * 4); dev_dbg(sdmmc_dev(host), "cmd->resp[%d] = 0x%08x\n", diff --git a/drivers/mmc/host/sdhci-bcm-kona.c b/drivers/mmc/host/sdhci-bcm-kona.c index 85472d3..d002eb9 100644 --- a/drivers/mmc/host/sdhci-bcm-kona.c +++ b/drivers/mmc/host/sdhci-bcm-kona.c @@ -314,7 +314,7 @@ err_pltfm_free: return ret; } -static int __exit sdhci_bcm_kona_remove(struct platform_device *pdev) +static int sdhci_bcm_kona_remove(struct platform_device *pdev) { struct sdhci_host *host = platform_get_drvdata(pdev); int dead; diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c index 27ae563..b2a4c22 100644 --- a/drivers/mmc/host/sdhci-pci.c +++ b/drivers/mmc/host/sdhci-pci.c @@ -37,6 +37,13 @@ #define PCI_DEVICE_ID_INTEL_BYT_SDIO 0x0f15 #define PCI_DEVICE_ID_INTEL_BYT_SD 0x0f16 #define PCI_DEVICE_ID_INTEL_BYT_EMMC2 0x0f50 +#define PCI_DEVICE_ID_INTEL_MRFL_MMC 0x1190 +#define PCI_DEVICE_ID_INTEL_CLV_SDIO0 0x08f9 +#define PCI_DEVICE_ID_INTEL_CLV_SDIO1 0x08fa +#define PCI_DEVICE_ID_INTEL_CLV_SDIO2 0x08fb +#define PCI_DEVICE_ID_INTEL_CLV_EMMC0 0x08e5 +#define PCI_DEVICE_ID_INTEL_CLV_EMMC1 0x08e6 +#define PCI_DEVICE_ID_INTEL_QRK_SD 0x08A7 /* * PCI registers @@ -169,6 +176,10 @@ static const struct sdhci_pci_fixes sdhci_cafe = { SDHCI_QUIRK_BROKEN_TIMEOUT_VAL, }; +static const struct sdhci_pci_fixes sdhci_intel_qrk = { + .quirks = SDHCI_QUIRK_NO_HISPD_BIT, +}; + static int mrst_hc_probe_slot(struct sdhci_pci_slot *slot) { slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA; @@ -359,6 +370,28 @@ static const struct sdhci_pci_fixes sdhci_intel_byt_sd = { .own_cd_for_runtime_pm = true, }; +/* Define Host controllers for Intel Merrifield platform */ 
+#define INTEL_MRFL_EMMC_0 0 +#define INTEL_MRFL_EMMC_1 1 + +static int intel_mrfl_mmc_probe_slot(struct sdhci_pci_slot *slot) +{ + if ((PCI_FUNC(slot->chip->pdev->devfn) != INTEL_MRFL_EMMC_0) && + (PCI_FUNC(slot->chip->pdev->devfn) != INTEL_MRFL_EMMC_1)) + /* SD support is not ready yet */ + return -ENODEV; + + slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE | + MMC_CAP_1_8V_DDR; + + return 0; +} + +static const struct sdhci_pci_fixes sdhci_intel_mrfl_mmc = { + .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC, + .probe_slot = intel_mrfl_mmc_probe_slot, +}; + /* O2Micro extra registers */ #define O2_SD_LOCK_WP 0xD3 #define O2_SD_MULTI_VCC3V 0xEE @@ -832,6 +865,14 @@ static const struct pci_device_id pci_ids[] = { { .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_QRK_SD, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = (kernel_ulong_t)&sdhci_intel_qrk, + }, + + { + .vendor = PCI_VENDOR_ID_INTEL, .device = PCI_DEVICE_ID_INTEL_MRST_SD0, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, @@ -942,6 +983,54 @@ static const struct pci_device_id pci_ids[] = { .driver_data = (kernel_ulong_t)&sdhci_intel_byt_emmc, }, + + { + .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_CLV_SDIO0, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = (kernel_ulong_t)&sdhci_intel_mfd_sd, + }, + + { + .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_CLV_SDIO1, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = (kernel_ulong_t)&sdhci_intel_mfd_sdio, + }, + + { + .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_CLV_SDIO2, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = (kernel_ulong_t)&sdhci_intel_mfd_sdio, + }, + + { + .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_CLV_EMMC0, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = (kernel_ulong_t)&sdhci_intel_mfd_emmc, + }, + + { + .vendor = PCI_VENDOR_ID_INTEL, 
+ .device = PCI_DEVICE_ID_INTEL_CLV_EMMC1, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = (kernel_ulong_t)&sdhci_intel_mfd_emmc, + }, + + { + .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_MRFL_MMC, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = (kernel_ulong_t)&sdhci_intel_mrfl_mmc, + }, { .vendor = PCI_VENDOR_ID_O2, .device = PCI_DEVICE_ID_O2_8120, diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 7efa4d1..7a0baef 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1349,6 +1349,8 @@ static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq) sdhci_runtime_pm_get(host); + present = mmc_gpio_get_cd(host->mmc); + spin_lock_irqsave(&host->lock, flags); WARN_ON(host->mrq != NULL); @@ -1377,7 +1379,6 @@ static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq) * zero: cd-gpio is used, and card is removed * one: cd-gpio is used, and card is present */ - present = mmc_gpio_get_cd(host->mmc); if (present < 0) { /* If polling, assume that the card is always present. */ if (host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION) @@ -2089,15 +2090,18 @@ static void sdhci_card_event(struct mmc_host *mmc) { struct sdhci_host *host = mmc_priv(mmc); unsigned long flags; + int present; /* First check if client has provided their own card event */ if (host->ops->card_event) host->ops->card_event(host); + present = sdhci_do_get_cd(host); + spin_lock_irqsave(&host->lock, flags); /* Check host->mrq first in case we are runtime suspended */ - if (host->mrq && !sdhci_do_get_cd(host)) { + if (host->mrq && !present) { pr_err("%s: Card removed during transfer!\n", mmc_hostname(host->mmc)); pr_err("%s: Resetting controller.\n", @@ -2536,7 +2540,7 @@ out: /* * We have to delay this as it calls back into the driver. 
*/ - if (cardint) + if (cardint && host->mmc->sdio_irqs) mmc_signal_sdio_irq(host->mmc); return result; diff --git a/drivers/mtd/devices/elm.c b/drivers/mtd/devices/elm.c index d1dd6a3..3059a7a 100644 --- a/drivers/mtd/devices/elm.c +++ b/drivers/mtd/devices/elm.c @@ -428,6 +428,7 @@ static int elm_context_save(struct elm_info *info) ELM_SYNDROME_FRAGMENT_1 + offset); regs->elm_syndrome_fragment_0[i] = elm_read_reg(info, ELM_SYNDROME_FRAGMENT_0 + offset); + break; default: return -EINVAL; } @@ -466,6 +467,7 @@ static int elm_context_restore(struct elm_info *info) regs->elm_syndrome_fragment_1[i]); elm_write_reg(info, ELM_SYNDROME_FRAGMENT_0 + offset, regs->elm_syndrome_fragment_0[i]); + break; default: return -EINVAL; } diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c index 19d6372..71e4f6c 100644 --- a/drivers/mtd/ftl.c +++ b/drivers/mtd/ftl.c @@ -1075,7 +1075,6 @@ static void ftl_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd) return; } - ftl_freepart(partition); kfree(partition); } diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c index 9be0792..1de054a 100644 --- a/drivers/mtd/nand/atmel_nand.c +++ b/drivers/mtd/nand/atmel_nand.c @@ -1249,6 +1249,7 @@ static int __init atmel_pmecc_nand_init_params(struct platform_device *pdev, goto err; } + nand_chip->options |= NAND_NO_SUBPAGE_WRITE; nand_chip->ecc.read_page = atmel_nand_pmecc_read_page; nand_chip->ecc.write_page = atmel_nand_pmecc_write_page; diff --git a/drivers/mtd/nand/nuc900_nand.c b/drivers/mtd/nand/nuc900_nand.c index 5211515..2e1d16b 100644 --- a/drivers/mtd/nand/nuc900_nand.c +++ b/drivers/mtd/nand/nuc900_nand.c @@ -225,7 +225,7 @@ static void nuc900_nand_enable(struct nuc900_nand *nand) val = __raw_readl(nand->reg + REG_FMICSR); if (!(val & NAND_EN)) - __raw_writel(val | NAND_EN, REG_FMICSR); + __raw_writel(val | NAND_EN, nand->reg + REG_FMICSR); val = __raw_readl(nand->reg + REG_SMCSR); diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c index 
4ecf0e5..8546628 100644 --- a/drivers/mtd/nand/omap2.c +++ b/drivers/mtd/nand/omap2.c @@ -948,7 +948,7 @@ static int omap_calculate_ecc(struct mtd_info *mtd, const u_char *dat, u32 val; val = readl(info->reg.gpmc_ecc_config); - if (((val >> ECC_CONFIG_CS_SHIFT) & ~CS_MASK) != info->gpmc_cs) + if (((val >> ECC_CONFIG_CS_SHIFT) & CS_MASK) != info->gpmc_cs) return -EINVAL; /* read ecc result */ @@ -1463,7 +1463,7 @@ static int omap_elm_correct_data(struct mtd_info *mtd, u_char *data, /* Check if any error reported */ if (!is_error_reported) - return 0; + return stat; /* Decode BCH error using ELM module */ elm_decode_bch_error_page(info->elm_dev, ecc_vec, err_vec); diff --git a/drivers/mtd/sm_ftl.c b/drivers/mtd/sm_ftl.c index 4b8e895..cf49c22 100644 --- a/drivers/mtd/sm_ftl.c +++ b/drivers/mtd/sm_ftl.c @@ -59,15 +59,12 @@ static struct attribute_group *sm_create_sysfs_attributes(struct sm_ftl *ftl) struct attribute_group *attr_group; struct attribute **attributes; struct sm_sysfs_attribute *vendor_attribute; + char *vendor; - int vendor_len = strnlen(ftl->cis_buffer + SM_CIS_VENDOR_OFFSET, - SM_SMALL_PAGE - SM_CIS_VENDOR_OFFSET); - - char *vendor = kmalloc(vendor_len, GFP_KERNEL); + vendor = kstrndup(ftl->cis_buffer + SM_CIS_VENDOR_OFFSET, + SM_SMALL_PAGE - SM_CIS_VENDOR_OFFSET, GFP_KERNEL); if (!vendor) goto error1; - memcpy(vendor, ftl->cis_buffer + SM_CIS_VENDOR_OFFSET, vendor_len); - vendor[vendor_len] = 0; /* Initialize sysfs attributes */ vendor_attribute = @@ -78,7 +75,7 @@ static struct attribute_group *sm_create_sysfs_attributes(struct sm_ftl *ftl) sysfs_attr_init(&vendor_attribute->dev_attr.attr); vendor_attribute->data = vendor; - vendor_attribute->len = vendor_len; + vendor_attribute->len = strlen(vendor); vendor_attribute->dev_attr.attr.name = "vendor"; vendor_attribute->dev_attr.attr.mode = S_IRUGO; vendor_attribute->dev_attr.show = sm_attr_show; diff --git a/drivers/mtd/tests/torturetest.c b/drivers/mtd/tests/torturetest.c index eeab969..b55bc52 100644 
--- a/drivers/mtd/tests/torturetest.c +++ b/drivers/mtd/tests/torturetest.c @@ -264,7 +264,9 @@ static int __init tort_init(void) int i; void *patt; - mtdtest_erase_good_eraseblocks(mtd, bad_ebs, eb, ebcnt); + err = mtdtest_erase_good_eraseblocks(mtd, bad_ebs, eb, ebcnt); + if (err) + goto out; /* Check if the eraseblocks contain only 0xFF bytes */ if (check) { diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index f5aa4b0..85cd77c 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -330,6 +330,7 @@ static int process_pool_aeb(struct ubi_device *ubi, struct ubi_attach_info *ai, av = tmp_av; else { ubi_err("orphaned volume in fastmap pool!"); + kmem_cache_free(ai->aeb_slab_cache, new_aeb); return UBI_BAD_FASTMAP; } diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c index ec2c2dc..2a1b6e0 100644 --- a/drivers/mtd/ubi/upd.c +++ b/drivers/mtd/ubi/upd.c @@ -133,6 +133,10 @@ int ubi_start_update(struct ubi_device *ubi, struct ubi_volume *vol, ubi_assert(!vol->updating && !vol->changing_leb); vol->updating = 1; + vol->upd_buf = vmalloc(ubi->leb_size); + if (!vol->upd_buf) + return -ENOMEM; + err = set_update_marker(ubi, vol); if (err) return err; @@ -152,14 +156,12 @@ int ubi_start_update(struct ubi_device *ubi, struct ubi_volume *vol, err = clear_update_marker(ubi, vol, 0); if (err) return err; + + vfree(vol->upd_buf); vol->updating = 0; return 0; } - vol->upd_buf = vmalloc(ubi->leb_size); - if (!vol->upd_buf) - return -ENOMEM; - vol->upd_ebs = div_u64(bytes + vol->usable_leb_size - 1, vol->usable_leb_size); vol->upd_bytes = bytes; diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index c95bfb1..49e570a 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1209,7 +1209,6 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, err = do_sync_erase(ubi, e1, vol_id, lnum, 0); if (err) { - kmem_cache_free(ubi_wl_entry_slab, e1); if (e2) kmem_cache_free(ubi_wl_entry_slab, e2); goto 
out_ro; @@ -1223,10 +1222,8 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, dbg_wl("PEB %d (LEB %d:%d) was put meanwhile, erase", e2->pnum, vol_id, lnum); err = do_sync_erase(ubi, e2, vol_id, lnum, 0); - if (err) { - kmem_cache_free(ubi_wl_entry_slab, e2); + if (err) goto out_ro; - } } dbg_wl("done"); @@ -1262,10 +1259,9 @@ out_not_moved: ubi_free_vid_hdr(ubi, vid_hdr); err = do_sync_erase(ubi, e2, vol_id, lnum, torture); - if (err) { - kmem_cache_free(ubi_wl_entry_slab, e2); + if (err) goto out_ro; - } + mutex_unlock(&ubi->move_mutex); return 0; diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 1fc20d1..1ade9f9 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -302,6 +302,7 @@ config MACVLAN config MACVTAP tristate "MAC-VLAN based tap driver" depends on MACVLAN + depends on INET help This adds a specialized tap character device driver that is based on the MAC-VLAN network interface, called macvtap. A macvtap device @@ -373,6 +374,7 @@ config RIONET_RX_SIZE config TUN tristate "Universal TUN/TAP device driver support" + depends on INET select CRC32 ---help--- TUN/TAP provides packet reception and transmission for user space diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 5cd2cf1..24e45d0 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4647,6 +4647,7 @@ static int __init bonding_init(void) out: return res; err: + bond_destroy_debugfs(); rtnl_link_unregister(&bond_link_ops); err_link: unregister_pernet_subsys(&bond_net_ops); diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index e59c42b..ae14805 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -830,9 +830,6 @@ static int c_can_do_rx_poll(struct net_device *dev, int quota) continue; } - if (msg_ctrl_save & IF_MCONT_EOB) - return num_rx_pkts; - if (!(msg_ctrl_save & IF_MCONT_NEWDAT)) continue; diff --git a/drivers/net/can/dev.c 
b/drivers/net/can/dev.c index 539239d..a4694aa 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -385,7 +385,7 @@ void can_free_echo_skb(struct net_device *dev, unsigned int idx) BUG_ON(idx >= priv->echo_skb_max); if (priv->echo_skb[idx]) { - kfree_skb(priv->echo_skb[idx]); + dev_kfree_skb_any(priv->echo_skb[idx]); priv->echo_skb[idx] = NULL; } } @@ -643,10 +643,14 @@ static int can_changelink(struct net_device *dev, if (dev->flags & IFF_UP) return -EBUSY; cm = nla_data(data[IFLA_CAN_CTRLMODE]); - if (cm->flags & ~priv->ctrlmode_supported) + + /* check whether changed bits are allowed to be modified */ + if (cm->mask & ~priv->ctrlmode_supported) return -EOPNOTSUPP; + + /* clear bits to be modified and copy the flag values */ priv->ctrlmode &= ~cm->mask; - priv->ctrlmode |= cm->flags; + priv->ctrlmode |= (cm->flags & cm->mask); } if (data[IFLA_CAN_BITTIMING]) { diff --git a/drivers/net/can/sja1000/peak_pci.c b/drivers/net/can/sja1000/peak_pci.c index 6b6f0ad..7042f5f 100644 --- a/drivers/net/can/sja1000/peak_pci.c +++ b/drivers/net/can/sja1000/peak_pci.c @@ -551,7 +551,7 @@ static int peak_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct sja1000_priv *priv; struct peak_pci_chan *chan; - struct net_device *dev; + struct net_device *dev, *prev_dev; void __iomem *cfg_base, *reg_base; u16 sub_sys_id, icr; int i, err, channels; @@ -687,11 +687,13 @@ failure_remove_channels: writew(0x0, cfg_base + PITA_ICR + 2); chan = NULL; - for (dev = pci_get_drvdata(pdev); dev; dev = chan->prev_dev) { - unregister_sja1000dev(dev); - free_sja1000dev(dev); + for (dev = pci_get_drvdata(pdev); dev; dev = prev_dev) { priv = netdev_priv(dev); chan = priv->priv; + prev_dev = chan->prev_dev; + + unregister_sja1000dev(dev); + free_sja1000dev(dev); } /* free any PCIeC resources too */ @@ -725,10 +727,12 @@ static void peak_pci_remove(struct pci_dev *pdev) /* Loop over all registered devices */ while (1) { + struct net_device *prev_dev = chan->prev_dev; + 
dev_info(&pdev->dev, "removing device %s\n", dev->name); unregister_sja1000dev(dev); free_sja1000dev(dev); - dev = chan->prev_dev; + dev = prev_dev; if (!dev) { /* do that only for first channel */ diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index 25377e5..3c28d1f 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -54,6 +54,7 @@ #include <linux/delay.h> #include <linux/init.h> #include <linux/kernel.h> +#include <linux/workqueue.h> #include <linux/can.h> #include <linux/can/skb.h> @@ -87,6 +88,7 @@ struct slcan { struct tty_struct *tty; /* ptr to TTY structure */ struct net_device *dev; /* easy for intr handling */ spinlock_t lock; + struct work_struct tx_work; /* Flushes transmit buffer */ /* These are pointers to the malloc()ed frame buffers. */ unsigned char rbuff[SLC_MTU]; /* receiver buffer */ @@ -311,34 +313,44 @@ static void slc_encaps(struct slcan *sl, struct can_frame *cf) sl->dev->stats.tx_bytes += cf->can_dlc; } -/* - * Called by the driver when there's room for more data. If we have - * more packets to send, we send them here. - */ -static void slcan_write_wakeup(struct tty_struct *tty) +/* Write out any remaining transmit buffer. Scheduled when tty is writable */ +static void slcan_transmit(struct work_struct *work) { + struct slcan *sl = container_of(work, struct slcan, tx_work); int actual; - struct slcan *sl = (struct slcan *) tty->disc_data; + spin_lock_bh(&sl->lock); /* First make sure we're connected. 
*/ - if (!sl || sl->magic != SLCAN_MAGIC || !netif_running(sl->dev)) + if (!sl->tty || sl->magic != SLCAN_MAGIC || !netif_running(sl->dev)) { + spin_unlock_bh(&sl->lock); return; + } - spin_lock(&sl->lock); if (sl->xleft <= 0) { /* Now serial buffer is almost free & we can start * transmission of another packet */ sl->dev->stats.tx_packets++; - clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); - spin_unlock(&sl->lock); + clear_bit(TTY_DO_WRITE_WAKEUP, &sl->tty->flags); + spin_unlock_bh(&sl->lock); netif_wake_queue(sl->dev); return; } - actual = tty->ops->write(tty, sl->xhead, sl->xleft); + actual = sl->tty->ops->write(sl->tty, sl->xhead, sl->xleft); sl->xleft -= actual; sl->xhead += actual; - spin_unlock(&sl->lock); + spin_unlock_bh(&sl->lock); +} + +/* + * Called by the driver when there's room for more data. + * Schedule the transmit. + */ +static void slcan_write_wakeup(struct tty_struct *tty) +{ + struct slcan *sl = tty->disc_data; + + schedule_work(&sl->tx_work); } /* Send a can_frame to a TTY queue. 
*/ @@ -524,6 +536,7 @@ static struct slcan *slc_alloc(dev_t line) sl->magic = SLCAN_MAGIC; sl->dev = dev; spin_lock_init(&sl->lock); + INIT_WORK(&sl->tx_work, slcan_transmit); slcan_devs[i] = dev; return sl; @@ -622,8 +635,12 @@ static void slcan_close(struct tty_struct *tty) if (!sl || sl->magic != SLCAN_MAGIC || sl->tty != tty) return; + spin_lock_bh(&sl->lock); tty->disc_data = NULL; sl->tty = NULL; + spin_unlock_bh(&sl->lock); + + flush_work(&sl->tx_work); /* Flush network side */ unregister_netdev(sl->dev); diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c index ac6177d..91654d0 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ b/drivers/net/can/usb/esd_usb2.c @@ -1142,6 +1142,7 @@ static void esd_usb2_disconnect(struct usb_interface *intf) } } unlink_all_urbs(dev); + kfree(dev); } } diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index cc3df8a..63fb90b 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -1238,6 +1238,9 @@ static int kvaser_usb_close(struct net_device *netdev) if (err) netdev_warn(netdev, "Cannot stop device, error %d\n", err); + /* reset tx contexts */ + kvaser_usb_unlink_tx_urbs(priv); + priv->can.state = CAN_STATE_STOPPED; close_candev(priv->netdev); @@ -1286,12 +1289,14 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, if (!urb) { netdev_err(netdev, "No memory left for URBs\n"); stats->tx_dropped++; - goto nourbmem; + dev_kfree_skb(skb); + return NETDEV_TX_OK; } buf = kmalloc(sizeof(struct kvaser_msg), GFP_ATOMIC); if (!buf) { stats->tx_dropped++; + dev_kfree_skb(skb); goto nobufmem; } @@ -1326,6 +1331,7 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, } } + /* This should never happen; it implies a flow control bug */ if (!context) { netdev_warn(netdev, "cannot find free context\n"); ret = NETDEV_TX_BUSY; @@ -1356,9 +1362,6 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, if (unlikely(err)) { 
can_free_echo_skb(netdev, context->echo_index); - skb = NULL; /* set to NULL to avoid double free in - * dev_kfree_skb(skb) */ - atomic_dec(&priv->active_tx_urbs); usb_unanchor_urb(urb); @@ -1380,8 +1383,6 @@ releasebuf: kfree(buf); nobufmem: usb_free_urb(urb); -nourbmem: - dev_kfree_skb(skb); return ret; } @@ -1493,6 +1494,10 @@ static int kvaser_usb_init_one(struct usb_interface *intf, struct kvaser_usb_net_priv *priv; int i, err; + err = kvaser_usb_send_simple_msg(dev, CMD_RESET_CHIP, channel); + if (err) + return err; + netdev = alloc_candev(sizeof(*priv), MAX_TX_URBS); if (!netdev) { dev_err(&intf->dev, "Cannot alloc candev\n"); @@ -1596,9 +1601,6 @@ static int kvaser_usb_probe(struct usb_interface *intf, usb_set_intfdata(intf, dev); - for (i = 0; i < MAX_NET_DEVICES; i++) - kvaser_usb_send_simple_msg(dev, CMD_RESET_CHIP, i); - err = kvaser_usb_get_software_info(dev); if (err) { dev_err(&intf->dev, diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c index 0b7a4c3..03e7f0c 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c @@ -734,7 +734,7 @@ static int peak_usb_create_dev(struct peak_usb_adapter *peak_usb_adapter, dev->cmd_buf = kmalloc(PCAN_USB_MAX_CMD_LEN, GFP_KERNEL); if (!dev->cmd_buf) { err = -ENOMEM; - goto lbl_set_intf_data; + goto lbl_free_candev; } dev->udev = usb_dev; @@ -773,7 +773,7 @@ static int peak_usb_create_dev(struct peak_usb_adapter *peak_usb_adapter, err = register_candev(netdev); if (err) { dev_err(&intf->dev, "couldn't register CAN device: %d\n", err); - goto lbl_free_cmd_buf; + goto lbl_restore_intf_data; } if (dev->prev_siblings) @@ -786,14 +786,14 @@ static int peak_usb_create_dev(struct peak_usb_adapter *peak_usb_adapter, if (dev->adapter->dev_init) { err = dev->adapter->dev_init(dev); if (err) - goto lbl_free_cmd_buf; + goto lbl_unregister_candev; } /* set bus off */ if (dev->adapter->dev_set_bus) { err = 
dev->adapter->dev_set_bus(dev, 0); if (err) - goto lbl_free_cmd_buf; + goto lbl_unregister_candev; } /* get device number early */ @@ -805,11 +805,14 @@ static int peak_usb_create_dev(struct peak_usb_adapter *peak_usb_adapter, return 0; -lbl_free_cmd_buf: - kfree(dev->cmd_buf); +lbl_unregister_candev: + unregister_candev(netdev); -lbl_set_intf_data: +lbl_restore_intf_data: usb_set_intfdata(intf, dev->prev_siblings); + kfree(dev->cmd_buf); + +lbl_free_candev: free_candev(netdev); return err; diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c index 263dd92..f7f796a 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c @@ -333,8 +333,6 @@ static int pcan_usb_pro_send_req(struct peak_usb_device *dev, int req_id, if (!(dev->state & PCAN_USB_STATE_CONNECTED)) return 0; - memset(req_addr, '\0', req_size); - req_type = USB_TYPE_VENDOR | USB_RECIP_OTHER; switch (req_id) { @@ -345,6 +343,7 @@ static int pcan_usb_pro_send_req(struct peak_usb_device *dev, int req_id, default: p = usb_rcvctrlpipe(dev->udev, 0); req_type |= USB_DIR_IN; + memset(req_addr, '\0', req_size); break; } diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c index 46dfb13..81576c6 100644 --- a/drivers/net/ethernet/allwinner/sun4i-emac.c +++ b/drivers/net/ethernet/allwinner/sun4i-emac.c @@ -726,6 +726,7 @@ static int emac_open(struct net_device *dev) ret = emac_mdio_probe(dev); if (ret < 0) { + free_irq(dev->irq, dev); netdev_err(dev, "cannot probe MDIO bus\n"); return ret; } diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index 6305a5d..754ac8e 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -184,15 +184,16 @@ static void alx_schedule_reset(struct alx_priv *alx) schedule_work(&alx->reset_wk); } -static bool alx_clean_rx_irq(struct alx_priv *alx, int 
budget) +static int alx_clean_rx_irq(struct alx_priv *alx, int budget) { struct alx_rx_queue *rxq = &alx->rxq; struct alx_rrd *rrd; struct alx_buffer *rxb; struct sk_buff *skb; u16 length, rfd_cleaned = 0; + int work = 0; - while (budget > 0) { + while (work < budget) { rrd = &rxq->rrd[rxq->rrd_read_idx]; if (!(rrd->word3 & cpu_to_le32(1 << RRD_UPDATED_SHIFT))) break; @@ -203,7 +204,7 @@ static bool alx_clean_rx_irq(struct alx_priv *alx, int budget) ALX_GET_FIELD(le32_to_cpu(rrd->word0), RRD_NOR) != 1) { alx_schedule_reset(alx); - return 0; + return work; } rxb = &rxq->bufs[rxq->read_idx]; @@ -243,7 +244,7 @@ static bool alx_clean_rx_irq(struct alx_priv *alx, int budget) } napi_gro_receive(&alx->napi, skb); - budget--; + work++; next_pkt: if (++rxq->read_idx == alx->rx_ringsz) @@ -258,21 +259,22 @@ next_pkt: if (rfd_cleaned) alx_refill_rx_ring(alx, GFP_ATOMIC); - return budget > 0; + return work; } static int alx_poll(struct napi_struct *napi, int budget) { struct alx_priv *alx = container_of(napi, struct alx_priv, napi); struct alx_hw *hw = &alx->hw; - bool complete = true; unsigned long flags; + bool tx_complete; + int work; - complete = alx_clean_tx_irq(alx) && - alx_clean_rx_irq(alx, budget); + tx_complete = alx_clean_tx_irq(alx); + work = alx_clean_rx_irq(alx, budget); - if (!complete) - return 1; + if (!tx_complete || work == budget) + return budget; napi_complete(&alx->napi); @@ -284,7 +286,7 @@ static int alx_poll(struct napi_struct *napi, int budget) alx_post_write(hw); - return 0; + return work; } static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index c5e375d..930ced0 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -337,6 +337,7 @@ struct sw_tx_bd { u8 flags; /* Set on the first BD descriptor when there is a split BD */ #define BNX2X_TSO_SPLIT_BD (1<<0) +#define 
BNX2X_HAS_SECOND_PBD (1<<1) }; struct sw_rx_page { diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 0399458..c3ba4bf 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -186,6 +186,12 @@ static u16 bnx2x_free_tx_pkt(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata, --nbd; bd_idx = TX_BD(NEXT_TX_IDX(bd_idx)); + if (tx_buf->flags & BNX2X_HAS_SECOND_PBD) { + /* Skip second parse bd... */ + --nbd; + bd_idx = TX_BD(NEXT_TX_IDX(bd_idx)); + } + /* TSO headers+data bds share a common mapping. See bnx2x_tx_split() */ if (tx_buf->flags & BNX2X_TSO_SPLIT_BD) { tx_data_bd = &txdata->tx_desc_ring[bd_idx].reg_bd; @@ -755,7 +761,8 @@ static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp, return; } - bnx2x_frag_free(fp, new_data); + if (new_data) + bnx2x_frag_free(fp, new_data); drop: /* drop the packet and keep the buffer in the bin */ DP(NETIF_MSG_RX_STATUS, @@ -3821,6 +3828,9 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) /* set encapsulation flag in start BD */ SET_FLAG(tx_start_bd->general_data, ETH_TX_START_BD_TUNNEL_EXIST, 1); + + tx_buf->flags |= BNX2X_HAS_SECOND_PBD; + nbd++; } else if (xmit_type & XMIT_CSUM) { /* Set PBD in checksum offload case w/o encapsulation */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index e8efa1c9..97fe8e6 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -2864,9 +2864,16 @@ static void bnx2x_self_test(struct net_device *dev, memset(buf, 0, sizeof(u64) * BNX2X_NUM_TESTS(bp)); + if (bnx2x_test_nvram(bp) != 0) { + if (!IS_MF(bp)) + buf[4] = 1; + else + buf[0] = 1; + etest->flags |= ETH_TEST_FL_FAILED; + } + if (!netif_running(dev)) { - DP(BNX2X_MSG_ETHTOOL, - "Can't perform self-test when interface is down\n"); + 
DP(BNX2X_MSG_ETHTOOL, "Interface is down\n"); return; } @@ -2928,13 +2935,7 @@ static void bnx2x_self_test(struct net_device *dev, /* wait until link state is restored */ bnx2x_wait_for_link(bp, link_up, is_serdes); } - if (bnx2x_test_nvram(bp) != 0) { - if (!IS_MF(bp)) - buf[4] = 1; - else - buf[0] = 1; - etest->flags |= ETH_TEST_FL_FAILED; - } + if (bnx2x_test_intr(bp) != 0) { if (!IS_MF(bp)) buf[5] = 1; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c index 9fbeee5..32c92ab 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c @@ -1217,9 +1217,6 @@ static void bnx2x_set_one_vlan_mac_e1h(struct bnx2x *bp, ETH_VLAN_FILTER_CLASSIFY, config); } -#define list_next_entry(pos, member) \ - list_entry((pos)->member.next, typeof(*(pos)), member) - /** * bnx2x_vlan_mac_restore - reconfigure next MAC/VLAN/VLAN-MAC element * diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index aae7ba6..98ded21 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -337,6 +337,11 @@ static DEFINE_PCI_DEVICE_TABLE(tg3_pci_tbl) = { {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_5762)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_5725)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_5727)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57764)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57767)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57787)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57782)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57786)}, {PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_9DXX)}, {PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_9MXX)}, {PCI_DEVICE(PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC1000)}, @@ -6893,7 +6898,8 @@ static int 
tg3_rx(struct tg3_napi *tnapi, int budget) skb->protocol = eth_type_trans(skb, tp->dev); if (len > (tp->dev->mtu + ETH_HLEN) && - skb->protocol != htons(ETH_P_8021Q)) { + skb->protocol != htons(ETH_P_8021Q) && + skb->protocol != htons(ETH_P_8021AD)) { dev_kfree_skb(skb); goto drop_it_no_recycle; } @@ -7885,8 +7891,6 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) entry = tnapi->tx_prod; base_flags = 0; - if (skb->ip_summed == CHECKSUM_PARTIAL) - base_flags |= TXD_FLAG_TCPUDP_CSUM; mss = skb_shinfo(skb)->gso_size; if (mss) { @@ -7902,6 +7906,13 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb) - ETH_HLEN; + /* HW/FW can not correctly segment packets that have been + * vlan encapsulated. + */ + if (skb->protocol == htons(ETH_P_8021Q) || + skb->protocol == htons(ETH_P_8021AD)) + return tg3_tso_bug(tp, skb); + if (!skb_is_gso_v6(skb)) { iph->check = 0; iph->tot_len = htons(mss + hdr_len); @@ -7948,6 +7959,17 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) base_flags |= tsflags << 12; } } + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { + /* HW/FW can not correctly checksum packets that have been + * vlan encapsulated. 
+ */ + if (skb->protocol == htons(ETH_P_8021Q) || + skb->protocol == htons(ETH_P_8021AD)) { + if (skb_checksum_help(skb)) + goto drop; + } else { + base_flags |= TXD_FLAG_TCPUDP_CSUM; + } } if (tg3_flag(tp, USE_JUMBO_BDFLAG) && @@ -8501,7 +8523,8 @@ static int tg3_init_rings(struct tg3 *tp) if (tnapi->rx_rcb) memset(tnapi->rx_rcb, 0, TG3_RX_RCB_RING_BYTES(tp)); - if (tg3_rx_prodring_alloc(tp, &tnapi->prodring)) { + if (tnapi->prodring.rx_std && + tg3_rx_prodring_alloc(tp, &tnapi->prodring)) { tg3_free_rings(tp); return -ENOMEM; } @@ -12197,7 +12220,9 @@ static int tg3_set_ringparam(struct net_device *dev, struct ethtool_ringparam *e if (tg3_flag(tp, MAX_RXPEND_64) && tp->rx_pending > 63) tp->rx_pending = 63; - tp->rx_jumbo_pending = ering->rx_jumbo_pending; + + if (tg3_flag(tp, JUMBO_RING_ENABLE)) + tp->rx_jumbo_pending = ering->rx_jumbo_pending; for (i = 0; i < tp->irq_max; i++) tp->napi[i].tx_pending = ering->tx_pending; @@ -15758,9 +15783,12 @@ static void tg3_detect_asic_rev(struct tg3 *tp, u32 misc_ctrl_reg) tp->pdev->device == TG3PCI_DEVICE_TIGON3_5718 || tp->pdev->device == TG3PCI_DEVICE_TIGON3_5719 || tp->pdev->device == TG3PCI_DEVICE_TIGON3_5720 || + tp->pdev->device == TG3PCI_DEVICE_TIGON3_57767 || + tp->pdev->device == TG3PCI_DEVICE_TIGON3_57764 || tp->pdev->device == TG3PCI_DEVICE_TIGON3_5762 || tp->pdev->device == TG3PCI_DEVICE_TIGON3_5725 || - tp->pdev->device == TG3PCI_DEVICE_TIGON3_5727) + tp->pdev->device == TG3PCI_DEVICE_TIGON3_5727 || + tp->pdev->device == TG3PCI_DEVICE_TIGON3_57787) reg = TG3PCI_GEN2_PRODID_ASICREV; else if (tp->pdev->device == TG3PCI_DEVICE_TIGON3_57781 || tp->pdev->device == TG3PCI_DEVICE_TIGON3_57785 || @@ -17411,9 +17439,12 @@ static int tg3_init_one(struct pci_dev *pdev, tp->pdev->device == TG3PCI_DEVICE_TIGON3_5718 || tp->pdev->device == TG3PCI_DEVICE_TIGON3_5719 || tp->pdev->device == TG3PCI_DEVICE_TIGON3_5720 || + tp->pdev->device == TG3PCI_DEVICE_TIGON3_57767 || + tp->pdev->device == TG3PCI_DEVICE_TIGON3_57764 || 
tp->pdev->device == TG3PCI_DEVICE_TIGON3_5762 || tp->pdev->device == TG3PCI_DEVICE_TIGON3_5725 || - tp->pdev->device == TG3PCI_DEVICE_TIGON3_5727) { + tp->pdev->device == TG3PCI_DEVICE_TIGON3_5727 || + tp->pdev->device == TG3PCI_DEVICE_TIGON3_57787) { tg3_flag_set(tp, ENABLE_APE); tp->aperegs = pci_ioremap_bar(pdev, BAR_2); if (!tp->aperegs) { @@ -17541,23 +17572,6 @@ static int tg3_init_one(struct pci_dev *pdev, goto err_out_apeunmap; } - /* - * Reset chip in case UNDI or EFI driver did not shutdown - * DMA self test will enable WDMAC and we'll see (spurious) - * pending DMA on the PCI bus at that point. - */ - if ((tr32(HOSTCC_MODE) & HOSTCC_MODE_ENABLE) || - (tr32(WDMAC_MODE) & WDMAC_MODE_ENABLE)) { - tw32(MEMARB_MODE, MEMARB_MODE_ENABLE); - tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); - } - - err = tg3_test_dma(tp); - if (err) { - dev_err(&pdev->dev, "DMA engine test failed, aborting\n"); - goto err_out_apeunmap; - } - intmbx = MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW; rcvmbx = MAILBOX_RCVRET_CON_IDX_0 + TG3_64BIT_REG_LOW; sndmbx = MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW; @@ -17602,6 +17616,23 @@ static int tg3_init_one(struct pci_dev *pdev, sndmbx += 0xc; } + /* + * Reset chip in case UNDI or EFI driver did not shutdown + * DMA self test will enable WDMAC and we'll see (spurious) + * pending DMA on the PCI bus at that point. 
+ */ + if ((tr32(HOSTCC_MODE) & HOSTCC_MODE_ENABLE) || + (tr32(WDMAC_MODE) & WDMAC_MODE_ENABLE)) { + tw32(MEMARB_MODE, MEMARB_MODE_ENABLE); + tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); + } + + err = tg3_test_dma(tp); + if (err) { + dev_err(&pdev->dev, "DMA engine test failed, aborting\n"); + goto err_out_apeunmap; + } + tg3_init_coal(tp); pci_set_drvdata(pdev, dev); diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h index ac50e7c..cf9917b 100644 --- a/drivers/net/ethernet/broadcom/tg3.h +++ b/drivers/net/ethernet/broadcom/tg3.h @@ -68,6 +68,9 @@ #define TG3PCI_DEVICE_TIGON3_5762 0x1687 #define TG3PCI_DEVICE_TIGON3_5725 0x1643 #define TG3PCI_DEVICE_TIGON3_5727 0x16f3 +#define TG3PCI_DEVICE_TIGON3_57764 0x1642 +#define TG3PCI_DEVICE_TIGON3_57767 0x1683 +#define TG3PCI_DEVICE_TIGON3_57787 0x1641 /* 0x04 --> 0x2c unused */ #define TG3PCI_SUBVENDOR_ID_BROADCOM PCI_VENDOR_ID_BROADCOM #define TG3PCI_SUBDEVICE_ID_BROADCOM_95700A6 0x1644 diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index b78e69e..45ce6e2 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -3300,17 +3300,12 @@ bnad_pci_init(struct bnad *bnad, err = pci_request_regions(pdev, BNAD_NAME); if (err) goto disable_device; - if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)) && - !dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64))) { + if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) { *using_dac = true; } else { - err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); - if (err) { - err = dma_set_coherent_mask(&pdev->dev, - DMA_BIT_MASK(32)); - if (err) - goto release_regions; - } + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (err) + goto release_regions; *using_dac = false; } pci_set_master(pdev); diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 9257869..b020d1c 100644 --- a/drivers/net/ethernet/cadence/macb.c 
+++ b/drivers/net/ethernet/cadence/macb.c @@ -29,7 +29,6 @@ #include <linux/of_device.h> #include <linux/of_mdio.h> #include <linux/of_net.h> -#include <linux/pinctrl/consumer.h> #include "macb.h" @@ -1755,7 +1754,6 @@ static int __init macb_probe(struct platform_device *pdev) struct phy_device *phydev; u32 config; int err = -ENXIO; - struct pinctrl *pinctrl; const char *mac; regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -1764,15 +1762,6 @@ static int __init macb_probe(struct platform_device *pdev) goto err_out; } - pinctrl = devm_pinctrl_get_select_default(&pdev->dev); - if (IS_ERR(pinctrl)) { - err = PTR_ERR(pinctrl); - if (err == -EPROBE_DEFER) - goto err_out; - - dev_warn(&pdev->dev, "No pinctrl provided\n"); - } - err = -ENOMEM; dev = alloc_etherdev(sizeof(*bp)); if (!dev) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 7b756cf9..c298239 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1043,10 +1043,14 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq, skb->l4_rxhash = true; } - if ((netdev->features & NETIF_F_RXCSUM) && !csum_not_calc) { - skb->csum = htons(checksum); - skb->ip_summed = CHECKSUM_COMPLETE; - } + /* Hardware does not provide whole packet checksum. It only + * provides pseudo checksum. Since hw validates the packet + * checksum but not provide us the checksum value. use + * CHECSUM_UNNECESSARY. 
+ */ + if ((netdev->features & NETIF_F_RXCSUM) && tcp_udp_csum_ok && + ipv4_csum_ok) + skb->ip_summed = CHECKSUM_UNNECESSARY; if (vlan_stripped) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 2c38cc4..5226c99 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2632,7 +2632,7 @@ static int be_open(struct net_device *netdev) for_all_evt_queues(adapter, eqo, i) { napi_enable(&eqo->napi); - be_eq_notify(adapter, eqo->q.id, true, false, 0); + be_eq_notify(adapter, eqo->q.id, true, true, 0); } adapter->flags |= BE_FLAGS_NAPI_ENABLED; diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 6c0fd8e..895b086 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -293,6 +293,18 @@ failure: atomic_add(buffers_added, &(pool->available)); } +/* + * The final 8 bytes of the buffer list is a counter of frames dropped + * because there was not a buffer in the buffer list capable of holding + * the frame. 
+ */ +static void ibmveth_update_rx_no_buffer(struct ibmveth_adapter *adapter) +{ + __be64 *p = adapter->buffer_list_addr + 4096 - 8; + + adapter->rx_no_buffer = be64_to_cpup(p); +} + /* replenish routine */ static void ibmveth_replenish_task(struct ibmveth_adapter *adapter) { @@ -308,8 +320,7 @@ static void ibmveth_replenish_task(struct ibmveth_adapter *adapter) ibmveth_replenish_buffer_pool(adapter, pool); } - adapter->rx_no_buffer = *(u64 *)(((char*)adapter->buffer_list_addr) + - 4096 - 8); + ibmveth_update_rx_no_buffer(adapter); } /* empty and free ana buffer pool - also used to do cleanup in error paths */ @@ -699,8 +710,7 @@ static int ibmveth_close(struct net_device *netdev) free_irq(netdev->irq, netdev); - adapter->rx_no_buffer = *(u64 *)(((char *)adapter->buffer_list_addr) + - 4096 - 8); + ibmveth_update_rx_no_buffer(adapter); ibmveth_cleanup(adapter); diff --git a/drivers/net/ethernet/intel/e1000/e1000.h b/drivers/net/ethernet/intel/e1000/e1000.h index 26d9cd5..d5775ae 100644 --- a/drivers/net/ethernet/intel/e1000/e1000.h +++ b/drivers/net/ethernet/intel/e1000/e1000.h @@ -83,6 +83,11 @@ struct e1000_adapter; #define E1000_MAX_INTR 10 +/* + * Count for polling __E1000_RESET condition every 10-20msec. 
+ */ +#define E1000_CHECK_RESET_COUNT 50 + /* TX/RX descriptor defines */ #define E1000_DEFAULT_TXD 256 #define E1000_MAX_TXD 256 diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 59ad007..15c85d4 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -494,13 +494,20 @@ static void e1000_down_and_stop(struct e1000_adapter *adapter) { set_bit(__E1000_DOWN, &adapter->flags); - /* Only kill reset task if adapter is not resetting */ - if (!test_bit(__E1000_RESETTING, &adapter->flags)) - cancel_work_sync(&adapter->reset_task); - cancel_delayed_work_sync(&adapter->watchdog_task); + + /* + * Since the watchdog task can reschedule other tasks, we should cancel + * it first, otherwise we can run into the situation when a work is + * still running after the adapter has been turned down. + */ + cancel_delayed_work_sync(&adapter->phy_info_task); cancel_delayed_work_sync(&adapter->fifo_stall_task); + + /* Only kill reset task if adapter is not resetting */ + if (!test_bit(__E1000_RESETTING, &adapter->flags)) + cancel_work_sync(&adapter->reset_task); } void e1000_down(struct e1000_adapter *adapter) @@ -1445,6 +1452,10 @@ static int e1000_close(struct net_device *netdev) { struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; + int count = E1000_CHECK_RESET_COUNT; + + while (test_bit(__E1000_RESETTING, &adapter->flags) && count--) + usleep_range(10000, 20000); WARN_ON(test_bit(__E1000_RESETTING, &adapter->flags)); e1000_down(adapter); @@ -3917,8 +3928,7 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter, " next_to_watch <%x>\n" " jiffies <%lx>\n" " next_to_watch.status <%x>\n", - (unsigned long)((tx_ring - adapter->tx_ring) / - sizeof(struct e1000_tx_ring)), + (unsigned long)(tx_ring - adapter->tx_ring), readl(hw->hw_addr + tx_ring->tdh), readl(hw->hw_addr + tx_ring->tdt), tx_ring->next_to_use, @@ -4969,6 
+4979,11 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake) netif_device_detach(netdev); if (netif_running(netdev)) { + int count = E1000_CHECK_RESET_COUNT; + + while (test_bit(__E1000_RESETTING, &adapter->flags) && count--) + usleep_range(10000, 20000); + WARN_ON(test_bit(__E1000_RESETTING, &adapter->flags)); e1000_down(adapter); } diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 42f0f67..70e16f7 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1374,7 +1374,7 @@ static void e1000_rar_set_pch2lan(struct e1000_hw *hw, u8 *addr, u32 index) /* RAR[1-6] are owned by manageability. Skip those and program the * next address into the SHRA register array. */ - if (index < (u32)(hw->mac.rar_entry_count - 6)) { + if (index < (u32)(hw->mac.rar_entry_count)) { s32 ret_val; ret_val = e1000_acquire_swflag_ich8lan(hw); diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h index 217090d..5986569 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.h +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h @@ -98,7 +98,7 @@ #define PCIE_ICH8_SNOOP_ALL PCIE_NO_SNOOP_ALL #define E1000_ICH_RAR_ENTRIES 7 -#define E1000_PCH2_RAR_ENTRIES 11 /* RAR[0-6], SHRA[0-3] */ +#define E1000_PCH2_RAR_ENTRIES 5 /* RAR[0], SHRA[0-3] */ #define E1000_PCH_LPT_RAR_ENTRIES 12 /* RAR[0], SHRA[0-10] */ #define PHY_PAGE_SHIFT 5 diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 9cb400c..07547f6 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6563,21 +6563,15 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return err; pci_using_dac = 0; - err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (!err) { - err = 
dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64)); - if (!err) - pci_using_dac = 1; + pci_using_dac = 1; } else { - err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (err) { - err = dma_set_coherent_mask(&pdev->dev, - DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, - "No usable DMA configuration, aborting\n"); - goto err_dma; - } + dev_err(&pdev->dev, + "No usable DMA configuration, aborting\n"); + goto err_dma; } } @@ -7033,13 +7027,11 @@ static DEFINE_PCI_DEVICE_TABLE(e1000_pci_tbl) = { }; MODULE_DEVICE_TABLE(pci, e1000_pci_tbl); -#ifdef CONFIG_PM static const struct dev_pm_ops e1000_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(e1000_suspend, e1000_resume) SET_RUNTIME_PM_OPS(e1000_runtime_suspend, e1000_runtime_resume, e1000_idle) }; -#endif /* PCI Device API Driver */ static struct pci_driver e1000_driver = { @@ -7047,11 +7039,9 @@ static struct pci_driver e1000_driver = { .id_table = e1000_pci_tbl, .probe = e1000_probe, .remove = e1000_remove, -#ifdef CONFIG_PM .driver = { .pm = &e1000_pm_ops, }, -#endif .shutdown = e1000_shutdown, .err_handler = &e1000_err_handler }; diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index 47c2d10..974558e 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -1403,6 +1403,13 @@ static s32 igb_init_hw_82575(struct e1000_hw *hw) s32 ret_val; u16 i, rar_count = mac->rar_entry_count; + if ((hw->mac.type >= e1000_i210) && + !(igb_get_flash_presence_i210(hw))) { + ret_val = igb_pll_workaround_i210(hw); + if (ret_val) + return ret_val; + } + /* Initialize identification LED */ ret_val = igb_id_led_init(hw); if (ret_val) { diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index 978eca3..956c4c3 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ 
b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -46,14 +46,15 @@ /* Extended Device Control */ #define E1000_CTRL_EXT_SDP3_DATA 0x00000080 /* Value of SW Defineable Pin 3 */ /* Physical Func Reset Done Indication */ -#define E1000_CTRL_EXT_PFRSTD 0x00004000 -#define E1000_CTRL_EXT_LINK_MODE_MASK 0x00C00000 -#define E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES 0x00C00000 -#define E1000_CTRL_EXT_LINK_MODE_1000BASE_KX 0x00400000 -#define E1000_CTRL_EXT_LINK_MODE_SGMII 0x00800000 -#define E1000_CTRL_EXT_LINK_MODE_GMII 0x00000000 -#define E1000_CTRL_EXT_EIAME 0x01000000 -#define E1000_CTRL_EXT_IRCA 0x00000001 +#define E1000_CTRL_EXT_PFRSTD 0x00004000 +#define E1000_CTRL_EXT_SDLPE 0X00040000 /* SerDes Low Power Enable */ +#define E1000_CTRL_EXT_LINK_MODE_MASK 0x00C00000 +#define E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES 0x00C00000 +#define E1000_CTRL_EXT_LINK_MODE_1000BASE_KX 0x00400000 +#define E1000_CTRL_EXT_LINK_MODE_SGMII 0x00800000 +#define E1000_CTRL_EXT_LINK_MODE_GMII 0x00000000 +#define E1000_CTRL_EXT_EIAME 0x01000000 +#define E1000_CTRL_EXT_IRCA 0x00000001 /* Interrupt delay cancellation */ /* Driver loaded bit for FW */ #define E1000_CTRL_EXT_DRV_LOAD 0x10000000 @@ -62,6 +63,7 @@ /* packet buffer parity error detection enabled */ /* descriptor FIFO parity error detection enable */ #define E1000_CTRL_EXT_PBA_CLR 0x80000000 /* PBA Clear */ +#define E1000_CTRL_EXT_PHYPDEN 0x00100000 #define E1000_I2CCMD_REG_ADDR_SHIFT 16 #define E1000_I2CCMD_PHY_ADDR_SHIFT 24 #define E1000_I2CCMD_OPCODE_READ 0x08000000 diff --git a/drivers/net/ethernet/intel/igb/e1000_hw.h b/drivers/net/ethernet/intel/igb/e1000_hw.h index 37a9c06..80f20d1 100644 --- a/drivers/net/ethernet/intel/igb/e1000_hw.h +++ b/drivers/net/ethernet/intel/igb/e1000_hw.h @@ -569,4 +569,7 @@ extern struct net_device *igb_get_hw_dev(struct e1000_hw *hw); /* These functions must be implemented by drivers */ s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value); s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, 
u32 reg, u16 *value); + +void igb_read_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value); +void igb_write_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value); #endif /* _E1000_HW_H_ */ diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.c b/drivers/net/ethernet/intel/igb/e1000_i210.c index 0c03933..0217d4e 100644 --- a/drivers/net/ethernet/intel/igb/e1000_i210.c +++ b/drivers/net/ethernet/intel/igb/e1000_i210.c @@ -835,3 +835,69 @@ s32 igb_init_nvm_params_i210(struct e1000_hw *hw) } return ret_val; } + +/** + * igb_pll_workaround_i210 + * @hw: pointer to the HW structure + * + * Works around an errata in the PLL circuit where it occasionally + * provides the wrong clock frequency after power up. + **/ +s32 igb_pll_workaround_i210(struct e1000_hw *hw) +{ + s32 ret_val; + u32 wuc, mdicnfg, ctrl, ctrl_ext, reg_val; + u16 nvm_word, phy_word, pci_word, tmp_nvm; + int i; + + /* Get and set needed register values */ + wuc = rd32(E1000_WUC); + mdicnfg = rd32(E1000_MDICNFG); + reg_val = mdicnfg & ~E1000_MDICNFG_EXT_MDIO; + wr32(E1000_MDICNFG, reg_val); + + /* Get data from NVM, or set default */ + ret_val = igb_read_invm_word_i210(hw, E1000_INVM_AUTOLOAD, + &nvm_word); + if (ret_val) + nvm_word = E1000_INVM_DEFAULT_AL; + tmp_nvm = nvm_word | E1000_INVM_PLL_WO_VAL; + for (i = 0; i < E1000_MAX_PLL_TRIES; i++) { + /* check current state directly from internal PHY */ + igb_read_phy_reg_gs40g(hw, (E1000_PHY_PLL_FREQ_PAGE | + E1000_PHY_PLL_FREQ_REG), &phy_word); + if ((phy_word & E1000_PHY_PLL_UNCONF) + != E1000_PHY_PLL_UNCONF) { + ret_val = 0; + break; + } else { + ret_val = -E1000_ERR_PHY; + } + /* directly reset the internal PHY */ + ctrl = rd32(E1000_CTRL); + wr32(E1000_CTRL, ctrl|E1000_CTRL_PHY_RST); + + ctrl_ext = rd32(E1000_CTRL_EXT); + ctrl_ext |= (E1000_CTRL_EXT_PHYPDEN | E1000_CTRL_EXT_SDLPE); + wr32(E1000_CTRL_EXT, ctrl_ext); + + wr32(E1000_WUC, 0); + reg_val = (E1000_INVM_AUTOLOAD << 4) | (tmp_nvm << 16); + wr32(E1000_EEARBC_I210, reg_val); + + igb_read_pci_cfg(hw, 
E1000_PCI_PMCSR, &pci_word); + pci_word |= E1000_PCI_PMCSR_D3; + igb_write_pci_cfg(hw, E1000_PCI_PMCSR, &pci_word); + usleep_range(1000, 2000); + pci_word &= ~E1000_PCI_PMCSR_D3; + igb_write_pci_cfg(hw, E1000_PCI_PMCSR, &pci_word); + reg_val = (E1000_INVM_AUTOLOAD << 4) | (nvm_word << 16); + wr32(E1000_EEARBC_I210, reg_val); + + /* restore WUC register */ + wr32(E1000_WUC, wuc); + } + /* restore MDICNFG setting */ + wr32(E1000_MDICNFG, mdicnfg); + return ret_val; +} diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.h b/drivers/net/ethernet/intel/igb/e1000_i210.h index dde3c4b..99f4611 100644 --- a/drivers/net/ethernet/intel/igb/e1000_i210.h +++ b/drivers/net/ethernet/intel/igb/e1000_i210.h @@ -48,6 +48,7 @@ extern s32 igb_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 data); extern s32 igb_init_nvm_params_i210(struct e1000_hw *hw); extern bool igb_get_flash_presence_i210(struct e1000_hw *hw); +s32 igb_pll_workaround_i210(struct e1000_hw *hw); #define E1000_STM_OPCODE 0xDB00 #define E1000_EEPROM_FLASH_SIZE_WORD 0x11 @@ -93,4 +94,15 @@ enum E1000_INVM_STRUCTURE_TYPE { #define NVM_LED_1_CFG_DEFAULT_I211 0x0184 #define NVM_LED_0_2_CFG_DEFAULT_I211 0x200C +/* PLL Defines */ +#define E1000_PCI_PMCSR 0x44 +#define E1000_PCI_PMCSR_D3 0x03 +#define E1000_MAX_PLL_TRIES 5 +#define E1000_PHY_PLL_UNCONF 0xFF +#define E1000_PHY_PLL_FREQ_PAGE 0xFC0000 +#define E1000_PHY_PLL_FREQ_REG 0x000E +#define E1000_INVM_DEFAULT_AL 0x202F +#define E1000_INVM_AUTOLOAD 0x0A +#define E1000_INVM_PLL_WO_VAL 0x0010 + #endif diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c index 556da81..ad2b74d 100644 --- a/drivers/net/ethernet/intel/igb/e1000_phy.c +++ b/drivers/net/ethernet/intel/igb/e1000_phy.c @@ -708,11 +708,6 @@ s32 igb_copper_link_setup_m88(struct e1000_hw *hw) hw_dbg("Error committing the PHY changes\n"); goto out; } - if (phy->type == e1000_phy_i210) { - ret_val = igb_set_master_slave_mode(hw); - if (ret_val) - 
return ret_val; - } out: return ret_val; @@ -806,6 +801,9 @@ s32 igb_copper_link_setup_m88_gen2(struct e1000_hw *hw) hw_dbg("Error committing the PHY changes\n"); return ret_val; } + ret_val = igb_set_master_slave_mode(hw); + if (ret_val) + return ret_val; return 0; } diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h index 82632c6..7156981 100644 --- a/drivers/net/ethernet/intel/igb/e1000_regs.h +++ b/drivers/net/ethernet/intel/igb/e1000_regs.h @@ -69,6 +69,7 @@ #define E1000_PBA 0x01000 /* Packet Buffer Allocation - RW */ #define E1000_PBS 0x01008 /* Packet Buffer Size */ #define E1000_EEMNGCTL 0x01010 /* MNG EEprom Control */ +#define E1000_EEARBC_I210 0x12024 /* EEPROM Auto Read Bus Control */ #define E1000_EEWR 0x0102C /* EEPROM Write Register - RW */ #define E1000_I2CCMD 0x01028 /* SFPI2C Command Register - RW */ #define E1000_FRTIMER 0x01048 /* Free Running Timer - RW */ diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 151e00c..3eb020c 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -771,8 +771,10 @@ static int igb_set_eeprom(struct net_device *netdev, if (eeprom->len == 0) return -EOPNOTSUPP; - if (hw->mac.type == e1000_i211) + if ((hw->mac.type >= e1000_i210) && + !igb_get_flash_presence_i210(hw)) { return -EOPNOTSUPP; + } if (eeprom->magic != (hw->vendor_id | (hw->device_id << 16))) return -EFAULT; @@ -1659,7 +1661,8 @@ static int igb_setup_loopback_test(struct igb_adapter *adapter) if ((hw->device_id == E1000_DEV_ID_DH89XXCC_SGMII) || (hw->device_id == E1000_DEV_ID_DH89XXCC_SERDES) || (hw->device_id == E1000_DEV_ID_DH89XXCC_BACKPLANE) || - (hw->device_id == E1000_DEV_ID_DH89XXCC_SFP)) { + (hw->device_id == E1000_DEV_ID_DH89XXCC_SFP) || + (hw->device_id == E1000_DEV_ID_I354_SGMII)) { /* Enable DH89xxCC MPHY for near end loopback */ reg = rd32(E1000_MPHY_ADDR_CTL); @@ -1725,7 
+1728,8 @@ static void igb_loopback_cleanup(struct igb_adapter *adapter) if ((hw->device_id == E1000_DEV_ID_DH89XXCC_SGMII) || (hw->device_id == E1000_DEV_ID_DH89XXCC_SERDES) || (hw->device_id == E1000_DEV_ID_DH89XXCC_BACKPLANE) || - (hw->device_id == E1000_DEV_ID_DH89XXCC_SFP)) { + (hw->device_id == E1000_DEV_ID_DH89XXCC_SFP) || + (hw->device_id == E1000_DEV_ID_I354_SGMII)) { u32 reg; /* Disable near end loopback on DH89xxCC */ @@ -2055,14 +2059,15 @@ static void igb_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) { struct igb_adapter *adapter = netdev_priv(netdev); - wol->supported = WAKE_UCAST | WAKE_MCAST | - WAKE_BCAST | WAKE_MAGIC | - WAKE_PHY; wol->wolopts = 0; if (!(adapter->flags & IGB_FLAG_WOL_SUPPORTED)) return; + wol->supported = WAKE_UCAST | WAKE_MCAST | + WAKE_BCAST | WAKE_MAGIC | + WAKE_PHY; + /* apply any specific unsupported masks here */ switch (adapter->hw.device_id) { default: diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 8cf44f2..02544ce 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -182,6 +182,7 @@ static void igb_check_vf_rate_limit(struct igb_adapter *); #ifdef CONFIG_PCI_IOV static int igb_vf_configure(struct igb_adapter *adapter, int vf); +static int igb_pci_enable_sriov(struct pci_dev *dev, int num_vfs); #endif #ifdef CONFIG_PM @@ -1586,6 +1587,8 @@ void igb_power_up_link(struct igb_adapter *adapter) igb_power_up_phy_copper(&adapter->hw); else igb_power_up_serdes_link_82575(&adapter->hw); + + igb_setup_link(&adapter->hw); } /** @@ -2034,21 +2037,15 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return err; pci_using_dac = 0; - err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (!err) { - err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64)); - if (!err) - pci_using_dac = 1; + pci_using_dac = 1; } else { - 
err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (err) { - err = dma_set_coherent_mask(&pdev->dev, - DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, - "No usable DMA configuration, aborting\n"); - goto err_dma; - } + dev_err(&pdev->dev, + "No usable DMA configuration, aborting\n"); + goto err_dma; } } @@ -2429,7 +2426,7 @@ err_dma: } #ifdef CONFIG_PCI_IOV -static int igb_disable_sriov(struct pci_dev *pdev) +static int igb_disable_sriov(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct igb_adapter *adapter = netdev_priv(netdev); @@ -2470,27 +2467,19 @@ static int igb_enable_sriov(struct pci_dev *pdev, int num_vfs) int err = 0; int i; - if (!adapter->msix_entries) { + if (!adapter->msix_entries || num_vfs > 7) { err = -EPERM; goto out; } - if (!num_vfs) goto out; - else if (old_vfs && old_vfs == num_vfs) - goto out; - else if (old_vfs && old_vfs != num_vfs) - err = igb_disable_sriov(pdev); - - if (err) - goto out; - - if (num_vfs > 7) { - err = -EPERM; - goto out; - } - adapter->vfs_allocated_count = num_vfs; + if (old_vfs) { + dev_info(&pdev->dev, "%d pre-allocated VFs found - override max_vfs setting of %d\n", + old_vfs, max_vfs); + adapter->vfs_allocated_count = old_vfs; + } else + adapter->vfs_allocated_count = num_vfs; adapter->vf_data = kcalloc(adapter->vfs_allocated_count, sizeof(struct vf_data_storage), GFP_KERNEL); @@ -2504,10 +2493,12 @@ static int igb_enable_sriov(struct pci_dev *pdev, int num_vfs) goto out; } - err = pci_enable_sriov(pdev, adapter->vfs_allocated_count); - if (err) - goto err_out; - + /* only call pci_enable_sriov() if no VFs are allocated already */ + if (!old_vfs) { + err = pci_enable_sriov(pdev, adapter->vfs_allocated_count); + if (err) + goto err_out; + } dev_info(&pdev->dev, "%d VFs allocated\n", adapter->vfs_allocated_count); for (i = 0; i < adapter->vfs_allocated_count; i++) @@ -2623,7 +2614,7 @@ static void 
igb_probe_vfs(struct igb_adapter *adapter) return; pci_sriov_set_totalvfs(pdev, 7); - igb_enable_sriov(pdev, max_vfs); + igb_pci_enable_sriov(pdev, max_vfs); #endif /* CONFIG_PCI_IOV */ } @@ -6918,6 +6909,20 @@ static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) } } +void igb_read_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value) +{ + struct igb_adapter *adapter = hw->back; + + pci_read_config_word(adapter->pdev, reg, value); +} + +void igb_write_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value) +{ + struct igb_adapter *adapter = hw->back; + + pci_write_config_word(adapter->pdev, reg, *value); +} + s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value) { struct igb_adapter *adapter = hw->back; @@ -7281,6 +7286,8 @@ static int igb_sriov_reinit(struct pci_dev *dev) if (netif_running(netdev)) igb_close(netdev); + else + igb_reset(adapter); igb_clear_interrupt_scheme(adapter); diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 93eb7ee..04bf22e 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -2343,10 +2343,9 @@ static int igbvf_change_mtu(struct net_device *netdev, int new_mtu) struct igbvf_adapter *adapter = netdev_priv(netdev); int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN; - if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) { - dev_err(&adapter->pdev->dev, "Invalid MTU setting\n"); + if (new_mtu < 68 || new_mtu > INT_MAX - ETH_HLEN - ETH_FCS_LEN || + max_frame > MAX_JUMBO_FRAME_SIZE) return -EINVAL; - } #define MAX_STD_JUMBO_FRAME_SIZE 9234 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) { @@ -2638,21 +2637,15 @@ static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return err; pci_using_dac = 0; - err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (!err) { - err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64)); - if 
(!err) - pci_using_dac = 1; + pci_using_dac = 1; } else { - err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (err) { - err = dma_set_coherent_mask(&pdev->dev, - DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, "No usable DMA " - "configuration, aborting\n"); - goto err_dma; - } + dev_err(&pdev->dev, "No usable DMA " + "configuration, aborting\n"); + goto err_dma; } } @@ -2699,7 +2692,7 @@ static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ei->get_variants) { err = ei->get_variants(adapter); if (err) - goto err_ioremap; + goto err_get_variants; } /* setup adapter struct */ @@ -2796,6 +2789,7 @@ err_hw_init: kfree(adapter->rx_ring); err_sw_init: igbvf_reset_interrupt_capability(adapter); +err_get_variants: iounmap(adapter->hw.hw_addr); err_ioremap: free_netdev(netdev); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 0ac6b11..4506f8a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -369,11 +369,13 @@ struct ixgbe_q_vector { #ifdef CONFIG_NET_RX_BUSY_POLL unsigned int state; #define IXGBE_QV_STATE_IDLE 0 -#define IXGBE_QV_STATE_NAPI 1 /* NAPI owns this QV */ -#define IXGBE_QV_STATE_POLL 2 /* poll owns this QV */ -#define IXGBE_QV_LOCKED (IXGBE_QV_STATE_NAPI | IXGBE_QV_STATE_POLL) -#define IXGBE_QV_STATE_NAPI_YIELD 4 /* NAPI yielded this QV */ -#define IXGBE_QV_STATE_POLL_YIELD 8 /* poll yielded this QV */ +#define IXGBE_QV_STATE_NAPI 1 /* NAPI owns this QV */ +#define IXGBE_QV_STATE_POLL 2 /* poll owns this QV */ +#define IXGBE_QV_STATE_DISABLED 4 /* QV is disabled */ +#define IXGBE_QV_OWNED (IXGBE_QV_STATE_NAPI | IXGBE_QV_STATE_POLL) +#define IXGBE_QV_LOCKED (IXGBE_QV_OWNED | IXGBE_QV_STATE_DISABLED) +#define IXGBE_QV_STATE_NAPI_YIELD 8 /* NAPI yielded this QV */ +#define IXGBE_QV_STATE_POLL_YIELD 16 /* poll yielded this QV */ #define IXGBE_QV_YIELD 
(IXGBE_QV_STATE_NAPI_YIELD | IXGBE_QV_STATE_POLL_YIELD) #define IXGBE_QV_USER_PEND (IXGBE_QV_STATE_POLL | IXGBE_QV_STATE_POLL_YIELD) spinlock_t lock; @@ -394,7 +396,7 @@ static inline void ixgbe_qv_init_lock(struct ixgbe_q_vector *q_vector) static inline bool ixgbe_qv_lock_napi(struct ixgbe_q_vector *q_vector) { int rc = true; - spin_lock(&q_vector->lock); + spin_lock_bh(&q_vector->lock); if (q_vector->state & IXGBE_QV_LOCKED) { WARN_ON(q_vector->state & IXGBE_QV_STATE_NAPI); q_vector->state |= IXGBE_QV_STATE_NAPI_YIELD; @@ -405,7 +407,7 @@ static inline bool ixgbe_qv_lock_napi(struct ixgbe_q_vector *q_vector) } else /* we don't care if someone yielded */ q_vector->state = IXGBE_QV_STATE_NAPI; - spin_unlock(&q_vector->lock); + spin_unlock_bh(&q_vector->lock); return rc; } @@ -413,14 +415,15 @@ static inline bool ixgbe_qv_lock_napi(struct ixgbe_q_vector *q_vector) static inline bool ixgbe_qv_unlock_napi(struct ixgbe_q_vector *q_vector) { int rc = false; - spin_lock(&q_vector->lock); + spin_lock_bh(&q_vector->lock); WARN_ON(q_vector->state & (IXGBE_QV_STATE_POLL | IXGBE_QV_STATE_NAPI_YIELD)); if (q_vector->state & IXGBE_QV_STATE_POLL_YIELD) rc = true; - q_vector->state = IXGBE_QV_STATE_IDLE; - spin_unlock(&q_vector->lock); + /* will reset state to idle, unless QV is disabled */ + q_vector->state &= IXGBE_QV_STATE_DISABLED; + spin_unlock_bh(&q_vector->lock); return rc; } @@ -451,7 +454,8 @@ static inline bool ixgbe_qv_unlock_poll(struct ixgbe_q_vector *q_vector) if (q_vector->state & IXGBE_QV_STATE_POLL_YIELD) rc = true; - q_vector->state = IXGBE_QV_STATE_IDLE; + /* will reset state to idle, unless QV is disabled */ + q_vector->state &= IXGBE_QV_STATE_DISABLED; spin_unlock_bh(&q_vector->lock); return rc; } @@ -459,9 +463,23 @@ static inline bool ixgbe_qv_unlock_poll(struct ixgbe_q_vector *q_vector) /* true if a socket is polling, even if it did not get the lock */ static inline bool ixgbe_qv_ll_polling(struct ixgbe_q_vector *q_vector) { - WARN_ON(!(q_vector->state & 
IXGBE_QV_LOCKED)); + WARN_ON(!(q_vector->state & IXGBE_QV_OWNED)); return q_vector->state & IXGBE_QV_USER_PEND; } + +/* false if QV is currently owned */ +static inline bool ixgbe_qv_disable(struct ixgbe_q_vector *q_vector) +{ + int rc = true; + spin_lock_bh(&q_vector->lock); + if (q_vector->state & IXGBE_QV_OWNED) + rc = false; + q_vector->state |= IXGBE_QV_STATE_DISABLED; + spin_unlock_bh(&q_vector->lock); + + return rc; +} + #else /* CONFIG_NET_RX_BUSY_POLL */ static inline void ixgbe_qv_init_lock(struct ixgbe_q_vector *q_vector) { @@ -491,6 +509,12 @@ static inline bool ixgbe_qv_ll_polling(struct ixgbe_q_vector *q_vector) { return false; } + +static inline bool ixgbe_qv_disable(struct ixgbe_q_vector *q_vector) +{ + return true; +} + #endif /* CONFIG_NET_RX_BUSY_POLL */ #ifdef CONFIG_IXGBE_HWMON diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index e8649ab..2cd86d3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -2212,13 +2212,13 @@ static int ixgbe_set_coalesce(struct net_device *netdev, #if IS_ENABLED(CONFIG_BQL) /* detect ITR changes that require update of TXDCTL.WTHRESH */ - if ((adapter->tx_itr_setting > 1) && + if ((adapter->tx_itr_setting != 1) && (adapter->tx_itr_setting < IXGBE_100K_ITR)) { if ((tx_itr_prev == 1) || - (tx_itr_prev > IXGBE_100K_ITR)) + (tx_itr_prev >= IXGBE_100K_ITR)) need_reset = true; } else { - if ((tx_itr_prev > 1) && + if ((tx_itr_prev != 1) && (tx_itr_prev < IXGBE_100K_ITR)) need_reset = true; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 0ade0cd..8a14f96 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -3825,14 +3825,6 @@ void ixgbe_set_rx_mode(struct net_device *netdev) if (netdev->flags & IFF_ALLMULTI) { fctrl |= IXGBE_FCTRL_MPE; vmolr |= IXGBE_VMOLR_MPE; - } 
else { - /* - * Write addresses to the MTA, if the attempt fails - * then we should just turn on promiscuous mode so - * that we can at least receive multicast traffic - */ - hw->mac.ops.update_mc_addr_list(hw, netdev); - vmolr |= IXGBE_VMOLR_ROMPE; } ixgbe_vlan_filter_enable(adapter); hw->addr_ctrl.user_set_promisc = false; @@ -3849,6 +3841,13 @@ void ixgbe_set_rx_mode(struct net_device *netdev) vmolr |= IXGBE_VMOLR_ROPE; } + /* Write addresses to the MTA, if the attempt fails + * then we should just turn on promiscuous mode so + * that we can at least receive multicast traffic + */ + hw->mac.ops.update_mc_addr_list(hw, netdev); + vmolr |= IXGBE_VMOLR_ROMPE; + if (adapter->num_vfs) ixgbe_restore_vf_multicasts(adapter); @@ -3893,15 +3892,13 @@ static void ixgbe_napi_disable_all(struct ixgbe_adapter *adapter) { int q_idx; - local_bh_disable(); /* for ixgbe_qv_lock_napi() */ for (q_idx = 0; q_idx < adapter->num_q_vectors; q_idx++) { napi_disable(&adapter->q_vector[q_idx]->napi); - while (!ixgbe_qv_lock_napi(adapter->q_vector[q_idx])) { + while (!ixgbe_qv_disable(adapter->q_vector[q_idx])) { pr_info("QV %d locked\n", q_idx); - mdelay(1); + usleep_range(1000, 20000); } } - local_bh_enable(); } #ifdef CONFIG_IXGBE_DCB @@ -7490,19 +7487,14 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) return err; - if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)) && - !dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64))) { + if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) { pci_using_dac = 1; } else { - err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (err) { - err = dma_set_coherent_mask(&pdev->dev, - DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, - "No usable DMA configuration, aborting\n"); - goto err_dma; - } + dev_err(&pdev->dev, + "No usable DMA configuration, aborting\n"); + goto err_dma; } pci_using_dac = 0; } diff --git 
a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 59a62bb..83544f8 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -756,37 +756,12 @@ static void ixgbevf_set_itr(struct ixgbevf_q_vector *q_vector) static irqreturn_t ixgbevf_msix_other(int irq, void *data) { struct ixgbevf_adapter *adapter = data; - struct pci_dev *pdev = adapter->pdev; struct ixgbe_hw *hw = &adapter->hw; - u32 msg; - bool got_ack = false; hw->mac.get_link_status = 1; - if (!hw->mbx.ops.check_for_ack(hw)) - got_ack = true; - - if (!hw->mbx.ops.check_for_msg(hw)) { - hw->mbx.ops.read(hw, &msg, 1); - - if ((msg & IXGBE_MBVFICR_VFREQ_MASK) == IXGBE_PF_CONTROL_MSG) { - mod_timer(&adapter->watchdog_timer, - round_jiffies(jiffies + 1)); - adapter->link_up = false; - } - - if (msg & IXGBE_VT_MSGTYPE_NACK) - dev_info(&pdev->dev, - "Last Request of type %2.2x to PF Nacked\n", - msg & 0xFF); - hw->mbx.v2p_mailbox |= IXGBE_VFMAILBOX_PFSTS; - } - /* checking for the ack clears the PFACK bit. 
Place - * it back in the v2p_mailbox cache so that anyone - * polling for an ack will not miss it - */ - if (got_ack) - hw->mbx.v2p_mailbox |= IXGBE_VFMAILBOX_PFACK; + if (!test_bit(__IXGBEVF_DOWN, &adapter->state)) + mod_timer(&adapter->watchdog_timer, jiffies); IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, adapter->eims_other); @@ -3326,19 +3301,14 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) return err; - if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)) && - !dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64))) { + if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) { pci_using_dac = 1; } else { - err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (err) { - err = dma_set_coherent_mask(&pdev->dev, - DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, "No usable DMA " - "configuration, aborting\n"); - goto err_dma; - } + dev_err(&pdev->dev, "No usable DMA " + "configuration, aborting\n"); + goto err_dma; } pci_using_dac = 0; } diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 71d9cad..9c66d31 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -88,9 +88,8 @@ #define MVNETA_TX_IN_PRGRS BIT(1) #define MVNETA_TX_FIFO_EMPTY BIT(8) #define MVNETA_RX_MIN_FRAME_SIZE 0x247c -#define MVNETA_SERDES_CFG 0x24A0 +#define MVNETA_SGMII_SERDES_CFG 0x24A0 #define MVNETA_SGMII_SERDES_PROTO 0x0cc7 -#define MVNETA_RGMII_SERDES_PROTO 0x0667 #define MVNETA_TYPE_PRIO 0x24bc #define MVNETA_FORCE_UNI BIT(21) #define MVNETA_TXQ_CMD_1 0x24e4 @@ -173,7 +172,7 @@ /* Various constants */ /* Coalescing */ -#define MVNETA_TXDONE_COAL_PKTS 16 +#define MVNETA_TXDONE_COAL_PKTS 1 #define MVNETA_RX_COAL_PKTS 32 #define MVNETA_RX_COAL_USEC 100 @@ -666,6 +665,35 @@ static void mvneta_rxq_bm_disable(struct mvneta_port *pp, mvreg_write(pp, MVNETA_RXQ_CONFIG_REG(rxq->id), val); } + + +/* Sets the RGMII 
Enable bit (RGMIIEn) in port MAC control register */ +static void mvneta_gmac_rgmii_set(struct mvneta_port *pp, int enable) +{ + u32 val; + + val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); + + if (enable) + val |= MVNETA_GMAC2_PORT_RGMII; + else + val &= ~MVNETA_GMAC2_PORT_RGMII; + + mvreg_write(pp, MVNETA_GMAC_CTRL_2, val); +} + +/* Config SGMII port */ +static void mvneta_port_sgmii_config(struct mvneta_port *pp) +{ + u32 val; + + val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); + val |= MVNETA_GMAC2_PCS_ENABLE; + mvreg_write(pp, MVNETA_GMAC_CTRL_2, val); + + mvreg_write(pp, MVNETA_SGMII_SERDES_CFG, MVNETA_SGMII_SERDES_PROTO); +} + /* Start the Ethernet port RX and TX activity */ static void mvneta_port_up(struct mvneta_port *pp) { @@ -1496,6 +1524,7 @@ static int mvneta_tx(struct sk_buff *skb, struct net_device *dev) struct mvneta_tx_queue *txq = &pp->txqs[txq_id]; struct mvneta_tx_desc *tx_desc; struct netdev_queue *nq; + int len = skb->len; int frags = 0; u32 tx_cmd; @@ -1556,7 +1585,7 @@ out: if (frags > 0) { u64_stats_update_begin(&pp->tx_stats.syncp); pp->tx_stats.packets++; - pp->tx_stats.bytes += skb->len; + pp->tx_stats.bytes += len; u64_stats_update_end(&pp->tx_stats.syncp); } else { @@ -2330,7 +2359,7 @@ static void mvneta_adjust_link(struct net_device *ndev) if (phydev->speed == SPEED_1000) val |= MVNETA_GMAC_CONFIG_GMII_SPEED; - else + else if (phydev->speed == SPEED_100) val |= MVNETA_GMAC_CONFIG_MII_SPEED; mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); @@ -2695,15 +2724,12 @@ static void mvneta_port_power_up(struct mvneta_port *pp, int phy_mode) mvreg_write(pp, MVNETA_UNIT_INTR_CAUSE, 0); if (phy_mode == PHY_INTERFACE_MODE_SGMII) - mvreg_write(pp, MVNETA_SERDES_CFG, MVNETA_SGMII_SERDES_PROTO); - else - mvreg_write(pp, MVNETA_SERDES_CFG, MVNETA_RGMII_SERDES_PROTO); + mvneta_port_sgmii_config(pp); - val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); - - val |= MVNETA_GMAC2_PCS_ENABLE | MVNETA_GMAC2_PORT_RGMII; + mvneta_gmac_rgmii_set(pp, 1); /* Cancel Port Reset */ + 
val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); val &= ~MVNETA_GMAC2_PORT_RESET; mvreg_write(pp, MVNETA_GMAC_CTRL_2, val); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 3e2d504..d9303d8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -55,7 +55,6 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv, cq->ring = ring; cq->is_tx = mode; - spin_lock_init(&cq->lock); err = mlx4_alloc_hwq_res(mdev->dev, &cq->wqres, cq->buf_size, 2 * PAGE_SIZE); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index fa37b7a..35d3821 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1219,15 +1219,11 @@ static void mlx4_en_netpoll(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_cq *cq; - unsigned long flags; int i; for (i = 0; i < priv->rx_ring_num; i++) { cq = &priv->rx_cq[i]; - spin_lock_irqsave(&cq->lock, flags); - napi_synchronize(&cq->napi); - mlx4_en_process_rx_cq(dev, cq, 0); - spin_unlock_irqrestore(&cq->lock, flags); + napi_schedule(&cq->napi); } } #endif diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index bf06e36..a47455f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -306,7 +306,6 @@ struct mlx4_en_cq { struct mlx4_cq mcq; struct mlx4_hwq_resources wqres; int ring; - spinlock_t lock; struct net_device *dev; struct napi_struct napi; int size; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index dd68763..cdbe637 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -1227,7 +1227,7 @@ static int qp_alloc_res(struct mlx4_dev *dev, int 
slave, int op, int cmd, switch (op) { case RES_OP_RESERVE: - count = get_param_l(&in_param); + count = get_param_l(&in_param) & 0xffffff; align = get_param_h(&in_param); err = __mlx4_qp_reserve_range(dev, count, align, &base); if (err) diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 149355b..c155b92 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -872,6 +872,10 @@ static int myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type) return -ENOMEM; dmatest_bus = pci_map_page(mgp->pdev, dmatest_page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + if (unlikely(pci_dma_mapping_error(mgp->pdev, dmatest_bus))) { + __free_page(dmatest_page); + return -ENOMEM; + } /* Run a small DMA test. * The magic multipliers to the length tell the firmware @@ -1293,6 +1297,7 @@ myri10ge_alloc_rx_pages(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx, int bytes, int watchdog) { struct page *page; + dma_addr_t bus; int idx; #if MYRI10GE_ALLOC_SIZE > 4096 int end_offset; @@ -1317,11 +1322,21 @@ myri10ge_alloc_rx_pages(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx, rx->watchdog_needed = 1; return; } + + bus = pci_map_page(mgp->pdev, page, 0, + MYRI10GE_ALLOC_SIZE, + PCI_DMA_FROMDEVICE); + if (unlikely(pci_dma_mapping_error(mgp->pdev, bus))) { + __free_pages(page, MYRI10GE_ALLOC_ORDER); + if (rx->fill_cnt - rx->cnt < 16) + rx->watchdog_needed = 1; + return; + } + rx->page = page; rx->page_offset = 0; - rx->bus = pci_map_page(mgp->pdev, page, 0, - MYRI10GE_ALLOC_SIZE, - PCI_DMA_FROMDEVICE); + rx->bus = bus; + } rx->info[idx].page = rx->page; rx->info[idx].page_offset = rx->page_offset; @@ -2765,6 +2780,35 @@ myri10ge_submit_req(struct myri10ge_tx_buf *tx, struct mcp_kreq_ether_send *src, mb(); } +static void myri10ge_unmap_tx_dma(struct myri10ge_priv *mgp, + struct myri10ge_tx_buf *tx, int idx) +{ + unsigned int len; + int last_idx; + + /* Free 
any DMA resources we've alloced and clear out the skb slot */ + last_idx = (idx + 1) & tx->mask; + idx = tx->req & tx->mask; + do { + len = dma_unmap_len(&tx->info[idx], len); + if (len) { + if (tx->info[idx].skb != NULL) + pci_unmap_single(mgp->pdev, + dma_unmap_addr(&tx->info[idx], + bus), len, + PCI_DMA_TODEVICE); + else + pci_unmap_page(mgp->pdev, + dma_unmap_addr(&tx->info[idx], + bus), len, + PCI_DMA_TODEVICE); + dma_unmap_len_set(&tx->info[idx], len, 0); + tx->info[idx].skb = NULL; + } + idx = (idx + 1) & tx->mask; + } while (idx != last_idx); +} + /* * Transmit a packet. We need to split the packet so that a single * segment does not cross myri10ge->tx_boundary, so this makes segment @@ -2788,7 +2832,7 @@ static netdev_tx_t myri10ge_xmit(struct sk_buff *skb, u32 low; __be32 high_swapped; unsigned int len; - int idx, last_idx, avail, frag_cnt, frag_idx, count, mss, max_segments; + int idx, avail, frag_cnt, frag_idx, count, mss, max_segments; u16 pseudo_hdr_offset, cksum_offset, queue; int cum_len, seglen, boundary, rdma_count; u8 flags, odd_flag; @@ -2885,9 +2929,12 @@ again: /* map the skb for DMA */ len = skb_headlen(skb); + bus = pci_map_single(mgp->pdev, skb->data, len, PCI_DMA_TODEVICE); + if (unlikely(pci_dma_mapping_error(mgp->pdev, bus))) + goto drop; + idx = tx->req & tx->mask; tx->info[idx].skb = skb; - bus = pci_map_single(mgp->pdev, skb->data, len, PCI_DMA_TODEVICE); dma_unmap_addr_set(&tx->info[idx], bus, bus); dma_unmap_len_set(&tx->info[idx], len, len); @@ -2986,12 +3033,16 @@ again: break; /* map next fragment for DMA */ - idx = (count + tx->req) & tx->mask; frag = &skb_shinfo(skb)->frags[frag_idx]; frag_idx++; len = skb_frag_size(frag); bus = skb_frag_dma_map(&mgp->pdev->dev, frag, 0, len, DMA_TO_DEVICE); + if (unlikely(pci_dma_mapping_error(mgp->pdev, bus))) { + myri10ge_unmap_tx_dma(mgp, tx, idx); + goto drop; + } + idx = (count + tx->req) & tx->mask; dma_unmap_addr_set(&tx->info[idx], bus, bus); dma_unmap_len_set(&tx->info[idx], len, 
len); } @@ -3022,31 +3073,8 @@ again: return NETDEV_TX_OK; abort_linearize: - /* Free any DMA resources we've alloced and clear out the skb - * slot so as to not trip up assertions, and to avoid a - * double-free if linearizing fails */ + myri10ge_unmap_tx_dma(mgp, tx, idx); - last_idx = (idx + 1) & tx->mask; - idx = tx->req & tx->mask; - tx->info[idx].skb = NULL; - do { - len = dma_unmap_len(&tx->info[idx], len); - if (len) { - if (tx->info[idx].skb != NULL) - pci_unmap_single(mgp->pdev, - dma_unmap_addr(&tx->info[idx], - bus), len, - PCI_DMA_TODEVICE); - else - pci_unmap_page(mgp->pdev, - dma_unmap_addr(&tx->info[idx], - bus), len, - PCI_DMA_TODEVICE); - dma_unmap_len_set(&tx->info[idx], len, 0); - tx->info[idx].skb = NULL; - } - idx = (idx + 1) & tx->mask; - } while (idx != last_idx); if (skb_is_gso(skb)) { netdev_err(mgp->dev, "TSO but wanted to linearize?!?!?\n"); goto drop; diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c index 7692dfd..cc68657 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c @@ -1604,13 +1604,13 @@ netxen_process_lro(struct netxen_adapter *adapter, u32 seq_number; u8 vhdr_len = 0; - if (unlikely(ring > adapter->max_rds_rings)) + if (unlikely(ring >= adapter->max_rds_rings)) return NULL; rds_ring = &recv_ctx->rds_rings[ring]; index = netxen_get_lro_sts_refhandle(sts_data0); - if (unlikely(index > rds_ring->num_desc)) + if (unlikely(index >= rds_ring->num_desc)) return NULL; buffer = &rds_ring->rx_buf_arr[index]; diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c index d62d5ce..d677eab 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c @@ -1053,6 +1053,7 @@ static int qlcnic_dcb_peer_app_info(struct net_device *netdev, struct qlcnic_dcb_cee *peer; int i; + memset(info, 0, 
sizeof(*info)); *app_count = 0; if (!test_bit(__QLCNIC_DCB_STATE, &adapter->state)) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index b57c278..36119b3 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -247,6 +247,27 @@ static const u16 sh_eth_offset_fast_sh4[SH_ETH_MAX_REGISTER_OFFSET] = { }; static const u16 sh_eth_offset_fast_sh3_sh2[SH_ETH_MAX_REGISTER_OFFSET] = { + [EDMR] = 0x0000, + [EDTRR] = 0x0004, + [EDRRR] = 0x0008, + [TDLAR] = 0x000c, + [RDLAR] = 0x0010, + [EESR] = 0x0014, + [EESIPR] = 0x0018, + [TRSCER] = 0x001c, + [RMFCR] = 0x0020, + [TFTR] = 0x0024, + [FDR] = 0x0028, + [RMCR] = 0x002c, + [EDOCR] = 0x0030, + [FCFTR] = 0x0034, + [RPADIR] = 0x0038, + [TRIMD] = 0x003c, + [RBWAR] = 0x0040, + [RDFAR] = 0x0044, + [TBRAR] = 0x004c, + [TDFAR] = 0x0050, + [ECMR] = 0x0160, [ECSR] = 0x0164, [ECSIPR] = 0x0168, @@ -483,7 +504,6 @@ static struct sh_eth_cpu_data sh7757_data = { .register_type = SH_ETH_REG_FAST_SH4, .eesipr_value = DMAC_M_RFRMER | DMAC_M_ECI | 0x003fffff, - .rmcr_value = 0x00000001, .tx_check = EESR_FTC | EESR_CND | EESR_DLC | EESR_CD | EESR_RTO, .eesr_err_check = EESR_TWB | EESR_TABT | EESR_RABT | EESR_RFE | @@ -561,7 +581,6 @@ static struct sh_eth_cpu_data sh7757_data_giga = { EESR_RFE | EESR_RDE | EESR_RFRMER | EESR_TFE | EESR_TDE | EESR_ECI, .fdr_value = 0x0000072f, - .rmcr_value = 0x00000001, .irq_flags = IRQF_SHARED, .apr = 1, @@ -689,7 +708,6 @@ static struct sh_eth_cpu_data r8a7740_data = { EESR_RFE | EESR_RDE | EESR_RFRMER | EESR_TFE | EESR_TDE | EESR_ECI, .fdr_value = 0x0000070f, - .rmcr_value = 0x00000001, .apr = 1, .mpr = 1, @@ -738,9 +756,6 @@ static void sh_eth_set_default_cpu_data(struct sh_eth_cpu_data *cd) if (!cd->fdr_value) cd->fdr_value = DEFAULT_FDR_INIT; - if (!cd->rmcr_value) - cd->rmcr_value = DEFAULT_RMCR_VALUE; - if (!cd->tx_check) cd->tx_check = DEFAULT_TX_CHECK; @@ -1193,8 +1208,8 @@ static int sh_eth_dev_init(struct net_device 
*ndev, bool start) sh_eth_write(ndev, mdp->cd->fdr_value, FDR); sh_eth_write(ndev, 0, TFTR); - /* Frame recv control */ - sh_eth_write(ndev, mdp->cd->rmcr_value, RMCR); + /* Frame recv control (enable multiple-packets per rx irq) */ + sh_eth_write(ndev, 0x00000001, RMCR); sh_eth_write(ndev, DESC_I_RINT8 | DESC_I_RINT5 | DESC_I_TINT2, TRSCER); diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h index a0db02c..8cd5ad2 100644 --- a/drivers/net/ethernet/renesas/sh_eth.h +++ b/drivers/net/ethernet/renesas/sh_eth.h @@ -321,7 +321,6 @@ enum TD_STS_BIT { #define TD_TFP (TD_TFP1|TD_TFP0) /* RMCR */ -#define DEFAULT_RMCR_VALUE 0x00000000 /* ECMR */ enum FELIC_MODE_BIT { @@ -470,7 +469,6 @@ struct sh_eth_cpu_data { unsigned long fdr_value; unsigned long fcftr_value; unsigned long rpadir_value; - unsigned long rmcr_value; /* interrupt checking mask */ unsigned long tx_check; diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c index 9826594..6508717 100644 --- a/drivers/net/ethernet/sfc/nic.c +++ b/drivers/net/ethernet/sfc/nic.c @@ -155,13 +155,15 @@ void efx_nic_fini_interrupt(struct efx_nic *efx) efx->net_dev->rx_cpu_rmap = NULL; #endif - /* Disable MSI/MSI-X interrupts */ - efx_for_each_channel(channel, efx) - free_irq(channel->irq, &efx->msi_context[channel->channel]); - - /* Disable legacy interrupt */ - if (efx->legacy_irq) + if (EFX_INT_MODE_USE_MSI(efx)) { + /* Disable MSI/MSI-X interrupts */ + efx_for_each_channel(channel, efx) + free_irq(channel->irq, + &efx->msi_context[channel->channel]); + } else { + /* Disable legacy interrupt */ free_irq(efx->legacy_irq, efx); + } } /* Register dump */ diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index 3df5684..ade8bdf 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -656,7 +656,7 @@ static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev) 
spin_lock_irqsave(&port->vio.lock, flags); dr = &port->vio.drings[VIO_DRIVER_TX_RING]; - if (unlikely(vnet_tx_dring_avail(dr) < 2)) { + if (unlikely(vnet_tx_dring_avail(dr) < 1)) { if (!netif_queue_stopped(dev)) { netif_stop_queue(dev); @@ -704,7 +704,7 @@ static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev) dev->stats.tx_bytes += skb->len; dr->prod = (dr->prod + 1) & (VNET_TX_RING_SIZE - 1); - if (unlikely(vnet_tx_dring_avail(dr) < 2)) { + if (unlikely(vnet_tx_dring_avail(dr) < 1)) { netif_stop_queue(dev); if (vnet_tx_dring_avail(dr) > VNET_TX_WAKEUP_THRESH(dr)) netif_wake_queue(dev); @@ -1083,6 +1083,24 @@ static struct vnet *vnet_find_or_create(const u64 *local_mac) return vp; } +static void vnet_cleanup(void) +{ + struct vnet *vp; + struct net_device *dev; + + mutex_lock(&vnet_list_mutex); + while (!list_empty(&vnet_list)) { + vp = list_first_entry(&vnet_list, struct vnet, list); + list_del(&vp->list); + dev = vp->dev; + /* vio_unregister_driver() should have cleaned up port_list */ + BUG_ON(!list_empty(&vp->port_list)); + unregister_netdev(dev); + free_netdev(dev); + } + mutex_unlock(&vnet_list_mutex); +} + static const char *local_mac_prop = "local-mac-address"; static struct vnet *vnet_find_parent(struct mdesc_handle *hp, @@ -1240,7 +1258,6 @@ static int vnet_port_remove(struct vio_dev *vdev) kfree(port); - unregister_netdev(vp->dev); } return 0; } @@ -1268,6 +1285,7 @@ static int __init vnet_init(void) static void __exit vnet_exit(void) { vio_unregister_driver(&vnet_port_driver); + vnet_cleanup(); } module_init(vnet_init); diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index cc3ce55..07cd14d 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -546,6 +546,12 @@ static inline int cpsw_get_slave_port(struct cpsw_priv *priv, u32 slave_num) static void cpsw_ndo_set_rx_mode(struct net_device *ndev) { struct cpsw_priv *priv = netdev_priv(ndev); + int vid; + + if (priv->data.dual_emac) 
+ vid = priv->slaves[priv->emac_port].port_vlan; + else + vid = priv->data.default_vlan; if (ndev->flags & IFF_PROMISC) { /* Enable promiscuous mode */ @@ -554,7 +560,8 @@ static void cpsw_ndo_set_rx_mode(struct net_device *ndev) } /* Clear all mcast from ALE */ - cpsw_ale_flush_multicast(priv->ale, ALE_ALL_PORTS << priv->host_port); + cpsw_ale_flush_multicast(priv->ale, ALE_ALL_PORTS << priv->host_port, + vid); if (!netdev_mc_empty(ndev)) { struct netdev_hw_addr *ha; @@ -639,6 +646,14 @@ void cpsw_rx_handler(void *token, int len, int status) static irqreturn_t cpsw_interrupt(int irq, void *dev_id) { struct cpsw_priv *priv = dev_id; + int value = irq - priv->irqs_table[0]; + + /* NOTICE: Ending IRQ here. The trick with the 'value' variable above + * is to make sure we will always write the correct value to the EOI + * register. Namely 0 for RX_THRESH Interrupt, 1 for RX Interrupt, 2 + * for TX Interrupt and 3 for MISC Interrupt. + */ + cpdma_ctlr_eoi(priv->dma, value); cpsw_intr_disable(priv); if (priv->irq_enabled == true) { @@ -668,8 +683,6 @@ static int cpsw_poll(struct napi_struct *napi, int budget) int num_tx, num_rx; num_tx = cpdma_chan_process(priv->txch, 128); - if (num_tx) - cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX); num_rx = cpdma_chan_process(priv->rxch, budget); if (num_rx < budget) { @@ -677,7 +690,6 @@ static int cpsw_poll(struct napi_struct *napi, int budget) napi_complete(napi); cpsw_intr_enable(priv); - cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX); prim_cpsw = cpsw_get_slave_priv(priv, 0); if (prim_cpsw->irq_enabled == false) { prim_cpsw->irq_enabled = true; @@ -1165,8 +1177,6 @@ static int cpsw_ndo_open(struct net_device *ndev) napi_enable(&priv->napi); cpdma_ctlr_start(priv->dma); cpsw_intr_enable(priv); - cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX); - cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX); if (priv->data.dual_emac) priv->slaves[priv->emac_port].open_stat = true; @@ -1416,9 +1426,6 @@ static void cpsw_ndo_tx_timeout(struct net_device *ndev) 
cpdma_chan_start(priv->txch); cpdma_ctlr_int_ctrl(priv->dma, true); cpsw_intr_enable(priv); - cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX); - cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX); - } static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p) @@ -1464,9 +1471,6 @@ static void cpsw_ndo_poll_controller(struct net_device *ndev) cpsw_interrupt(ndev->irq, priv); cpdma_ctlr_int_ctrl(priv->dma, true); cpsw_intr_enable(priv); - cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX); - cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX); - } #endif @@ -1797,6 +1801,10 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, mdio_node = of_find_node_by_phandle(be32_to_cpup(parp)); phyid = be32_to_cpup(parp+1); mdio = of_find_device_by_node(mdio_node); + if (!mdio) { + pr_err("Missing mdio platform device\n"); + return -EINVAL; + } snprintf(slave_data->phy_id, sizeof(slave_data->phy_id), PHY_ID_FMT, mdio->name, phyid); diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c index 7fa60d6..f7acf76 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.c +++ b/drivers/net/ethernet/ti/cpsw_ale.c @@ -236,7 +236,7 @@ static void cpsw_ale_flush_mcast(struct cpsw_ale *ale, u32 *ale_entry, cpsw_ale_set_entry_type(ale_entry, ALE_TYPE_FREE); } -int cpsw_ale_flush_multicast(struct cpsw_ale *ale, int port_mask) +int cpsw_ale_flush_multicast(struct cpsw_ale *ale, int port_mask, int vid) { u32 ale_entry[ALE_ENTRY_WORDS]; int ret, idx; @@ -247,6 +247,14 @@ int cpsw_ale_flush_multicast(struct cpsw_ale *ale, int port_mask) if (ret != ALE_TYPE_ADDR && ret != ALE_TYPE_VLAN_ADDR) continue; + /* if vid passed is -1 then remove all multicast entry from + * the table irrespective of vlan id, if a valid vlan id is + * passed then remove only multicast added to that vlan id. + * if vlan id doesn't match then move on to next entry. 
+ */ + if (vid != -1 && cpsw_ale_get_vlan_id(ale_entry) != vid) + continue; + if (cpsw_ale_get_mcast(ale_entry)) { u8 addr[6]; diff --git a/drivers/net/ethernet/ti/cpsw_ale.h b/drivers/net/ethernet/ti/cpsw_ale.h index 30daa12..20c7976 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.h +++ b/drivers/net/ethernet/ti/cpsw_ale.h @@ -86,7 +86,7 @@ void cpsw_ale_stop(struct cpsw_ale *ale); int cpsw_ale_set_ageout(struct cpsw_ale *ale, int ageout); int cpsw_ale_flush(struct cpsw_ale *ale, int port_mask); -int cpsw_ale_flush_multicast(struct cpsw_ale *ale, int port_mask); +int cpsw_ale_flush_multicast(struct cpsw_ale *ale, int port_mask, int vid); int cpsw_ale_add_ucast(struct cpsw_ale *ale, u8 *addr, int port, int flags, u16 vid); int cpsw_ale_del_ucast(struct cpsw_ale *ale, u8 *addr, int port, diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index f813572..616b4e1 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -138,6 +138,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) struct hv_netvsc_packet *packet; int ret; unsigned int i, num_pages, npg_data; + u32 skb_length = skb->len; /* Add multipages for skb->data and additional 2 for RNDIS */ npg_data = (((unsigned long)skb->data + skb_headlen(skb) - 1) @@ -208,7 +209,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) ret = rndis_filter_send(net_device_ctx->device_ctx, packet); if (ret == 0) { - net->stats.tx_bytes += skb->len; + net->stats.tx_bytes += skb_length; net->stats.tx_packets++; } else { kfree(packet); diff --git a/drivers/net/ieee802154/fakehard.c b/drivers/net/ieee802154/fakehard.c index bf0d55e..6adbef8 100644 --- a/drivers/net/ieee802154/fakehard.c +++ b/drivers/net/ieee802154/fakehard.c @@ -376,17 +376,20 @@ static int ieee802154fake_probe(struct platform_device *pdev) err = wpan_phy_register(phy); if (err) - goto out; + goto err_phy_reg; err = register_netdev(dev); - if (err < 0) - goto 
out; + if (err) + goto err_netdev_reg; dev_info(&pdev->dev, "Added ieee802154 HardMAC hardware\n"); return 0; -out: - unregister_netdev(dev); +err_netdev_reg: + wpan_phy_unregister(phy); +err_phy_reg: + free_netdev(dev); + wpan_phy_free(phy); return err; } diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 9bf46bd..1124ea0 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -263,11 +263,9 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) const struct macvlan_dev *vlan = netdev_priv(dev); const struct macvlan_port *port = vlan->port; const struct macvlan_dev *dest; - __u8 ip_summed = skb->ip_summed; if (vlan->mode == MACVLAN_MODE_BRIDGE) { const struct ethhdr *eth = (void *)skb->data; - skb->ip_summed = CHECKSUM_UNNECESSARY; /* send to other bridge ports directly */ if (is_multicast_ether_addr(eth->h_dest)) { @@ -285,7 +283,6 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) } xmit_world: - skb->ip_summed = ip_summed; skb->dev = vlan->lowerdev; return dev_queue_xmit(skb); } @@ -428,8 +425,10 @@ static void macvlan_change_rx_flags(struct net_device *dev, int change) struct macvlan_dev *vlan = netdev_priv(dev); struct net_device *lowerdev = vlan->lowerdev; - if (change & IFF_ALLMULTI) - dev_set_allmulti(lowerdev, dev->flags & IFF_ALLMULTI ? 1 : -1); + if (dev->flags & IFF_UP) { + if (change & IFF_ALLMULTI) + dev_set_allmulti(lowerdev, dev->flags & IFF_ALLMULTI ? 
1 : -1); + } } static void macvlan_set_mac_lists(struct net_device *dev) @@ -506,6 +505,7 @@ static int macvlan_init(struct net_device *dev) (lowerdev->state & MACVLAN_STATE_MASK); dev->features = lowerdev->features & MACVLAN_FEATURES; dev->features |= NETIF_F_LLTX; + dev->vlan_features = lowerdev->vlan_features & MACVLAN_FEATURES; dev->gso_max_size = lowerdev->gso_max_size; dev->iflink = lowerdev->ifindex; dev->hard_header_len = lowerdev->hard_header_len; @@ -992,7 +992,6 @@ static int macvlan_device_event(struct notifier_block *unused, list_for_each_entry_safe(vlan, next, &port->vlans, list) vlan->dev->rtnl_link_ops->dellink(vlan->dev, &list_kill); unregister_netdevice_many(&list_kill); - list_del(&list_kill); break; case NETDEV_PRE_TYPE_CHANGE: /* Forbid underlaying device to change its type. */ diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 5895e4d..89d21fc 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -17,6 +17,7 @@ #include <linux/idr.h> #include <linux/fs.h> +#include <net/ipv6.h> #include <net/net_namespace.h> #include <net/rtnetlink.h> #include <net/sock.h> @@ -108,17 +109,15 @@ out: return err; } +/* Requires RTNL */ static int macvtap_set_queue(struct net_device *dev, struct file *file, struct macvtap_queue *q) { struct macvlan_dev *vlan = netdev_priv(dev); - int err = -EBUSY; - rtnl_lock(); if (vlan->numqueues == MAX_MACVTAP_QUEUES) - goto out; + return -EBUSY; - err = 0; rcu_assign_pointer(q->vlan, vlan); rcu_assign_pointer(vlan->taps[vlan->numvtaps], q); sock_hold(&q->sk); @@ -132,9 +131,7 @@ static int macvtap_set_queue(struct net_device *dev, struct file *file, vlan->numvtaps++; vlan->numqueues++; -out: - rtnl_unlock(); - return err; + return 0; } static int macvtap_disable_queue(struct macvtap_queue *q) @@ -315,6 +312,15 @@ static int macvtap_forward(struct net_device *dev, struct sk_buff *skb) segs = nskb; } } else { + /* If we receive a partial checksum and the tap side + * doesn't support checksum offload, 
compute the checksum. + * Note: it doesn't matter which checksum feature to + * check, we either support them all or none. + */ + if (skb->ip_summed == CHECKSUM_PARTIAL && + !(features & NETIF_F_ALL_CSUM) && + skb_checksum_help(skb)) + goto drop; skb_queue_tail(&q->sk.sk_receive_queue, skb); } @@ -441,11 +447,12 @@ static void macvtap_sock_destruct(struct sock *sk) static int macvtap_open(struct inode *inode, struct file *file) { struct net *net = current->nsproxy->net_ns; - struct net_device *dev = dev_get_by_macvtap_minor(iminor(inode)); + struct net_device *dev; struct macvtap_queue *q; - int err; + int err = -ENODEV; - err = -ENODEV; + rtnl_lock(); + dev = dev_get_by_macvtap_minor(iminor(inode)); if (!dev) goto out; @@ -485,6 +492,7 @@ out: if (dev) dev_put(dev); + rtnl_unlock(); return err; } @@ -559,6 +567,8 @@ static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb, break; case VIRTIO_NET_HDR_GSO_UDP: gso_type = SKB_GSO_UDP; + if (skb->protocol == htons(ETH_P_IPV6)) + ipv6_proxy_select_ident(skb); break; default: return -EINVAL; @@ -615,6 +625,8 @@ static int macvtap_skb_to_vnet_hdr(const struct sk_buff *skb, if (skb->ip_summed == CHECKSUM_PARTIAL) { vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; vnet_hdr->csum_start = skb_checksum_start_offset(skb); + if (vlan_tx_tag_present(skb)) + vnet_hdr->csum_start += VLAN_HLEN; vnet_hdr->csum_offset = skb->csum_offset; } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) { vnet_hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID; diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 936f091..cceae07 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -604,7 +604,7 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (file == ppp->owner) ppp_shutdown_interface(ppp); } - if (atomic_long_read(&file->f_count) <= 2) { + if (atomic_long_read(&file->f_count) < 2) { ppp_release(NULL, file); err = 0; } else diff --git a/drivers/net/ppp/pppoe.c 
b/drivers/net/ppp/pppoe.c index 82ee6ed..addd232 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -675,7 +675,7 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr, po->chan.hdrlen = (sizeof(struct pppoe_hdr) + dev->hard_header_len); - po->chan.mtu = dev->mtu - sizeof(struct pppoe_hdr); + po->chan.mtu = dev->mtu - sizeof(struct pppoe_hdr) - 2; po->chan.private = sk; po->chan.ops = &pppoe_chan_ops; diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index 0180531..1dc628f 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -281,7 +281,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb) nf_reset(skb); skb->ip_summed = CHECKSUM_NONE; - ip_select_ident(skb, &rt->dst, NULL); + ip_select_ident(skb, NULL); ip_send_check(iph); ip_local_out(skb); @@ -506,7 +506,9 @@ static int pptp_getname(struct socket *sock, struct sockaddr *uaddr, int len = sizeof(struct sockaddr_pppox); struct sockaddr_pppox sp; - sp.sa_family = AF_PPPOX; + memset(&sp.sa_addr, 0, sizeof(sp.sa_addr)); + + sp.sa_family = AF_PPPOX; sp.sa_protocol = PX_PROTO_PPTP; sp.sa_addr.pptp = pppox_sk(sock->sk)->proto.pptp.src_addr; diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index cc70ecf..8752644 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -83,6 +83,7 @@ #include <linux/delay.h> #include <linux/init.h> #include <linux/slab.h> +#include <linux/workqueue.h> #include "slip.h" #ifdef CONFIG_INET #include <linux/ip.h> @@ -416,34 +417,44 @@ static void sl_encaps(struct slip *sl, unsigned char *icp, int len) #endif } -/* - * Called by the driver when there's room for more data. If we have - * more packets to send, we send them here. - */ -static void slip_write_wakeup(struct tty_struct *tty) +/* Write out any remaining transmit buffer. 
Scheduled when tty is writable */ +static void slip_transmit(struct work_struct *work) { + struct slip *sl = container_of(work, struct slip, tx_work); int actual; - struct slip *sl = tty->disc_data; + spin_lock_bh(&sl->lock); /* First make sure we're connected. */ - if (!sl || sl->magic != SLIP_MAGIC || !netif_running(sl->dev)) + if (!sl->tty || sl->magic != SLIP_MAGIC || !netif_running(sl->dev)) { + spin_unlock_bh(&sl->lock); return; + } - spin_lock(&sl->lock); if (sl->xleft <= 0) { /* Now serial buffer is almost free & we can start * transmission of another packet */ sl->dev->stats.tx_packets++; - clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); - spin_unlock(&sl->lock); + clear_bit(TTY_DO_WRITE_WAKEUP, &sl->tty->flags); + spin_unlock_bh(&sl->lock); sl_unlock(sl); return; } - actual = tty->ops->write(tty, sl->xhead, sl->xleft); + actual = sl->tty->ops->write(sl->tty, sl->xhead, sl->xleft); sl->xleft -= actual; sl->xhead += actual; - spin_unlock(&sl->lock); + spin_unlock_bh(&sl->lock); +} + +/* + * Called by the driver when there's room for more data. + * Schedule the transmit. 
+ */ +static void slip_write_wakeup(struct tty_struct *tty) +{ + struct slip *sl = tty->disc_data; + + schedule_work(&sl->tx_work); } static void sl_tx_timeout(struct net_device *dev) @@ -749,6 +760,7 @@ static struct slip *sl_alloc(dev_t line) sl->magic = SLIP_MAGIC; sl->dev = dev; spin_lock_init(&sl->lock); + INIT_WORK(&sl->tx_work, slip_transmit); sl->mode = SL_MODE_DEFAULT; #ifdef CONFIG_SLIP_SMART /* initialize timer_list struct */ @@ -872,8 +884,12 @@ static void slip_close(struct tty_struct *tty) if (!sl || sl->magic != SLIP_MAGIC || sl->tty != tty) return; + spin_lock_bh(&sl->lock); tty->disc_data = NULL; sl->tty = NULL; + spin_unlock_bh(&sl->lock); + + flush_work(&sl->tx_work); /* VSV = very important to remove timers */ #ifdef CONFIG_SLIP_SMART diff --git a/drivers/net/slip/slip.h b/drivers/net/slip/slip.h index 67673cf..cf32aad 100644 --- a/drivers/net/slip/slip.h +++ b/drivers/net/slip/slip.h @@ -53,6 +53,7 @@ struct slip { struct tty_struct *tty; /* ptr to TTY structure */ struct net_device *dev; /* easy for intr handling */ spinlock_t lock; + struct work_struct tx_work; /* Flushes transmit buffer */ #ifdef SL_INCLUDE_CSLIP struct slcompress *slcomp; /* for header compression */ diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 6327df2..258f65b 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -629,6 +629,7 @@ static int team_change_mode(struct team *team, const char *kind) static void team_notify_peers_work(struct work_struct *work) { struct team *team; + int val; team = container_of(work, struct team, notify_peers.dw.work); @@ -636,9 +637,14 @@ static void team_notify_peers_work(struct work_struct *work) schedule_delayed_work(&team->notify_peers.dw, 0); return; } + val = atomic_dec_if_positive(&team->notify_peers.count_pending); + if (val < 0) { + rtnl_unlock(); + return; + } call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, team->dev); rtnl_unlock(); - if (!atomic_dec_and_test(&team->notify_peers.count_pending)) + 
if (val) schedule_delayed_work(&team->notify_peers.dw, msecs_to_jiffies(team->notify_peers.interval)); } @@ -647,7 +653,7 @@ static void team_notify_peers(struct team *team) { if (!team->notify_peers.count || !netif_running(team->dev)) return; - atomic_set(&team->notify_peers.count_pending, team->notify_peers.count); + atomic_add(team->notify_peers.count, &team->notify_peers.count_pending); schedule_delayed_work(&team->notify_peers.dw, 0); } @@ -669,6 +675,7 @@ static void team_notify_peers_fini(struct team *team) static void team_mcast_rejoin_work(struct work_struct *work) { struct team *team; + int val; team = container_of(work, struct team, mcast_rejoin.dw.work); @@ -676,9 +683,14 @@ static void team_mcast_rejoin_work(struct work_struct *work) schedule_delayed_work(&team->mcast_rejoin.dw, 0); return; } + val = atomic_dec_if_positive(&team->mcast_rejoin.count_pending); + if (val < 0) { + rtnl_unlock(); + return; + } call_netdevice_notifiers(NETDEV_RESEND_IGMP, team->dev); rtnl_unlock(); - if (!atomic_dec_and_test(&team->mcast_rejoin.count_pending)) + if (val) schedule_delayed_work(&team->mcast_rejoin.dw, msecs_to_jiffies(team->mcast_rejoin.interval)); } @@ -687,7 +699,7 @@ static void team_mcast_rejoin(struct team *team) { if (!team->mcast_rejoin.count || !netif_running(team->dev)) return; - atomic_set(&team->mcast_rejoin.count_pending, team->mcast_rejoin.count); + atomic_add(team->mcast_rejoin.count, &team->mcast_rejoin.count_pending); schedule_delayed_work(&team->mcast_rejoin.dw, 0); } @@ -1725,6 +1737,7 @@ static int team_change_mtu(struct net_device *dev, int new_mtu) * to traverse list in reverse under rcu_read_lock */ mutex_lock(&team->lock); + team->port_mtu_change_allowed = true; list_for_each_entry(port, &team->port_list, list) { err = dev_set_mtu(port->dev, new_mtu); if (err) { @@ -1733,6 +1746,7 @@ static int team_change_mtu(struct net_device *dev, int new_mtu) goto unwind; } } + team->port_mtu_change_allowed = false; mutex_unlock(&team->lock); 
dev->mtu = new_mtu; @@ -1742,6 +1756,7 @@ static int team_change_mtu(struct net_device *dev, int new_mtu) unwind: list_for_each_entry_continue_reverse(port, &team->port_list, list) dev_set_mtu(port->dev, dev->mtu); + team->port_mtu_change_allowed = false; mutex_unlock(&team->lock); return err; @@ -2861,7 +2876,9 @@ static int team_device_event(struct notifier_block *unused, break; case NETDEV_CHANGEMTU: /* Forbid to change mtu of underlaying device */ - return NOTIFY_BAD; + if (!port->team->port_mtu_change_allowed) + return NOTIFY_BAD; + break; case NETDEV_PRE_TYPE_CHANGE: /* Forbid to change type of underlaying device */ return NOTIFY_BAD; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 10636cb..d72d063 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -65,6 +65,7 @@ #include <linux/nsproxy.h> #include <linux/virtio_net.h> #include <linux/rcupdate.h> +#include <net/ipv6.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/rtnetlink.h> @@ -1103,6 +1104,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, break; } + skb_reset_network_header(skb); + if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE) { pr_debug("GSO!\n"); switch (gso.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { @@ -1114,6 +1117,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, break; case VIRTIO_NET_HDR_GSO_UDP: skb_shinfo(skb)->gso_type = SKB_GSO_UDP; + if (skb->protocol == htons(ETH_P_IPV6)) + ipv6_proxy_select_ident(skb); break; default: tun->dev->stats.rx_frame_errors++; @@ -1143,7 +1148,6 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; } - skb_reset_network_header(skb); skb_probe_transport_header(skb, 0); rxhash = skb_get_rxhash(skb); @@ -1185,6 +1189,10 @@ static ssize_t tun_put_user(struct tun_struct *tun, struct tun_pi pi = { 0, skb->protocol }; ssize_t total = 0; int vlan_offset = 0, copied; + int vlan_hlen = 0; + + if 
(vlan_tx_tag_present(skb)) + vlan_hlen = VLAN_HLEN; if (!(tun->flags & TUN_NO_PI)) { if ((len -= sizeof(pi)) < 0) @@ -1236,7 +1244,8 @@ static ssize_t tun_put_user(struct tun_struct *tun, if (skb->ip_summed == CHECKSUM_PARTIAL) { gso.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; - gso.csum_start = skb_checksum_start_offset(skb); + gso.csum_start = skb_checksum_start_offset(skb) + + vlan_hlen; gso.csum_offset = skb->csum_offset; } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) { gso.flags = VIRTIO_NET_HDR_F_DATA_VALID; @@ -1249,10 +1258,9 @@ static ssize_t tun_put_user(struct tun_struct *tun, } copied = total; - total += skb->len; - if (!vlan_tx_tag_present(skb)) { - len = min_t(int, skb->len, len); - } else { + len = min_t(int, skb->len + vlan_hlen, len); + total += skb->len + vlan_hlen; + if (vlan_hlen) { int copy, ret; struct { __be16 h_vlan_proto; @@ -1263,8 +1271,6 @@ static ssize_t tun_put_user(struct tun_struct *tun, veth.h_vlan_TCI = htons(vlan_tx_tag_get(skb)); vlan_offset = offsetof(struct vlan_ethhdr, h_vlan_proto); - len = min_t(int, skb->len + VLAN_HLEN, len); - total += VLAN_HLEN; copy = min_t(int, vlan_offset, len); ret = skb_copy_datagram_const_iovec(skb, 0, iv, copied, copy); diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 3ecb213..b8b8f99 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -698,6 +698,7 @@ static int ax88179_set_mac_addr(struct net_device *net, void *p) { struct usbnet *dev = netdev_priv(net); struct sockaddr *addr = p; + int ret; if (netif_running(net)) return -EBUSY; @@ -707,8 +708,12 @@ static int ax88179_set_mac_addr(struct net_device *net, void *p) memcpy(net->dev_addr, addr->sa_data, ETH_ALEN); /* Set the MAC address */ - return ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN, + ret = ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_NODE_ID, ETH_ALEN, ETH_ALEN, net->dev_addr); + if (ret < 0) + return ret; + + return 0; } static const struct net_device_ops 
ax88179_netdev_ops = { diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index 25ba7ec..7cabe45 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -120,6 +120,16 @@ static void cdc_mbim_unbind(struct usbnet *dev, struct usb_interface *intf) cdc_ncm_unbind(dev, intf); } +/* verify that the ethernet protocol is IPv4 or IPv6 */ +static bool is_ip_proto(__be16 proto) +{ + switch (proto) { + case htons(ETH_P_IP): + case htons(ETH_P_IPV6): + return true; + } + return false; +} static struct sk_buff *cdc_mbim_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) { @@ -128,6 +138,7 @@ static struct sk_buff *cdc_mbim_tx_fixup(struct usbnet *dev, struct sk_buff *skb struct cdc_ncm_ctx *ctx = info->ctx; __le32 sign = cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN); u16 tci = 0; + bool is_ip; u8 *c; if (!ctx) @@ -137,25 +148,32 @@ static struct sk_buff *cdc_mbim_tx_fixup(struct usbnet *dev, struct sk_buff *skb if (skb->len <= ETH_HLEN) goto error; + /* Some applications using e.g. packet sockets will + * bypass the VLAN acceleration and create tagged + * ethernet frames directly. 
We primarily look for + * the accelerated out-of-band tag, but fall back if + * required + */ + skb_reset_mac_header(skb); + if (vlan_get_tag(skb, &tci) < 0 && skb->len > VLAN_ETH_HLEN && + __vlan_get_tag(skb, &tci) == 0) { + is_ip = is_ip_proto(vlan_eth_hdr(skb)->h_vlan_encapsulated_proto); + skb_pull(skb, VLAN_ETH_HLEN); + } else { + is_ip = is_ip_proto(eth_hdr(skb)->h_proto); + skb_pull(skb, ETH_HLEN); + } + /* mapping VLANs to MBIM sessions: * no tag => IPS session <0> * 1 - 255 => IPS session <vlanid> * 256 - 511 => DSS session <vlanid - 256> * 512 - 4095 => unsupported, drop */ - vlan_get_tag(skb, &tci); - switch (tci & 0x0f00) { case 0x0000: /* VLAN ID 0 - 255 */ - /* verify that datagram is IPv4 or IPv6 */ - skb_reset_mac_header(skb); - switch (eth_hdr(skb)->h_proto) { - case htons(ETH_P_IP): - case htons(ETH_P_IPV6): - break; - default: + if (!is_ip) goto error; - } c = (u8 *)&sign; c[3] = tci; break; @@ -169,7 +187,6 @@ static struct sk_buff *cdc_mbim_tx_fixup(struct usbnet *dev, struct sk_buff *skb "unsupported tci=0x%04x\n", tci); goto error; } - skb_pull(skb, ETH_HLEN); } spin_lock_bh(&ctx->mtx); diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 558469f..7f22d27 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -647,8 +647,25 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x05c6, 0x9084, 4)}, {QMI_FIXED_INTF(0x05c6, 0x920d, 0)}, {QMI_FIXED_INTF(0x05c6, 0x920d, 5)}, + {QMI_FIXED_INTF(0x0846, 0x68a2, 8)}, {QMI_FIXED_INTF(0x12d1, 0x140c, 1)}, /* Huawei E173 */ {QMI_FIXED_INTF(0x12d1, 0x14ac, 1)}, /* Huawei E1820 */ + {QMI_FIXED_INTF(0x16d8, 0x6003, 0)}, /* CMOTech 6003 */ + {QMI_FIXED_INTF(0x16d8, 0x6007, 0)}, /* CMOTech CHE-628S */ + {QMI_FIXED_INTF(0x16d8, 0x6008, 0)}, /* CMOTech CMU-301 */ + {QMI_FIXED_INTF(0x16d8, 0x6280, 0)}, /* CMOTech CHU-628 */ + {QMI_FIXED_INTF(0x16d8, 0x7001, 0)}, /* CMOTech CHU-720S */ + {QMI_FIXED_INTF(0x16d8, 0x7002, 0)}, /* CMOTech 7002 */ + 
{QMI_FIXED_INTF(0x16d8, 0x7003, 4)}, /* CMOTech CHU-629K */ + {QMI_FIXED_INTF(0x16d8, 0x7004, 3)}, /* CMOTech 7004 */ + {QMI_FIXED_INTF(0x16d8, 0x7006, 5)}, /* CMOTech CGU-629 */ + {QMI_FIXED_INTF(0x16d8, 0x700a, 4)}, /* CMOTech CHU-629S */ + {QMI_FIXED_INTF(0x16d8, 0x7211, 0)}, /* CMOTech CHU-720I */ + {QMI_FIXED_INTF(0x16d8, 0x7212, 0)}, /* CMOTech 7212 */ + {QMI_FIXED_INTF(0x16d8, 0x7213, 0)}, /* CMOTech 7213 */ + {QMI_FIXED_INTF(0x16d8, 0x7251, 1)}, /* CMOTech 7251 */ + {QMI_FIXED_INTF(0x16d8, 0x7252, 1)}, /* CMOTech 7252 */ + {QMI_FIXED_INTF(0x16d8, 0x7253, 1)}, /* CMOTech 7253 */ {QMI_FIXED_INTF(0x19d2, 0x0002, 1)}, {QMI_FIXED_INTF(0x19d2, 0x0012, 1)}, {QMI_FIXED_INTF(0x19d2, 0x0017, 3)}, @@ -699,24 +716,47 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x19d2, 0x1255, 3)}, {QMI_FIXED_INTF(0x19d2, 0x1255, 4)}, {QMI_FIXED_INTF(0x19d2, 0x1256, 4)}, + {QMI_FIXED_INTF(0x19d2, 0x1270, 5)}, /* ZTE MF667 */ {QMI_FIXED_INTF(0x19d2, 0x1401, 2)}, {QMI_FIXED_INTF(0x19d2, 0x1402, 2)}, /* ZTE MF60 */ {QMI_FIXED_INTF(0x19d2, 0x1424, 2)}, {QMI_FIXED_INTF(0x19d2, 0x1425, 2)}, {QMI_FIXED_INTF(0x19d2, 0x1426, 2)}, /* ZTE MF91 */ + {QMI_FIXED_INTF(0x19d2, 0x1428, 2)}, /* Telewell TW-LTE 4G v2 */ {QMI_FIXED_INTF(0x19d2, 0x2002, 4)}, /* ZTE (Vodafone) K3765-Z */ {QMI_FIXED_INTF(0x0f3d, 0x68a2, 8)}, /* Sierra Wireless MC7700 */ {QMI_FIXED_INTF(0x114f, 0x68a2, 8)}, /* Sierra Wireless MC7750 */ {QMI_FIXED_INTF(0x1199, 0x68a2, 8)}, /* Sierra Wireless MC7710 in QMI mode */ {QMI_FIXED_INTF(0x1199, 0x68a2, 19)}, /* Sierra Wireless MC7710 in QMI mode */ + {QMI_FIXED_INTF(0x1199, 0x68c0, 8)}, /* Sierra Wireless MC73xx */ + {QMI_FIXED_INTF(0x1199, 0x68c0, 10)}, /* Sierra Wireless MC73xx */ + {QMI_FIXED_INTF(0x1199, 0x68c0, 11)}, /* Sierra Wireless MC73xx */ {QMI_FIXED_INTF(0x1199, 0x901c, 8)}, /* Sierra Wireless EM7700 */ + {QMI_FIXED_INTF(0x1199, 0x901f, 8)}, /* Sierra Wireless EM7355 */ + {QMI_FIXED_INTF(0x1199, 0x9041, 8)}, /* Sierra Wireless MC7305/MC7355 */ 
{QMI_FIXED_INTF(0x1199, 0x9051, 8)}, /* Netgear AirCard 340U */ + {QMI_FIXED_INTF(0x1199, 0x9057, 8)}, {QMI_FIXED_INTF(0x1bbb, 0x011e, 4)}, /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */ + {QMI_FIXED_INTF(0x1bbb, 0x0203, 2)}, /* Alcatel L800MA */ {QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */ {QMI_FIXED_INTF(0x2357, 0x9000, 4)}, /* TP-LINK MA260 */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ - {QMI_FIXED_INTF(0x0b3c, 0xc005, 6)}, /* Olivetti Olicard 200 */ + {QMI_FIXED_INTF(0x1bc7, 0x1201, 2)}, /* Telit LE920 */ + {QMI_FIXED_INTF(0x0b3c, 0xc000, 4)}, /* Olivetti Olicard 100 */ + {QMI_FIXED_INTF(0x0b3c, 0xc001, 4)}, /* Olivetti Olicard 120 */ + {QMI_FIXED_INTF(0x0b3c, 0xc002, 4)}, /* Olivetti Olicard 140 */ + {QMI_FIXED_INTF(0x0b3c, 0xc004, 6)}, /* Olivetti Olicard 155 */ + {QMI_FIXED_INTF(0x0b3c, 0xc005, 6)}, /* Olivetti Olicard 200 */ + {QMI_FIXED_INTF(0x0b3c, 0xc00a, 6)}, /* Olivetti Olicard 160 */ + {QMI_FIXED_INTF(0x0b3c, 0xc00b, 4)}, /* Olivetti Olicard 500 */ {QMI_FIXED_INTF(0x1e2d, 0x0060, 4)}, /* Cinterion PLxx */ + {QMI_FIXED_INTF(0x1e2d, 0x0053, 4)}, /* Cinterion PHxx,PXxx */ + {QMI_FIXED_INTF(0x413c, 0x81a2, 8)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */ + {QMI_FIXED_INTF(0x413c, 0x81a3, 8)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */ + {QMI_FIXED_INTF(0x413c, 0x81a4, 8)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */ + {QMI_FIXED_INTF(0x413c, 0x81a8, 8)}, /* Dell Wireless 5808 Gobi(TM) 4G LTE Mobile Broadband Card */ + {QMI_FIXED_INTF(0x413c, 0x81a9, 8)}, /* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */ + {QMI_FIXED_INTF(0x03f0, 0x581d, 4)}, /* HP lt4112 LTE/HSPA+ Gobi 4G Module (Huawei me906e) */ /* 4. 
Gobi 1000 devices */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 4ecdf3c..c8e3333 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -282,13 +282,15 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb) return list_first_entry(&fdb->remotes, struct vxlan_rdst, list); } -/* Find VXLAN socket based on network namespace and UDP port */ -static struct vxlan_sock *vxlan_find_sock(struct net *net, __be16 port) +/* Find VXLAN socket based on network namespace, address family and UDP port */ +static struct vxlan_sock *vxlan_find_sock(struct net *net, + sa_family_t family, __be16 port) { struct vxlan_sock *vs; hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) { - if (inet_sk(vs->sock->sk)->inet_sport == port) + if (inet_sk(vs->sock->sk)->inet_sport == port && + inet_sk(vs->sock->sk)->sk.sk_family == family) return vs; } return NULL; @@ -307,11 +309,12 @@ static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id) } /* Look up VNI in a per net namespace table */ -static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, __be16 port) +static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, + sa_family_t family, __be16 port) { struct vxlan_sock *vs; - vs = vxlan_find_sock(net, port); + vs = vxlan_find_sock(net, family, port); if (!vs) return NULL; @@ -1228,7 +1231,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb) } else if (vxlan->flags & VXLAN_F_L3MISS) { union vxlan_addr ipa = { .sin.sin_addr.s_addr = tip, - .sa.sa_family = AF_INET, + .sin.sin_family = AF_INET, }; vxlan_ip_miss(dev, &ipa); @@ -1341,9 +1344,6 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb) if (!in6_dev) goto out; - if (!pskb_may_pull(skb, skb->len)) - goto out; - iphdr = ipv6_hdr(skb); saddr = &iphdr->saddr; daddr = &iphdr->daddr; @@ -1389,7 +1389,7 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff 
*skb) } else if (vxlan->flags & VXLAN_F_L3MISS) { union vxlan_addr ipa = { .sin6.sin6_addr = msg->target, - .sa.sa_family = AF_INET6, + .sin6.sin6_family = AF_INET6, }; vxlan_ip_miss(dev, &ipa); @@ -1422,7 +1422,7 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) if (!n && (vxlan->flags & VXLAN_F_L3MISS)) { union vxlan_addr ipa = { .sin.sin_addr.s_addr = pip->daddr, - .sa.sa_family = AF_INET, + .sin.sin_family = AF_INET, }; vxlan_ip_miss(dev, &ipa); @@ -1443,7 +1443,7 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) if (!n && (vxlan->flags & VXLAN_F_L3MISS)) { union vxlan_addr ipa = { .sin6.sin6_addr = pip6->daddr, - .sa.sa_family = AF_INET6, + .sin6.sin6_family = AF_INET6, }; vxlan_ip_miss(dev, &ipa); @@ -1683,6 +1683,8 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, struct pcpu_tstats *rx_stats = this_cpu_ptr(dst_vxlan->dev->tstats); union vxlan_addr loopback; union vxlan_addr *remote_ip = &dst_vxlan->default_dst.remote_ip; + struct net_device *dev = skb->dev; + int len = skb->len; skb->pkt_type = PACKET_HOST; skb->encapsulation = 0; @@ -1704,16 +1706,16 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, u64_stats_update_begin(&tx_stats->syncp); tx_stats->tx_packets++; - tx_stats->tx_bytes += skb->len; + tx_stats->tx_bytes += len; u64_stats_update_end(&tx_stats->syncp); if (netif_rx(skb) == NET_RX_SUCCESS) { u64_stats_update_begin(&rx_stats->syncp); rx_stats->rx_packets++; - rx_stats->rx_bytes += skb->len; + rx_stats->rx_bytes += len; u64_stats_update_end(&rx_stats->syncp); } else { - skb->dev->stats.rx_dropped++; + dev->stats.rx_dropped++; } } @@ -1784,7 +1786,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, struct vxlan_dev *dst_vxlan; ip_rt_put(rt); - dst_vxlan = vxlan_find_vni(dev_net(dev), vni, dst_port); + dst_vxlan = vxlan_find_vni(dev_net(dev), vni, + dst->sa.sa_family, dst_port); if (!dst_vxlan) goto 
tx_error; vxlan_encap_bypass(skb, vxlan, dst_vxlan); @@ -1837,7 +1840,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, struct vxlan_dev *dst_vxlan; dst_release(ndst); - dst_vxlan = vxlan_find_vni(dev_net(dev), vni, dst_port); + dst_vxlan = vxlan_find_vni(dev_net(dev), vni, + dst->sa.sa_family, dst_port); if (!dst_vxlan) goto tx_error; vxlan_encap_bypass(skb, vxlan, dst_vxlan); @@ -1888,7 +1892,8 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) return arp_reduce(dev, skb); #if IS_ENABLED(CONFIG_IPV6) else if (ntohs(eth->h_proto) == ETH_P_IPV6 && - skb->len >= sizeof(struct ipv6hdr) + sizeof(struct nd_msg) && + pskb_may_pull(skb, sizeof(struct ipv6hdr) + + sizeof(struct nd_msg)) && ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) { struct nd_msg *msg; @@ -1897,6 +1902,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) msg->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) return neigh_reduce(dev, skb); } + eth = eth_hdr(skb); #endif } @@ -1986,6 +1992,7 @@ static int vxlan_init(struct net_device *dev) { struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); + bool ipv6 = vxlan->flags & VXLAN_F_IPV6; struct vxlan_sock *vs; dev->tstats = alloc_percpu(struct pcpu_tstats); @@ -1993,10 +2000,10 @@ static int vxlan_init(struct net_device *dev) return -ENOMEM; spin_lock(&vn->sock_lock); - vs = vxlan_find_sock(dev_net(dev), vxlan->dst_port); - if (vs) { + vs = vxlan_find_sock(dev_net(dev), ipv6 ? 
AF_INET6 : AF_INET, + vxlan->dst_port); + if (vs && atomic_add_unless(&vs->refcnt, 1, 0)) { /* If we have a socket with same port already, reuse it */ - atomic_inc(&vs->refcnt); vxlan_vs_add_dev(vs, vxlan); } else { /* otherwise make new socket outside of RTNL */ @@ -2157,9 +2164,9 @@ static void vxlan_setup(struct net_device *dev) eth_hw_addr_random(dev); ether_setup(dev); if (vxlan->default_dst.remote_ip.sa.sa_family == AF_INET6) - dev->hard_header_len = ETH_HLEN + VXLAN6_HEADROOM; + dev->needed_headroom = ETH_HLEN + VXLAN6_HEADROOM; else - dev->hard_header_len = ETH_HLEN + VXLAN_HEADROOM; + dev->needed_headroom = ETH_HLEN + VXLAN_HEADROOM; dev->netdev_ops = &vxlan_netdev_ops; dev->destructor = free_netdev; @@ -2438,13 +2445,10 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, return vs; spin_lock(&vn->sock_lock); - vs = vxlan_find_sock(net, port); - if (vs) { - if (vs->rcv == rcv) - atomic_inc(&vs->refcnt); - else + vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port); + if (vs && ((vs->rcv != rcv) || + !atomic_add_unless(&vs->refcnt, 1, 0))) vs = ERR_PTR(-EBUSY); - } spin_unlock(&vn->sock_lock); if (!vs) @@ -2540,8 +2544,7 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, if (!tb[IFLA_MTU]) dev->mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM); - /* update header length based on lower device */ - dev->hard_header_len = lowerdev->hard_header_len + + dev->needed_headroom = lowerdev->hard_header_len + (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM); } @@ -2584,7 +2587,8 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, if (data[IFLA_VXLAN_PORT]) vxlan->dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]); - if (vxlan_find_vni(net, vni, vxlan->dst_port)) { + if (vxlan_find_vni(net, vni, use_ipv6 ? 
AF_INET6 : AF_INET, + vxlan->dst_port)) { pr_info("duplicate VNI %u\n", vni); return -EEXIST; } diff --git a/drivers/net/wireless/ath/ath5k/qcu.c b/drivers/net/wireless/ath/ath5k/qcu.c index 0583c69..ddaad71 100644 --- a/drivers/net/wireless/ath/ath5k/qcu.c +++ b/drivers/net/wireless/ath/ath5k/qcu.c @@ -225,13 +225,7 @@ ath5k_hw_setup_tx_queue(struct ath5k_hw *ah, enum ath5k_tx_queue queue_type, } else { switch (queue_type) { case AR5K_TX_QUEUE_DATA: - for (queue = AR5K_TX_QUEUE_ID_DATA_MIN; - ah->ah_txq[queue].tqi_type != - AR5K_TX_QUEUE_INACTIVE; queue++) { - - if (queue > AR5K_TX_QUEUE_ID_DATA_MAX) - return -EINVAL; - } + queue = queue_info->tqi_subtype; break; case AR5K_TX_QUEUE_UAPSD: queue = AR5K_TX_QUEUE_ID_UAPSD; diff --git a/drivers/net/wireless/ath/ath9k/ar9003_phy.c b/drivers/net/wireless/ath/ath9k/ar9003_phy.c index e897648..5092343 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_phy.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_phy.c @@ -648,6 +648,19 @@ static void ar9003_hw_override_ini(struct ath_hw *ah) else ah->enabled_cals &= ~TX_CL_CAL; } + + if (AR_SREV_9340(ah) || AR_SREV_9550(ah)) { + if (ah->is_clk_25mhz) { + REG_WRITE(ah, AR_RTC_DERIVED_CLK, 0x17c << 1); + REG_WRITE(ah, AR_SLP32_MODE, 0x0010f3d7); + REG_WRITE(ah, AR_SLP32_INC, 0x0001e7ae); + } else { + REG_WRITE(ah, AR_RTC_DERIVED_CLK, 0x261 << 1); + REG_WRITE(ah, AR_SLP32_MODE, 0x0010f400); + REG_WRITE(ah, AR_SLP32_INC, 0x0001e800); + } + udelay(100); + } } static void ar9003_hw_prog_ini(struct ath_hw *ah, diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index 4e0a942..c6f255f 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -916,19 +916,6 @@ static void ath9k_hw_init_pll(struct ath_hw *ah, udelay(RTC_PLL_SETTLE_DELAY); REG_WRITE(ah, AR_RTC_SLEEP_CLK, AR_RTC_FORCE_DERIVED_CLK); - - if (AR_SREV_9340(ah) || AR_SREV_9550(ah)) { - if (ah->is_clk_25mhz) { - REG_WRITE(ah, AR_RTC_DERIVED_CLK, 0x17c << 1); - 
REG_WRITE(ah, AR_SLP32_MODE, 0x0010f3d7); - REG_WRITE(ah, AR_SLP32_INC, 0x0001e7ae); - } else { - REG_WRITE(ah, AR_RTC_DERIVED_CLK, 0x261 << 1); - REG_WRITE(ah, AR_SLP32_MODE, 0x0010f400); - REG_WRITE(ah, AR_SLP32_INC, 0x0001e800); - } - udelay(100); - } } static void ath9k_hw_init_interrupt_masks(struct ath_hw *ah, diff --git a/drivers/net/wireless/ath/ath9k/hw.h b/drivers/net/wireless/ath/ath9k/hw.h index 69a907b..5bf775e 100644 --- a/drivers/net/wireless/ath/ath9k/hw.h +++ b/drivers/net/wireless/ath/ath9k/hw.h @@ -215,8 +215,8 @@ #define AH_WOW_BEACON_MISS BIT(3) enum ath_hw_txq_subtype { - ATH_TXQ_AC_BE = 0, - ATH_TXQ_AC_BK = 1, + ATH_TXQ_AC_BK = 0, + ATH_TXQ_AC_BE = 1, ATH_TXQ_AC_VI = 2, ATH_TXQ_AC_VO = 3, }; diff --git a/drivers/net/wireless/ath/ath9k/mac.c b/drivers/net/wireless/ath/ath9k/mac.c index a3eff09..0244680 100644 --- a/drivers/net/wireless/ath/ath9k/mac.c +++ b/drivers/net/wireless/ath/ath9k/mac.c @@ -311,14 +311,7 @@ int ath9k_hw_setuptxqueue(struct ath_hw *ah, enum ath9k_tx_queue type, q = ATH9K_NUM_TX_QUEUES - 3; break; case ATH9K_TX_QUEUE_DATA: - for (q = 0; q < ATH9K_NUM_TX_QUEUES; q++) - if (ah->txq[q].tqi_type == - ATH9K_TX_QUEUE_INACTIVE) - break; - if (q == ATH9K_NUM_TX_QUEUES) { - ath_err(common, "No available TX queue\n"); - return -1; - } + q = qinfo->tqi_subtype; break; default: ath_err(common, "Invalid TX queue type: %u\n", type); diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index ba39178..d92c6ff 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -904,6 +904,15 @@ ath_tx_get_tid_subframe(struct ath_softc *sc, struct ath_txq *txq, tx_info = IEEE80211_SKB_CB(skb); tx_info->flags &= ~IEEE80211_TX_CTL_CLEAR_PS_FILT; + + /* + * No aggregation session is running, but there may be frames + * from a previous session or a failed attempt in the queue. 
+ * Send them out as normal data frames + */ + if (!tid->active) + tx_info->flags &= ~IEEE80211_TX_CTL_AMPDU; + if (!(tx_info->flags & IEEE80211_TX_CTL_AMPDU)) { bf->bf_state.bf_type = 0; return bf; @@ -1718,7 +1727,7 @@ int ath_cabq_update(struct ath_softc *sc) else if (sc->config.cabqReadytime > ATH9K_READY_TIME_HI_BOUND) sc->config.cabqReadytime = ATH9K_READY_TIME_HI_BOUND; - qi.tqi_readyTime = (cur_conf->beacon_interval * + qi.tqi_readyTime = (TU_TO_USEC(cur_conf->beacon_interval) * sc->config.cabqReadytime) / 100; ath_txq_update(sc, qnum, &qi); @@ -2078,7 +2087,7 @@ static struct ath_buf *ath_tx_setup_buffer(struct ath_softc *sc, ATH_TXBUF_RESET(bf); - if (tid) { + if (tid && ieee80211_is_data_present(hdr->frame_control)) { fragno = le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_FRAG; seqno = tid->seq_next; hdr->seq_ctrl = cpu_to_le16(tid->seq_next << IEEE80211_SEQ_SEQ_SHIFT); @@ -2201,7 +2210,7 @@ int ath_tx_start(struct ieee80211_hw *hw, struct sk_buff *skb, txq->stopped = true; } - if (txctl->an) + if (txctl->an && ieee80211_is_data_present(hdr->frame_control)) tid = ath_get_skb_tid(sc, txctl->an, skb); if (info->flags & IEEE80211_TX_CTL_PS_RESPONSE) { diff --git a/drivers/net/wireless/ath/carl9170/carl9170.h b/drivers/net/wireless/ath/carl9170/carl9170.h index 8596aba..237d0cd 100644 --- a/drivers/net/wireless/ath/carl9170/carl9170.h +++ b/drivers/net/wireless/ath/carl9170/carl9170.h @@ -256,6 +256,7 @@ struct ar9170 { atomic_t rx_work_urbs; atomic_t rx_pool_urbs; kernel_ulong_t features; + bool usb_ep_cmd_is_bulk; /* firmware settings */ struct completion fw_load_wait; diff --git a/drivers/net/wireless/ath/carl9170/usb.c b/drivers/net/wireless/ath/carl9170/usb.c index 307bc0d..83d20c8 100644 --- a/drivers/net/wireless/ath/carl9170/usb.c +++ b/drivers/net/wireless/ath/carl9170/usb.c @@ -621,9 +621,16 @@ int __carl9170_exec_cmd(struct ar9170 *ar, struct carl9170_cmd *cmd, goto err_free; } - usb_fill_int_urb(urb, ar->udev, usb_sndintpipe(ar->udev, - 
AR9170_USB_EP_CMD), cmd, cmd->hdr.len + 4, - carl9170_usb_cmd_complete, ar, 1); + if (ar->usb_ep_cmd_is_bulk) + usb_fill_bulk_urb(urb, ar->udev, + usb_sndbulkpipe(ar->udev, AR9170_USB_EP_CMD), + cmd, cmd->hdr.len + 4, + carl9170_usb_cmd_complete, ar); + else + usb_fill_int_urb(urb, ar->udev, + usb_sndintpipe(ar->udev, AR9170_USB_EP_CMD), + cmd, cmd->hdr.len + 4, + carl9170_usb_cmd_complete, ar, 1); if (free_buf) urb->transfer_flags |= URB_FREE_BUFFER; @@ -1032,9 +1039,10 @@ static void carl9170_usb_firmware_step2(const struct firmware *fw, static int carl9170_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { + struct usb_endpoint_descriptor *ep; struct ar9170 *ar; struct usb_device *udev; - int err; + int i, err; err = usb_reset_device(interface_to_usbdev(intf)); if (err) @@ -1050,6 +1058,21 @@ static int carl9170_usb_probe(struct usb_interface *intf, ar->intf = intf; ar->features = id->driver_info; + /* We need to remember the type of endpoint 4 because it differs + * between high- and full-speed configuration. The high-speed + * configuration specifies it as interrupt and the full-speed + * configuration as bulk endpoint. This information is required + * later when sending urbs to that endpoint. 
+ */ + for (i = 0; i < intf->cur_altsetting->desc.bNumEndpoints; ++i) { + ep = &intf->cur_altsetting->endpoint[i].desc; + + if (usb_endpoint_num(ep) == AR9170_USB_EP_CMD && + usb_endpoint_dir_out(ep) && + usb_endpoint_type(ep) == USB_ENDPOINT_XFER_BULK) + ar->usb_ep_cmd_is_bulk = true; + } + usb_set_intfdata(intf, ar); SET_IEEE80211_DEV(ar->hw, &intf->dev); diff --git a/drivers/net/wireless/b43/phy_n.c b/drivers/net/wireless/b43/phy_n.c index 7c970d3..80ecca3 100644 --- a/drivers/net/wireless/b43/phy_n.c +++ b/drivers/net/wireless/b43/phy_n.c @@ -5175,22 +5175,22 @@ static void b43_nphy_channel_setup(struct b43_wldev *dev, int ch = new_channel->hw_value; u16 old_band_5ghz; - u32 tmp32; + u16 tmp16; old_band_5ghz = b43_phy_read(dev, B43_NPHY_BANDCTL) & B43_NPHY_BANDCTL_5GHZ; if (new_channel->band == IEEE80211_BAND_5GHZ && !old_band_5ghz) { - tmp32 = b43_read32(dev, B43_MMIO_PSM_PHY_HDR); - b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32 | 4); + tmp16 = b43_read16(dev, B43_MMIO_PSM_PHY_HDR); + b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16 | 4); b43_phy_set(dev, B43_PHY_B_BBCFG, 0xC000); - b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32); + b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16); b43_phy_set(dev, B43_NPHY_BANDCTL, B43_NPHY_BANDCTL_5GHZ); } else if (new_channel->band == IEEE80211_BAND_2GHZ && old_band_5ghz) { b43_phy_mask(dev, B43_NPHY_BANDCTL, ~B43_NPHY_BANDCTL_5GHZ); - tmp32 = b43_read32(dev, B43_MMIO_PSM_PHY_HDR); - b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32 | 4); + tmp16 = b43_read16(dev, B43_MMIO_PSM_PHY_HDR); + b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16 | 4); b43_phy_mask(dev, B43_PHY_B_BBCFG, 0x3FFF); - b43_write32(dev, B43_MMIO_PSM_PHY_HDR, tmp32); + b43_write16(dev, B43_MMIO_PSM_PHY_HDR, tmp16); } b43_chantab_phy_upload(dev, e); diff --git a/drivers/net/wireless/b43/xmit.c b/drivers/net/wireless/b43/xmit.c index e85d34b..ebcce00 100644 --- a/drivers/net/wireless/b43/xmit.c +++ b/drivers/net/wireless/b43/xmit.c @@ -810,9 +810,13 @@ void b43_rx(struct 
b43_wldev *dev, struct sk_buff *skb, const void *_rxhdr) break; case B43_PHYTYPE_G: status.band = IEEE80211_BAND_2GHZ; - /* chanid is the radio channel cookie value as used - * to tune the radio. */ - status.freq = chanid + 2400; + /* Somewhere between 478.104 and 508.1084 firmware for G-PHY + * has been modified to be compatible with N-PHY and others. + */ + if (dev->fw.rev >= 508) + status.freq = ieee80211_channel_to_frequency(chanid, status.band); + else + status.freq = chanid + 2400; break; case B43_PHYTYPE_N: case B43_PHYTYPE_LP: diff --git a/drivers/net/wireless/iwlwifi/dvm/main.c b/drivers/net/wireless/iwlwifi/dvm/main.c index 7aad766..ca9c4f1 100644 --- a/drivers/net/wireless/iwlwifi/dvm/main.c +++ b/drivers/net/wireless/iwlwifi/dvm/main.c @@ -252,13 +252,17 @@ static void iwl_bg_bt_runtime_config(struct work_struct *work) struct iwl_priv *priv = container_of(work, struct iwl_priv, bt_runtime_config); + mutex_lock(&priv->mutex); if (test_bit(STATUS_EXIT_PENDING, &priv->status)) - return; + goto out; /* dont send host command if rf-kill is on */ if (!iwl_is_ready_rf(priv)) - return; + goto out; + iwlagn_send_advance_bt_config(priv); +out: + mutex_unlock(&priv->mutex); } static void iwl_bg_bt_full_concurrency(struct work_struct *work) diff --git a/drivers/net/wireless/iwlwifi/iwl-trans.h b/drivers/net/wireless/iwlwifi/iwl-trans.h index 80b4750..c8d1e37 100644 --- a/drivers/net/wireless/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/iwlwifi/iwl-trans.h @@ -484,6 +484,7 @@ enum iwl_trans_state { * Set during transport allocation. * @hw_id_str: a string with info about HW ID. Set during transport allocation. * @pm_support: set to true in start_hw if link pm is supported + * @ltr_enabled: set to true if the LTR is enabled * @dev_cmd_pool: pool for Tx cmd allocation - for internal use only. * The user should use iwl_trans_{alloc,free}_tx_cmd. 
* @dev_cmd_headroom: room needed for the transport's private use before the @@ -508,6 +509,7 @@ struct iwl_trans { u8 rx_mpdu_cmd, rx_mpdu_cmd_hdr_size; bool pm_support; + bool ltr_enabled; /* The following fields are internal only */ struct kmem_cache *dev_cmd_pool; diff --git a/drivers/net/wireless/iwlwifi/mvm/fw-api-power.h b/drivers/net/wireless/iwlwifi/mvm/fw-api-power.h index 8e7ab41..4dacb20 100644 --- a/drivers/net/wireless/iwlwifi/mvm/fw-api-power.h +++ b/drivers/net/wireless/iwlwifi/mvm/fw-api-power.h @@ -66,13 +66,46 @@ /* Power Management Commands, Responses, Notifications */ +/** + * enum iwl_ltr_config_flags - masks for LTR config command flags + * @LTR_CFG_FLAG_FEATURE_ENABLE: Feature operational status + * @LTR_CFG_FLAG_HW_DIS_ON_SHADOW_REG_ACCESS: allow LTR change on shadow + * memory access + * @LTR_CFG_FLAG_HW_EN_SHRT_WR_THROUGH: allow LTR msg send on ANY LTR + * reg change + * @LTR_CFG_FLAG_HW_DIS_ON_D0_2_D3: allow LTR msg send on transition from + * D0 to D3 + * @LTR_CFG_FLAG_SW_SET_SHORT: fixed static short LTR register + * @LTR_CFG_FLAG_SW_SET_LONG: fixed static short LONG register + * @LTR_CFG_FLAG_DENIE_C10_ON_PD: allow going into C10 on PD + */ +enum iwl_ltr_config_flags { + LTR_CFG_FLAG_FEATURE_ENABLE = BIT(0), + LTR_CFG_FLAG_HW_DIS_ON_SHADOW_REG_ACCESS = BIT(1), + LTR_CFG_FLAG_HW_EN_SHRT_WR_THROUGH = BIT(2), + LTR_CFG_FLAG_HW_DIS_ON_D0_2_D3 = BIT(3), + LTR_CFG_FLAG_SW_SET_SHORT = BIT(4), + LTR_CFG_FLAG_SW_SET_LONG = BIT(5), + LTR_CFG_FLAG_DENIE_C10_ON_PD = BIT(6), +}; + +/** + * struct iwl_ltr_config_cmd - configures the LTR + * @flags: See %enum iwl_ltr_config_flags + */ +struct iwl_ltr_config_cmd { + __le32 flags; + __le32 static_long; + __le32 static_short; +} __packed; + /* Radio LP RX Energy Threshold measured in dBm */ #define POWER_LPRX_RSSI_THRESHOLD 75 #define POWER_LPRX_RSSI_THRESHOLD_MAX 94 #define POWER_LPRX_RSSI_THRESHOLD_MIN 30 /** - * enum iwl_scan_flags - masks for power table command flags + * enum iwl_power_flags - 
masks for power table command flags * @POWER_FLAGS_POWER_SAVE_ENA_MSK: '1' Allow to save power by turning off * receiver and transmitter. '0' - does not allow. * @POWER_FLAGS_POWER_MANAGEMENT_ENA_MSK: '0' Driver disables power management, diff --git a/drivers/net/wireless/iwlwifi/mvm/fw-api.h b/drivers/net/wireless/iwlwifi/mvm/fw-api.h index 66264cc..cd59ae1 100644 --- a/drivers/net/wireless/iwlwifi/mvm/fw-api.h +++ b/drivers/net/wireless/iwlwifi/mvm/fw-api.h @@ -138,6 +138,7 @@ enum { /* Power - legacy power table command */ POWER_TABLE_CMD = 0x77, + LTR_CONFIG = 0xee, /* Thermal Throttling*/ REPLY_THERMAL_MNG_BACKOFF = 0x7e, diff --git a/drivers/net/wireless/iwlwifi/mvm/fw.c b/drivers/net/wireless/iwlwifi/mvm/fw.c index c76299a..08f1200 100644 --- a/drivers/net/wireless/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/iwlwifi/mvm/fw.c @@ -424,6 +424,15 @@ int iwl_mvm_up(struct iwl_mvm *mvm) goto error; } + if (mvm->trans->ltr_enabled) { + struct iwl_ltr_config_cmd cmd = { + .flags = cpu_to_le32(LTR_CFG_FLAG_FEATURE_ENABLE), + }; + + WARN_ON(iwl_mvm_send_cmd_pdu(mvm, LTR_CONFIG, 0, + sizeof(cmd), &cmd)); + } + IWL_DEBUG_INFO(mvm, "RT uCode started.\n"); return 0; error: diff --git a/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c index 5fe23a5..72c6415 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac-ctxt.c @@ -1102,10 +1102,18 @@ int iwl_mvm_rx_beacon_notif(struct iwl_mvm *mvm, static void iwl_mvm_beacon_loss_iterator(void *_data, u8 *mac, struct ieee80211_vif *vif) { - u16 *id = _data; + struct iwl_missed_beacons_notif *missed_beacons = _data; struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - if (mvmvif->id == *id) + if (mvmvif->id != (u16)le32_to_cpu(missed_beacons->mac_id)) + return; + + /* + * TODO: the threshold should be adjusted based on latency conditions, + * and/or in case of a CS flow on one of the other AP vifs. 
+ */ + if (le32_to_cpu(missed_beacons->consec_missed_beacons_since_last_rx) > + IWL_MVM_MISSED_BEACONS_THRESHOLD) ieee80211_beacon_loss(vif); } @@ -1114,12 +1122,19 @@ int iwl_mvm_rx_missed_beacons_notif(struct iwl_mvm *mvm, struct iwl_device_cmd *cmd) { struct iwl_rx_packet *pkt = rxb_addr(rxb); - struct iwl_missed_beacons_notif *missed_beacons = (void *)pkt->data; - u16 id = (u16)le32_to_cpu(missed_beacons->mac_id); + struct iwl_missed_beacons_notif *mb = (void *)pkt->data; + + IWL_DEBUG_INFO(mvm, + "missed bcn mac_id=%u, consecutive=%u (%u, %u, %u)\n", + le32_to_cpu(mb->mac_id), + le32_to_cpu(mb->consec_missed_beacons), + le32_to_cpu(mb->consec_missed_beacons_since_last_rx), + le32_to_cpu(mb->num_recvd_beacons), + le32_to_cpu(mb->num_expected_beacons)); ieee80211_iterate_active_interfaces_atomic(mvm->hw, IEEE80211_IFACE_ITER_NORMAL, iwl_mvm_beacon_loss_iterator, - &id); + mb); return 0; } diff --git a/drivers/net/wireless/iwlwifi/mvm/mvm.h b/drivers/net/wireless/iwlwifi/mvm/mvm.h index c86663e..2103447 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/iwlwifi/mvm/mvm.h @@ -82,6 +82,7 @@ #define IWL_MVM_MAX_ADDRESSES 5 /* RSSI offset for WkP */ #define IWL_RSSI_OFFSET 50 +#define IWL_MVM_MISSED_BEACONS_THRESHOLD 8 enum iwl_mvm_tx_fifo { IWL_MVM_TX_FIFO_BK = 0, diff --git a/drivers/net/wireless/iwlwifi/mvm/ops.c b/drivers/net/wireless/iwlwifi/mvm/ops.c index 1fd08ba..e3cdc97 100644 --- a/drivers/net/wireless/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/iwlwifi/mvm/ops.c @@ -303,6 +303,7 @@ static const char *iwl_mvm_cmd_strings[REPLY_MAX] = { CMD(REPLY_BEACON_FILTERING_CMD), CMD(REPLY_THERMAL_MNG_BACKOFF), CMD(MAC_PM_POWER_TABLE), + CMD(LTR_CONFIG), }; #undef CMD diff --git a/drivers/net/wireless/iwlwifi/pcie/drv.c b/drivers/net/wireless/iwlwifi/pcie/drv.c index 26108a1..968c128 100644 --- a/drivers/net/wireless/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/iwlwifi/pcie/drv.c @@ -272,6 +272,8 @@ static 
DEFINE_PCI_DEVICE_TABLE(iwl_hw_card_ids) = { {IWL_PCI_DEVICE(0x08B1, 0x4070, iwl7260_2ac_cfg)}, {IWL_PCI_DEVICE(0x08B1, 0x4072, iwl7260_2ac_cfg)}, {IWL_PCI_DEVICE(0x08B1, 0x4170, iwl7260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x08B1, 0x4C60, iwl7260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x08B1, 0x4C70, iwl7260_2ac_cfg)}, {IWL_PCI_DEVICE(0x08B1, 0x4060, iwl7260_2n_cfg)}, {IWL_PCI_DEVICE(0x08B1, 0x406A, iwl7260_2n_cfg)}, {IWL_PCI_DEVICE(0x08B1, 0x4160, iwl7260_2n_cfg)}, @@ -312,6 +314,8 @@ static DEFINE_PCI_DEVICE_TABLE(iwl_hw_card_ids) = { {IWL_PCI_DEVICE(0x08B1, 0xC770, iwl7260_2ac_cfg)}, {IWL_PCI_DEVICE(0x08B1, 0xC760, iwl7260_2n_cfg)}, {IWL_PCI_DEVICE(0x08B2, 0xC270, iwl7260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x08B1, 0xCC70, iwl7260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x08B1, 0xCC60, iwl7260_2ac_cfg)}, {IWL_PCI_DEVICE(0x08B2, 0xC272, iwl7260_2ac_cfg)}, {IWL_PCI_DEVICE(0x08B2, 0xC260, iwl7260_2n_cfg)}, {IWL_PCI_DEVICE(0x08B2, 0xC26A, iwl7260_n_cfg)}, diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c index 62aac3b..e10646b 100644 --- a/drivers/net/wireless/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/iwlwifi/pcie/trans.c @@ -121,6 +121,7 @@ static void iwl_pcie_apm_config(struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); u16 lctl; + u16 cap; /* * HW bug W/A for instability in PCIe bus L0S->L1 transition. @@ -131,16 +132,17 @@ static void iwl_pcie_apm_config(struct iwl_trans *trans) * power savings, even without L1. */ pcie_capability_read_word(trans_pcie->pci_dev, PCI_EXP_LNKCTL, &lctl); - if (lctl & PCI_EXP_LNKCTL_ASPM_L1) { - /* L1-ASPM enabled; disable(!) L0S */ + if (lctl & PCI_EXP_LNKCTL_ASPM_L1) iwl_set_bit(trans, CSR_GIO_REG, CSR_GIO_REG_VAL_L0S_ENABLED); - dev_info(trans->dev, "L1 Enabled; Disabling L0S\n"); - } else { - /* L1-ASPM disabled; enable(!) 
L0S */ + else iwl_clear_bit(trans, CSR_GIO_REG, CSR_GIO_REG_VAL_L0S_ENABLED); - dev_info(trans->dev, "L1 Disabled; Enabling L0S\n"); - } trans->pm_support = !(lctl & PCI_EXP_LNKCTL_ASPM_L0S); + + pcie_capability_read_word(trans_pcie->pci_dev, PCI_EXP_DEVCTL2, &cap); + trans->ltr_enabled = cap & PCI_EXP_DEVCTL2_LTR_EN; + dev_info(trans->dev, "L1 %sabled - LTR %sabled\n", + (lctl & PCI_EXP_LNKCTL_ASPM_L1) ? "En" : "Dis", + trans->ltr_enabled ? "En" : "Dis"); } /* @@ -345,6 +347,7 @@ static int iwl_pcie_prepare_card_hw(struct iwl_trans *trans) { int ret; int t = 0; + int iter; IWL_DEBUG_INFO(trans, "iwl_trans_prepare_card_hw enter\n"); @@ -353,18 +356,23 @@ static int iwl_pcie_prepare_card_hw(struct iwl_trans *trans) if (ret >= 0) return 0; - /* If HW is not ready, prepare the conditions to check again */ - iwl_set_bit(trans, CSR_HW_IF_CONFIG_REG, - CSR_HW_IF_CONFIG_REG_PREPARE); + for (iter = 0; iter < 10; iter++) { + /* If HW is not ready, prepare the conditions to check again */ + iwl_set_bit(trans, CSR_HW_IF_CONFIG_REG, + CSR_HW_IF_CONFIG_REG_PREPARE); + + do { + ret = iwl_pcie_set_hw_ready(trans); + if (ret >= 0) + return 0; - do { - ret = iwl_pcie_set_hw_ready(trans); - if (ret >= 0) - return 0; + usleep_range(200, 1000); + t += 200; + } while (t < 150000); + msleep(25); + } - usleep_range(200, 1000); - t += 200; - } while (t < 150000); + IWL_DEBUG_INFO(trans, "got NIC after %d iterations\n", iter); return ret; } diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 2cd3f54..38b8b71 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2261,7 +2261,7 @@ static int __init init_mac80211_hwsim(void) printk(KERN_DEBUG "mac80211_hwsim: device_bind_driver failed (%d)\n", err); - goto failed_hw; + goto failed_bind; } skb_queue_head_init(&data->pending); @@ -2563,6 +2563,8 @@ failed_mon: return err; failed_hw: + device_release_driver(data->dev); +failed_bind: 
device_unregister(data->dev); failed_drvdata: ieee80211_free_hw(hw); diff --git a/drivers/net/wireless/mwifiex/main.c b/drivers/net/wireless/mwifiex/main.c index c2b91f5..edf5239 100644 --- a/drivers/net/wireless/mwifiex/main.c +++ b/drivers/net/wireless/mwifiex/main.c @@ -654,6 +654,7 @@ mwifiex_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) } tx_info = MWIFIEX_SKB_TXCB(skb); + memset(tx_info, 0, sizeof(*tx_info)); tx_info->bss_num = priv->bss_num; tx_info->bss_type = priv->bss_type; diff --git a/drivers/net/wireless/rt2x00/rt2500pci.c b/drivers/net/wireless/rt2x00/rt2500pci.c index 0ac5c58..13f557a 100644 --- a/drivers/net/wireless/rt2x00/rt2500pci.c +++ b/drivers/net/wireless/rt2x00/rt2500pci.c @@ -1684,8 +1684,13 @@ static int rt2500pci_init_eeprom(struct rt2x00_dev *rt2x00dev) /* * Detect if this device has an hardware controlled radio. */ - if (rt2x00_get_field16(eeprom, EEPROM_ANTENNA_HARDWARE_RADIO)) + if (rt2x00_get_field16(eeprom, EEPROM_ANTENNA_HARDWARE_RADIO)) { __set_bit(CAPABILITY_HW_BUTTON, &rt2x00dev->cap_flags); + /* + * On this device RFKILL initialized during probe does not work. + */ + __set_bit(REQUIRE_DELAYED_RFKILL, &rt2x00dev->cap_flags); + } /* * Check if the BBP tuning should be enabled. 
diff --git a/drivers/net/wireless/rt2x00/rt2800.h b/drivers/net/wireless/rt2x00/rt2800.h index e3eb952..71f70c7 100644 --- a/drivers/net/wireless/rt2x00/rt2800.h +++ b/drivers/net/wireless/rt2x00/rt2800.h @@ -54,6 +54,7 @@ * RF5592 2.4G/5G 2T2R * RF3070 2.4G 1T1R * RF5360 2.4G 1T1R + * RF5362 2.4G 1T1R * RF5370 2.4G 1T1R * RF5390 2.4G 1T1R */ @@ -74,6 +75,7 @@ #define RF3070 0x3070 #define RF3290 0x3290 #define RF5360 0x5360 +#define RF5362 0x5362 #define RF5370 0x5370 #define RF5372 0x5372 #define RF5390 0x5390 @@ -2041,7 +2043,7 @@ struct mac_iveiv_entry { * 2 - drop tx power by 12dBm, * 3 - increase tx power by 6dBm */ -#define BBP1_TX_POWER_CTRL FIELD8(0x07) +#define BBP1_TX_POWER_CTRL FIELD8(0x03) #define BBP1_TX_ANTENNA FIELD8(0x18) /* @@ -2147,7 +2149,7 @@ struct mac_iveiv_entry { /* Bits [7-4] for RF3320 (RT3370/RT3390), on other chipsets reserved */ #define RFCSR3_PA1_BIAS_CCK FIELD8(0x70) #define RFCSR3_PA2_CASCODE_BIAS_CCKK FIELD8(0x80) -/* Bits for RF3290/RF5360/RF5370/RF5372/RF5390/RF5392 */ +/* Bits for RF3290/RF5360/RF5362/RF5370/RF5372/RF5390/RF5392 */ #define RFCSR3_VCOCAL_EN FIELD8(0x80) /* Bits for RF3050 */ #define RFCSR3_BIT1 FIELD8(0x02) diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c index 446eade..3bbd03c 100644 --- a/drivers/net/wireless/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/rt2x00/rt2800lib.c @@ -3154,6 +3154,7 @@ static void rt2800_config_channel(struct rt2x00_dev *rt2x00dev, break; case RF3070: case RF5360: + case RF5362: case RF5370: case RF5372: case RF5390: @@ -3171,6 +3172,7 @@ static void rt2800_config_channel(struct rt2x00_dev *rt2x00dev, rt2x00_rf(rt2x00dev, RF3290) || rt2x00_rf(rt2x00dev, RF3322) || rt2x00_rf(rt2x00dev, RF5360) || + rt2x00_rf(rt2x00dev, RF5362) || rt2x00_rf(rt2x00dev, RF5370) || rt2x00_rf(rt2x00dev, RF5372) || rt2x00_rf(rt2x00dev, RF5390) || @@ -4269,6 +4271,7 @@ void rt2800_vco_calibration(struct rt2x00_dev *rt2x00dev) case RF3070: case RF3290: case RF5360: 
+ case RF5362: case RF5370: case RF5372: case RF5390: @@ -7032,6 +7035,7 @@ static int rt2800_init_eeprom(struct rt2x00_dev *rt2x00dev) case RF3320: case RF3322: case RF5360: + case RF5362: case RF5370: case RF5372: case RF5390: @@ -7555,6 +7559,7 @@ static int rt2800_probe_hw_mode(struct rt2x00_dev *rt2x00dev) rt2x00_rf(rt2x00dev, RF3320) || rt2x00_rf(rt2x00dev, RF3322) || rt2x00_rf(rt2x00dev, RF5360) || + rt2x00_rf(rt2x00dev, RF5362) || rt2x00_rf(rt2x00dev, RF5370) || rt2x00_rf(rt2x00dev, RF5372) || rt2x00_rf(rt2x00dev, RF5390) || @@ -7682,6 +7687,7 @@ static int rt2800_probe_hw_mode(struct rt2x00_dev *rt2x00dev) case RF3070: case RF3290: case RF5360: + case RF5362: case RF5370: case RF5372: case RF5390: diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c index 8c64627..e42fa72 100644 --- a/drivers/net/wireless/rt2x00/rt2800usb.c +++ b/drivers/net/wireless/rt2x00/rt2800usb.c @@ -992,6 +992,7 @@ static struct usb_device_id rt2800usb_device_table[] = { { USB_DEVICE(0x07d1, 0x3c15) }, { USB_DEVICE(0x07d1, 0x3c16) }, { USB_DEVICE(0x07d1, 0x3c17) }, + { USB_DEVICE(0x2001, 0x3317) }, { USB_DEVICE(0x2001, 0x3c1b) }, /* Draytek */ { USB_DEVICE(0x07fa, 0x7712) }, @@ -1064,6 +1065,7 @@ static struct usb_device_id rt2800usb_device_table[] = { /* Ovislink */ { USB_DEVICE(0x1b75, 0x3071) }, { USB_DEVICE(0x1b75, 0x3072) }, + { USB_DEVICE(0x1b75, 0xa200) }, /* Para */ { USB_DEVICE(0x20b8, 0x8888) }, /* Pegatron */ @@ -1180,6 +1182,8 @@ static struct usb_device_id rt2800usb_device_table[] = { /* Linksys */ { USB_DEVICE(0x13b1, 0x002f) }, { USB_DEVICE(0x1737, 0x0079) }, + /* Logitec */ + { USB_DEVICE(0x0789, 0x0170) }, /* Ralink */ { USB_DEVICE(0x148f, 0x3572) }, /* Sitecom */ @@ -1203,6 +1207,8 @@ static struct usb_device_id rt2800usb_device_table[] = { { USB_DEVICE(0x050d, 0x1103) }, /* Cameo */ { USB_DEVICE(0x148f, 0xf301) }, + /* D-Link */ + { USB_DEVICE(0x2001, 0x3c1f) }, /* Edimax */ { USB_DEVICE(0x7392, 0x7733) }, /* Hawking */ @@ 
-1216,6 +1222,7 @@ static struct usb_device_id rt2800usb_device_table[] = { { USB_DEVICE(0x0789, 0x016b) }, /* NETGEAR */ { USB_DEVICE(0x0846, 0x9012) }, + { USB_DEVICE(0x0846, 0x9013) }, { USB_DEVICE(0x0846, 0x9019) }, /* Planex */ { USB_DEVICE(0x2019, 0xed19) }, @@ -1224,6 +1231,7 @@ static struct usb_device_id rt2800usb_device_table[] = { /* Sitecom */ { USB_DEVICE(0x0df6, 0x0067) }, { USB_DEVICE(0x0df6, 0x006a) }, + { USB_DEVICE(0x0df6, 0x006e) }, /* ZyXEL */ { USB_DEVICE(0x0586, 0x3421) }, #endif @@ -1231,6 +1239,8 @@ static struct usb_device_id rt2800usb_device_table[] = { /* Arcadyan */ { USB_DEVICE(0x043e, 0x7a12) }, { USB_DEVICE(0x043e, 0x7a32) }, + /* ASUS */ + { USB_DEVICE(0x0b05, 0x17e8) }, /* Azurewave */ { USB_DEVICE(0x13d3, 0x3329) }, { USB_DEVICE(0x13d3, 0x3365) }, @@ -1240,6 +1250,9 @@ static struct usb_device_id rt2800usb_device_table[] = { { USB_DEVICE(0x2001, 0x3c1c) }, { USB_DEVICE(0x2001, 0x3c1d) }, { USB_DEVICE(0x2001, 0x3c1e) }, + { USB_DEVICE(0x2001, 0x3c20) }, + { USB_DEVICE(0x2001, 0x3c22) }, + { USB_DEVICE(0x2001, 0x3c23) }, /* LG innotek */ { USB_DEVICE(0x043e, 0x7a22) }, { USB_DEVICE(0x043e, 0x7a42) }, @@ -1262,12 +1275,18 @@ static struct usb_device_id rt2800usb_device_table[] = { { USB_DEVICE(0x043e, 0x7a32) }, /* AVM GmbH */ { USB_DEVICE(0x057c, 0x8501) }, - /* D-Link DWA-160-B2 */ + /* Buffalo */ + { USB_DEVICE(0x0411, 0x0241) }, + { USB_DEVICE(0x0411, 0x0253) }, + /* D-Link */ { USB_DEVICE(0x2001, 0x3c1a) }, + { USB_DEVICE(0x2001, 0x3c21) }, /* Proware */ { USB_DEVICE(0x043e, 0x7a13) }, /* Ralink */ { USB_DEVICE(0x148f, 0x5572) }, + /* TRENDnet */ + { USB_DEVICE(0x20f4, 0x724a) }, #endif #ifdef CONFIG_RT2800USB_UNKNOWN /* @@ -1337,6 +1356,7 @@ static struct usb_device_id rt2800usb_device_table[] = { { USB_DEVICE(0x1d4d, 0x0010) }, /* Planex */ { USB_DEVICE(0x2019, 0xab24) }, + { USB_DEVICE(0x2019, 0xab29) }, /* Qcom */ { USB_DEVICE(0x18e8, 0x6259) }, /* RadioShack */ @@ -1348,6 +1368,7 @@ static struct usb_device_id 
rt2800usb_device_table[] = { { USB_DEVICE(0x0df6, 0x0053) }, { USB_DEVICE(0x0df6, 0x0069) }, { USB_DEVICE(0x0df6, 0x006f) }, + { USB_DEVICE(0x0df6, 0x0078) }, /* SMC */ { USB_DEVICE(0x083a, 0xa512) }, { USB_DEVICE(0x083a, 0xc522) }, diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h index fe4c572..89dbf2d 100644 --- a/drivers/net/wireless/rt2x00/rt2x00.h +++ b/drivers/net/wireless/rt2x00/rt2x00.h @@ -705,6 +705,7 @@ enum rt2x00_capability_flags { REQUIRE_SW_SEQNO, REQUIRE_HT_TX_DESC, REQUIRE_PS_AUTOWAKE, + REQUIRE_DELAYED_RFKILL, /* * Capabilities diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c index f12e909..6ccfa0a 100644 --- a/drivers/net/wireless/rt2x00/rt2x00dev.c +++ b/drivers/net/wireless/rt2x00/rt2x00dev.c @@ -1128,9 +1128,10 @@ static void rt2x00lib_uninitialize(struct rt2x00_dev *rt2x00dev) return; /* - * Unregister extra components. + * Stop rfkill polling. */ - rt2x00rfkill_unregister(rt2x00dev); + if (test_bit(REQUIRE_DELAYED_RFKILL, &rt2x00dev->cap_flags)) + rt2x00rfkill_unregister(rt2x00dev); /* * Allow the HW to uninitialize. @@ -1168,6 +1169,12 @@ static int rt2x00lib_initialize(struct rt2x00_dev *rt2x00dev) set_bit(DEVICE_STATE_INITIALIZED, &rt2x00dev->flags); + /* + * Start rfkill polling. + */ + if (test_bit(REQUIRE_DELAYED_RFKILL, &rt2x00dev->cap_flags)) + rt2x00rfkill_register(rt2x00dev); + return 0; } @@ -1377,7 +1384,12 @@ int rt2x00lib_probe_dev(struct rt2x00_dev *rt2x00dev) rt2x00link_register(rt2x00dev); rt2x00leds_register(rt2x00dev); rt2x00debug_register(rt2x00dev); - rt2x00rfkill_register(rt2x00dev); + + /* + * Start rfkill polling. + */ + if (!test_bit(REQUIRE_DELAYED_RFKILL, &rt2x00dev->cap_flags)) + rt2x00rfkill_register(rt2x00dev); return 0; @@ -1393,6 +1405,12 @@ void rt2x00lib_remove_dev(struct rt2x00_dev *rt2x00dev) clear_bit(DEVICE_STATE_PRESENT, &rt2x00dev->flags); /* + * Stop rfkill polling. 
+ */ + if (!test_bit(REQUIRE_DELAYED_RFKILL, &rt2x00dev->cap_flags)) + rt2x00rfkill_unregister(rt2x00dev); + + /* * Disable radio. */ rt2x00lib_disable_radio(rt2x00dev); diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c index f8cff1f..c03748d 100644 --- a/drivers/net/wireless/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/rt2x00/rt2x00mac.c @@ -489,6 +489,8 @@ int rt2x00mac_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, crypto.cipher = rt2x00crypto_key_to_cipher(key); if (crypto.cipher == CIPHER_NONE) return -EOPNOTSUPP; + if (crypto.cipher == CIPHER_TKIP && rt2x00_is_usb(rt2x00dev)) + return -EOPNOTSUPP; crypto.cmd = cmd; @@ -623,20 +625,18 @@ void rt2x00mac_bss_info_changed(struct ieee80211_hw *hw, bss_conf->bssid); /* - * Update the beacon. This is only required on USB devices. PCI - * devices fetch beacons periodically. - */ - if (changes & BSS_CHANGED_BEACON && rt2x00_is_usb(rt2x00dev)) - rt2x00queue_update_beacon(rt2x00dev, vif); - - /* * Start/stop beaconing. */ if (changes & BSS_CHANGED_BEACON_ENABLED) { if (!bss_conf->enable_beacon && intf->enable_beacon) { - rt2x00queue_clear_beacon(rt2x00dev, vif); rt2x00dev->intf_beaconing--; intf->enable_beacon = false; + /* + * Clear beacon in the H/W for this vif. This is needed + * to disable beaconing on this particular interface + * and keep it running on other interfaces. + */ + rt2x00queue_clear_beacon(rt2x00dev, vif); if (rt2x00dev->intf_beaconing == 0) { /* @@ -647,11 +647,15 @@ void rt2x00mac_bss_info_changed(struct ieee80211_hw *hw, rt2x00queue_stop_queue(rt2x00dev->bcn); mutex_unlock(&intf->beacon_skb_mutex); } - - } else if (bss_conf->enable_beacon && !intf->enable_beacon) { rt2x00dev->intf_beaconing++; intf->enable_beacon = true; + /* + * Upload beacon to the H/W. This is only required on + * USB devices. PCI devices fetch beacons periodically. 
+ */ + if (rt2x00_is_usb(rt2x00dev)) + rt2x00queue_update_beacon(rt2x00dev, vif); if (rt2x00dev->intf_beaconing == 1) { /* diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.c b/drivers/net/wireless/rt2x00/rt2x00queue.c index 66a2db8..e618217 100644 --- a/drivers/net/wireless/rt2x00/rt2x00queue.c +++ b/drivers/net/wireless/rt2x00/rt2x00queue.c @@ -160,55 +160,29 @@ void rt2x00queue_align_frame(struct sk_buff *skb) skb_trim(skb, frame_length); } -void rt2x00queue_insert_l2pad(struct sk_buff *skb, unsigned int header_length) +/* + * H/W needs L2 padding between the header and the paylod if header size + * is not 4 bytes aligned. + */ +void rt2x00queue_insert_l2pad(struct sk_buff *skb, unsigned int hdr_len) { - unsigned int payload_length = skb->len - header_length; - unsigned int header_align = ALIGN_SIZE(skb, 0); - unsigned int payload_align = ALIGN_SIZE(skb, header_length); - unsigned int l2pad = payload_length ? L2PAD_SIZE(header_length) : 0; + unsigned int l2pad = (skb->len > hdr_len) ? L2PAD_SIZE(hdr_len) : 0; - /* - * Adjust the header alignment if the payload needs to be moved more - * than the header. - */ - if (payload_align > header_align) - header_align += 4; - - /* There is nothing to do if no alignment is needed */ - if (!header_align) + if (!l2pad) return; - /* Reserve the amount of space needed in front of the frame */ - skb_push(skb, header_align); - - /* - * Move the header. 
- */ - memmove(skb->data, skb->data + header_align, header_length); - - /* Move the payload, if present and if required */ - if (payload_length && payload_align) - memmove(skb->data + header_length + l2pad, - skb->data + header_length + l2pad + payload_align, - payload_length); - - /* Trim the skb to the correct size */ - skb_trim(skb, header_length + l2pad + payload_length); + skb_push(skb, l2pad); + memmove(skb->data, skb->data + l2pad, hdr_len); } -void rt2x00queue_remove_l2pad(struct sk_buff *skb, unsigned int header_length) +void rt2x00queue_remove_l2pad(struct sk_buff *skb, unsigned int hdr_len) { - /* - * L2 padding is only present if the skb contains more than just the - * IEEE 802.11 header. - */ - unsigned int l2pad = (skb->len > header_length) ? - L2PAD_SIZE(header_length) : 0; + unsigned int l2pad = (skb->len > hdr_len) ? L2PAD_SIZE(hdr_len) : 0; if (!l2pad) return; - memmove(skb->data + l2pad, skb->data, header_length); + memmove(skb->data + l2pad, skb->data, hdr_len); skb_pull(skb, l2pad); } diff --git a/drivers/net/wireless/rtlwifi/rtl8188ee/hw.c b/drivers/net/wireless/rtlwifi/rtl8188ee/hw.c index e06971b..f923d8c 100644 --- a/drivers/net/wireless/rtlwifi/rtl8188ee/hw.c +++ b/drivers/net/wireless/rtlwifi/rtl8188ee/hw.c @@ -1025,9 +1025,20 @@ int rtl88ee_hw_init(struct ieee80211_hw *hw) bool rtstatus = true; int err = 0; u8 tmp_u1b, u1byte; + unsigned long flags; RT_TRACE(rtlpriv, COMP_INIT, DBG_LOUD, "Rtl8188EE hw init\n"); rtlpriv->rtlhal.being_init_adapter = true; + /* As this function can take a very long time (up to 350 ms) + * and can be called with irqs disabled, reenable the irqs + * to let the other devices continue being serviced. + * + * It is safe doing so since our own interrupts will only be enabled + * in a subsequent step. 
+ */ + local_save_flags(flags); + local_irq_enable(); + rtlpriv->intf_ops->disable_aspm(hw); tmp_u1b = rtl_read_byte(rtlpriv, REG_SYS_CLKR+1); @@ -1043,7 +1054,7 @@ int rtl88ee_hw_init(struct ieee80211_hw *hw) if (rtstatus != true) { RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, "Init MAC failed\n"); err = 1; - return err; + goto exit; } err = rtl88e_download_fw(hw, false); @@ -1051,8 +1062,7 @@ int rtl88ee_hw_init(struct ieee80211_hw *hw) RT_TRACE(rtlpriv, COMP_ERR, DBG_WARNING, "Failed to download FW. Init HW without FW now..\n"); err = 1; - rtlhal->fw_ready = false; - return err; + goto exit; } else { rtlhal->fw_ready = true; } @@ -1135,10 +1145,12 @@ int rtl88ee_hw_init(struct ieee80211_hw *hw) } rtl_write_byte(rtlpriv, REG_NAV_CTRL+2, ((30000+127)/128)); rtl88e_dm_init(hw); +exit: + local_irq_restore(flags); rtlpriv->rtlhal.being_init_adapter = false; RT_TRACE(rtlpriv, COMP_INIT, DBG_LOUD, "end of Rtl8188EE hw init %x\n", err); - return 0; + return err; } static enum version_8188e _rtl88ee_read_chip_version(struct ieee80211_hw *hw) diff --git a/drivers/net/wireless/rtlwifi/rtl8188ee/trx.c b/drivers/net/wireless/rtlwifi/rtl8188ee/trx.c index 68685a8..749d417 100644 --- a/drivers/net/wireless/rtlwifi/rtl8188ee/trx.c +++ b/drivers/net/wireless/rtlwifi/rtl8188ee/trx.c @@ -293,7 +293,7 @@ static void _rtl88ee_translate_rx_signal_stuff(struct ieee80211_hw *hw, u8 *psaddr; __le16 fc; u16 type, ufc; - bool match_bssid, packet_toself, packet_beacon, addr; + bool match_bssid, packet_toself, packet_beacon = false, addr; tmp_buf = skb->data + pstatus->rx_drvinfo_size + pstatus->rx_bufshift; diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c b/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c index 189ba12..c3f2b55 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c @@ -985,19 +985,30 @@ int rtl92cu_hw_init(struct ieee80211_hw *hw) struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw)); int err = 0; static bool iqk_initialized; + 
unsigned long flags; + + /* As this function can take a very long time (up to 350 ms) + * and can be called with irqs disabled, reenable the irqs + * to let the other devices continue being serviced. + * + * It is safe doing so since our own interrupts will only be enabled + * in a subsequent step. + */ + local_save_flags(flags); + local_irq_enable(); rtlhal->hw_type = HARDWARE_TYPE_RTL8192CU; err = _rtl92cu_init_mac(hw); if (err) { RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, "init mac failed!\n"); - return err; + goto exit; } err = rtl92c_download_fw(hw); if (err) { RT_TRACE(rtlpriv, COMP_ERR, DBG_WARNING, "Failed to download FW. Init HW without FW now..\n"); err = 1; - return err; + goto exit; } rtlhal->last_hmeboxnum = 0; /* h2c */ _rtl92cu_phy_param_tab_init(hw); @@ -1034,6 +1045,8 @@ int rtl92cu_hw_init(struct ieee80211_hw *hw) _InitPABias(hw); _update_mac_setting(hw); rtl92c_dm_init(hw); +exit: + local_irq_restore(flags); return err; } diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c b/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c index 8188dcb..e7a2af3 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c @@ -316,6 +316,7 @@ static struct usb_device_id rtl8192c_usb_ids[] = { {RTL_USB_DEVICE(0x0bda, 0x5088, rtl92cu_hal_cfg)}, /*Thinkware-CC&C*/ {RTL_USB_DEVICE(0x0df6, 0x0052, rtl92cu_hal_cfg)}, /*Sitecom - Edimax*/ {RTL_USB_DEVICE(0x0df6, 0x005c, rtl92cu_hal_cfg)}, /*Sitecom - Edimax*/ + {RTL_USB_DEVICE(0x0df6, 0x0070, rtl92cu_hal_cfg)}, /*Sitecom - 150N */ {RTL_USB_DEVICE(0x0df6, 0x0077, rtl92cu_hal_cfg)}, /*Sitecom-WLA2100V2*/ {RTL_USB_DEVICE(0x0eb0, 0x9071, rtl92cu_hal_cfg)}, /*NO Brand - Etop*/ {RTL_USB_DEVICE(0x4856, 0x0091, rtl92cu_hal_cfg)}, /*NetweeN - Feixun*/ diff --git a/drivers/net/wireless/rtlwifi/rtl8192se/hw.c b/drivers/net/wireless/rtlwifi/rtl8192se/hw.c index 4f46178..c471400 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192se/hw.c +++ b/drivers/net/wireless/rtlwifi/rtl8192se/hw.c @@ 
-955,7 +955,7 @@ int rtl92se_hw_init(struct ieee80211_hw *hw) struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); struct rtl_efuse *rtlefuse = rtl_efuse(rtl_priv(hw)); u8 tmp_byte = 0; - + unsigned long flags; bool rtstatus = true; u8 tmp_u1b; int err = false; @@ -967,6 +967,16 @@ int rtl92se_hw_init(struct ieee80211_hw *hw) rtlpci->being_init_adapter = true; + /* As this function can take a very long time (up to 350 ms) + * and can be called with irqs disabled, reenable the irqs + * to let the other devices continue being serviced. + * + * It is safe doing so since our own interrupts will only be enabled + * in a subsequent step. + */ + local_save_flags(flags); + local_irq_enable(); + rtlpriv->intf_ops->disable_aspm(hw); /* 1. MAC Initialize */ @@ -984,7 +994,8 @@ int rtl92se_hw_init(struct ieee80211_hw *hw) RT_TRACE(rtlpriv, COMP_ERR, DBG_WARNING, "Failed to download FW. Init HW without FW now... " "Please copy FW into /lib/firmware/rtlwifi\n"); - return 1; + err = 1; + goto exit; } /* After FW download, we have to reset MAC register */ @@ -997,7 +1008,8 @@ int rtl92se_hw_init(struct ieee80211_hw *hw) /* 3. Initialize MAC/PHY Config by MACPHY_reg.txt */ if (!rtl92s_phy_mac_config(hw)) { RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, "MAC Config failed\n"); - return rtstatus; + err = rtstatus; + goto exit; } /* because last function modify RCR, so we update @@ -1016,7 +1028,8 @@ int rtl92se_hw_init(struct ieee80211_hw *hw) /* 4. Initialize BB After MAC Config PHY_reg.txt, AGC_Tab.txt */ if (!rtl92s_phy_bb_config(hw)) { RT_TRACE(rtlpriv, COMP_INIT, DBG_EMERG, "BB Config failed\n"); - return rtstatus; + err = rtstatus; + goto exit; } /* 5. 
Initiailze RF RAIO_A.txt RF RAIO_B.txt */ @@ -1033,7 +1046,8 @@ int rtl92se_hw_init(struct ieee80211_hw *hw) if (!rtl92s_phy_rf_config(hw)) { RT_TRACE(rtlpriv, COMP_INIT, DBG_DMESG, "RF Config failed\n"); - return rtstatus; + err = rtstatus; + goto exit; } /* After read predefined TXT, we must set BB/MAC/RF @@ -1122,8 +1136,9 @@ int rtl92se_hw_init(struct ieee80211_hw *hw) rtlpriv->cfg->ops->led_control(hw, LED_CTL_POWER_ON); rtl92s_dm_init(hw); +exit: + local_irq_restore(flags); rtlpci->being_init_adapter = false; - return err; } diff --git a/drivers/net/wireless/rtlwifi/rtl8192se/trx.c b/drivers/net/wireless/rtlwifi/rtl8192se/trx.c index 7d0f2e2..c240b75 100644 --- a/drivers/net/wireless/rtlwifi/rtl8192se/trx.c +++ b/drivers/net/wireless/rtlwifi/rtl8192se/trx.c @@ -49,6 +49,12 @@ static u8 _rtl92se_map_hwqueue_to_fwqueue(struct sk_buff *skb, u8 skb_queue) if (ieee80211_is_nullfunc(fc)) return QSLT_HIGH; + /* Kernel commit 1bf4bbb4024dcdab changed EAPOL packets to use + * queue V0 at priority 7; however, the RTL8192SE appears to have + * that queue at priority 6 + */ + if (skb->priority == 7) + return QSLT_VO; return skb->priority; } diff --git a/drivers/net/wireless/rtlwifi/rtl8723ae/hw.c b/drivers/net/wireless/rtlwifi/rtl8723ae/hw.c index c333dfd..99f6bc5 100644 --- a/drivers/net/wireless/rtlwifi/rtl8723ae/hw.c +++ b/drivers/net/wireless/rtlwifi/rtl8723ae/hw.c @@ -880,14 +880,25 @@ int rtl8723ae_hw_init(struct ieee80211_hw *hw) bool rtstatus = true; int err; u8 tmp_u1b; + unsigned long flags; rtlpriv->rtlhal.being_init_adapter = true; + /* As this function can take a very long time (up to 350 ms) + * and can be called with irqs disabled, reenable the irqs + * to let the other devices continue being serviced. + * + * It is safe doing so since our own interrupts will only be enabled + * in a subsequent step. 
+ */ + local_save_flags(flags); + local_irq_enable(); + rtlpriv->intf_ops->disable_aspm(hw); rtstatus = _rtl8712e_init_mac(hw); if (rtstatus != true) { RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, "Init MAC failed\n"); err = 1; - return err; + goto exit; } err = rtl8723ae_download_fw(hw); @@ -895,8 +906,7 @@ int rtl8723ae_hw_init(struct ieee80211_hw *hw) RT_TRACE(rtlpriv, COMP_ERR, DBG_WARNING, "Failed to download FW. Init HW without FW now..\n"); err = 1; - rtlhal->fw_ready = false; - return err; + goto exit; } else { rtlhal->fw_ready = true; } @@ -971,6 +981,8 @@ int rtl8723ae_hw_init(struct ieee80211_hw *hw) RT_TRACE(rtlpriv, COMP_INIT, DBG_TRACE, "under 1.5V\n"); } rtl8723ae_dm_init(hw); +exit: + local_irq_restore(flags); rtlpriv->rtlhal.being_init_adapter = false; return err; } diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 7c541dc..fd3c1da 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -468,9 +468,6 @@ static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev, len = skb_frag_size(frag); offset = frag->page_offset; - /* Data must not cross a page boundary. 
*/ - BUG_ON(len + offset > PAGE_SIZE<<compound_order(page)); - /* Skip unused frames from start of page */ page += offset >> PAGE_SHIFT; offset &= ~PAGE_MASK; @@ -478,8 +475,6 @@ static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev, while (len > 0) { unsigned long bytes; - BUG_ON(offset >= PAGE_SIZE); - bytes = PAGE_SIZE - offset; if (bytes > len) bytes = len; diff --git a/drivers/nfc/microread/microread.c b/drivers/nfc/microread/microread.c index cdb9f6d..562fa6b 100644 --- a/drivers/nfc/microread/microread.c +++ b/drivers/nfc/microread/microread.c @@ -501,9 +501,13 @@ static void microread_target_discovered(struct nfc_hci_dev *hdev, u8 gate, targets->sens_res = be16_to_cpu(*(u16 *)&skb->data[MICROREAD_EMCF_A_ATQA]); targets->sel_res = skb->data[MICROREAD_EMCF_A_SAK]; - memcpy(targets->nfcid1, &skb->data[MICROREAD_EMCF_A_UID], - skb->data[MICROREAD_EMCF_A_LEN]); targets->nfcid1_len = skb->data[MICROREAD_EMCF_A_LEN]; + if (targets->nfcid1_len > sizeof(targets->nfcid1)) { + r = -EINVAL; + goto exit_free; + } + memcpy(targets->nfcid1, &skb->data[MICROREAD_EMCF_A_UID], + targets->nfcid1_len); break; case MICROREAD_GATE_ID_MREAD_ISO_A_3: targets->supported_protocols = @@ -511,9 +515,13 @@ static void microread_target_discovered(struct nfc_hci_dev *hdev, u8 gate, targets->sens_res = be16_to_cpu(*(u16 *)&skb->data[MICROREAD_EMCF_A3_ATQA]); targets->sel_res = skb->data[MICROREAD_EMCF_A3_SAK]; - memcpy(targets->nfcid1, &skb->data[MICROREAD_EMCF_A3_UID], - skb->data[MICROREAD_EMCF_A3_LEN]); targets->nfcid1_len = skb->data[MICROREAD_EMCF_A3_LEN]; + if (targets->nfcid1_len > sizeof(targets->nfcid1)) { + r = -EINVAL; + goto exit_free; + } + memcpy(targets->nfcid1, &skb->data[MICROREAD_EMCF_A3_UID], + targets->nfcid1_len); break; case MICROREAD_GATE_ID_MREAD_ISO_B: targets->supported_protocols = NFC_PROTO_ISO14443_B_MASK; diff --git a/drivers/of/address.c b/drivers/of/address.c index 503b4e4..f5582f3 100644 --- a/drivers/of/address.c +++ 
b/drivers/of/address.c @@ -401,6 +401,21 @@ static struct of_bus *of_match_bus(struct device_node *np) return NULL; } +static int of_empty_ranges_quirk(void) +{ + if (IS_ENABLED(CONFIG_PPC)) { + /* To save cycles, we cache the result */ + static int quirk_state = -1; + + if (quirk_state < 0) + quirk_state = + of_machine_is_compatible("Power Macintosh") || + of_machine_is_compatible("MacRISC"); + return quirk_state; + } + return false; +} + static int of_translate_one(struct device_node *parent, struct of_bus *bus, struct of_bus *pbus, __be32 *addr, int na, int ns, int pna, const char *rprop) @@ -426,12 +441,10 @@ static int of_translate_one(struct device_node *parent, struct of_bus *bus, * This code is only enabled on powerpc. --gcl */ ranges = of_get_property(parent, rprop, &rlen); -#if !defined(CONFIG_PPC) - if (ranges == NULL) { + if (ranges == NULL && !of_empty_ranges_quirk()) { pr_err("OF: no ranges; cannot translate\n"); return 1; } -#endif /* !defined(CONFIG_PPC) */ if (ranges == NULL || rlen == 0) { offset = of_read_number(addr, na); memset(addr, 0, pna * 4); diff --git a/drivers/of/base.c b/drivers/of/base.c index 7d4c70f..6c18ab2 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1057,52 +1057,6 @@ int of_property_read_string(struct device_node *np, const char *propname, EXPORT_SYMBOL_GPL(of_property_read_string); /** - * of_property_read_string_index - Find and read a string from a multiple - * strings property. - * @np: device node from which the property value is to be read. - * @propname: name of the property to be searched. - * @index: index of the string in the list of strings - * @out_string: pointer to null terminated return string, modified only if - * return value is 0. - * - * Search for a property in a device tree node and retrieve a null - * terminated string value (pointer to data, not a copy) in the list of strings - * contained in that property. 
- * Returns 0 on success, -EINVAL if the property does not exist, -ENODATA if - * property does not have a value, and -EILSEQ if the string is not - * null-terminated within the length of the property data. - * - * The out_string pointer is modified only if a valid string can be decoded. - */ -int of_property_read_string_index(struct device_node *np, const char *propname, - int index, const char **output) -{ - struct property *prop = of_find_property(np, propname, NULL); - int i = 0; - size_t l = 0, total = 0; - const char *p; - - if (!prop) - return -EINVAL; - if (!prop->value) - return -ENODATA; - if (strnlen(prop->value, prop->length) >= prop->length) - return -EILSEQ; - - p = prop->value; - - for (i = 0; total < prop->length; total += l, p += l) { - l = strlen(p) + 1; - if (i++ == index) { - *output = p; - return 0; - } - } - return -ENODATA; -} -EXPORT_SYMBOL_GPL(of_property_read_string_index); - -/** * of_property_match_string() - Find string in a list and return index * @np: pointer to node containing string list property * @propname: string list property name @@ -1128,7 +1082,7 @@ int of_property_match_string(struct device_node *np, const char *propname, end = p + prop->length; for (i = 0; p < end; i++, p += l) { - l = strlen(p) + 1; + l = strnlen(p, end - p) + 1; if (p + l > end) return -EILSEQ; pr_debug("comparing %s with %s\n", string, p); @@ -1140,39 +1094,41 @@ int of_property_match_string(struct device_node *np, const char *propname, EXPORT_SYMBOL_GPL(of_property_match_string); /** - * of_property_count_strings - Find and return the number of strings from a - * multiple strings property. + * of_property_read_string_util() - Utility helper for parsing string properties * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. + * @out_strs: output array of string pointers. + * @sz: number of array elements to read. + * @skip: Number of strings to skip over at beginning of list. 
* - * Search for a property in a device tree node and retrieve the number of null - * terminated string contain in it. Returns the number of strings on - * success, -EINVAL if the property does not exist, -ENODATA if property - * does not have a value, and -EILSEQ if the string is not null-terminated - * within the length of the property data. + * Don't call this function directly. It is a utility helper for the + * of_property_read_string*() family of functions. */ -int of_property_count_strings(struct device_node *np, const char *propname) +int of_property_read_string_helper(struct device_node *np, const char *propname, + const char **out_strs, size_t sz, int skip) { struct property *prop = of_find_property(np, propname, NULL); - int i = 0; - size_t l = 0, total = 0; - const char *p; + int l = 0, i = 0; + const char *p, *end; if (!prop) return -EINVAL; if (!prop->value) return -ENODATA; - if (strnlen(prop->value, prop->length) >= prop->length) - return -EILSEQ; - p = prop->value; + end = p + prop->length; - for (i = 0; total < prop->length; total += l, p += l, i++) - l = strlen(p) + 1; - - return i; + for (i = 0; p < end && (!out_strs || i < skip + sz); i++, p += l) { + l = strnlen(p, end - p) + 1; + if (p + l > end) + return -EILSEQ; + if (out_strs && i >= skip) + *out_strs++ = p; + } + i -= skip; + return i <= 0 ? 
-ENODATA : i; } -EXPORT_SYMBOL_GPL(of_property_count_strings); +EXPORT_SYMBOL_GPL(of_property_read_string_helper); static int __of_parse_phandle_with_args(const struct device_node *np, const char *list_name, diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c index 0eb5c38..f5e8dc7 100644 --- a/drivers/of/selftest.c +++ b/drivers/of/selftest.c @@ -126,8 +126,9 @@ static void __init of_selftest_parse_phandle_with_args(void) selftest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc); } -static void __init of_selftest_property_match_string(void) +static void __init of_selftest_property_string(void) { + const char *strings[4]; struct device_node *np; int rc; @@ -145,13 +146,66 @@ static void __init of_selftest_property_match_string(void) rc = of_property_match_string(np, "phandle-list-names", "third"); selftest(rc == 2, "third expected:0 got:%i\n", rc); rc = of_property_match_string(np, "phandle-list-names", "fourth"); - selftest(rc == -ENODATA, "unmatched string; rc=%i", rc); + selftest(rc == -ENODATA, "unmatched string; rc=%i\n", rc); rc = of_property_match_string(np, "missing-property", "blah"); - selftest(rc == -EINVAL, "missing property; rc=%i", rc); + selftest(rc == -EINVAL, "missing property; rc=%i\n", rc); rc = of_property_match_string(np, "empty-property", "blah"); - selftest(rc == -ENODATA, "empty property; rc=%i", rc); + selftest(rc == -ENODATA, "empty property; rc=%i\n", rc); rc = of_property_match_string(np, "unterminated-string", "blah"); - selftest(rc == -EILSEQ, "unterminated string; rc=%i", rc); + selftest(rc == -EILSEQ, "unterminated string; rc=%i\n", rc); + + /* of_property_count_strings() tests */ + rc = of_property_count_strings(np, "string-property"); + selftest(rc == 1, "Incorrect string count; rc=%i\n", rc); + rc = of_property_count_strings(np, "phandle-list-names"); + selftest(rc == 3, "Incorrect string count; rc=%i\n", rc); + rc = of_property_count_strings(np, "unterminated-string"); + selftest(rc == -EILSEQ, "unterminated string; 
rc=%i\n", rc); + rc = of_property_count_strings(np, "unterminated-string-list"); + selftest(rc == -EILSEQ, "unterminated string array; rc=%i\n", rc); + + /* of_property_read_string_index() tests */ + rc = of_property_read_string_index(np, "string-property", 0, strings); + selftest(rc == 0 && !strcmp(strings[0], "foobar"), "of_property_read_string_index() failure; rc=%i\n", rc); + strings[0] = NULL; + rc = of_property_read_string_index(np, "string-property", 1, strings); + selftest(rc == -ENODATA && strings[0] == NULL, "of_property_read_string_index() failure; rc=%i\n", rc); + rc = of_property_read_string_index(np, "phandle-list-names", 0, strings); + selftest(rc == 0 && !strcmp(strings[0], "first"), "of_property_read_string_index() failure; rc=%i\n", rc); + rc = of_property_read_string_index(np, "phandle-list-names", 1, strings); + selftest(rc == 0 && !strcmp(strings[0], "second"), "of_property_read_string_index() failure; rc=%i\n", rc); + rc = of_property_read_string_index(np, "phandle-list-names", 2, strings); + selftest(rc == 0 && !strcmp(strings[0], "third"), "of_property_read_string_index() failure; rc=%i\n", rc); + strings[0] = NULL; + rc = of_property_read_string_index(np, "phandle-list-names", 3, strings); + selftest(rc == -ENODATA && strings[0] == NULL, "of_property_read_string_index() failure; rc=%i\n", rc); + strings[0] = NULL; + rc = of_property_read_string_index(np, "unterminated-string", 0, strings); + selftest(rc == -EILSEQ && strings[0] == NULL, "of_property_read_string_index() failure; rc=%i\n", rc); + rc = of_property_read_string_index(np, "unterminated-string-list", 0, strings); + selftest(rc == 0 && !strcmp(strings[0], "first"), "of_property_read_string_index() failure; rc=%i\n", rc); + strings[0] = NULL; + rc = of_property_read_string_index(np, "unterminated-string-list", 2, strings); /* should fail */ + selftest(rc == -EILSEQ && strings[0] == NULL, "of_property_read_string_index() failure; rc=%i\n", rc); + strings[1] = NULL; + + /* 
of_property_read_string_array() tests */ + rc = of_property_read_string_array(np, "string-property", strings, 4); + selftest(rc == 1, "Incorrect string count; rc=%i\n", rc); + rc = of_property_read_string_array(np, "phandle-list-names", strings, 4); + selftest(rc == 3, "Incorrect string count; rc=%i\n", rc); + rc = of_property_read_string_array(np, "unterminated-string", strings, 4); + selftest(rc == -EILSEQ, "unterminated string; rc=%i\n", rc); + /* -- An incorrectly formed string should cause a failure */ + rc = of_property_read_string_array(np, "unterminated-string-list", strings, 4); + selftest(rc == -EILSEQ, "unterminated string array; rc=%i\n", rc); + /* -- parsing the correctly formed strings should still work: */ + strings[2] = NULL; + rc = of_property_read_string_array(np, "unterminated-string-list", strings, 2); + selftest(rc == 2 && strings[2] == NULL, "of_property_read_string_array() failure; rc=%i\n", rc); + strings[1] = NULL; + rc = of_property_read_string_array(np, "phandle-list-names", strings, 1); + selftest(rc == 1 && strings[1] == NULL, "Overwrote end of string array; rc=%i, str='%s'\n", rc, strings[1]); } static int __init of_selftest(void) @@ -167,7 +221,7 @@ static int __init of_selftest(void) pr_info("start of selftest - you will see error messages\n"); of_selftest_parse_phandle_with_args(); - of_selftest_property_match_string(); + of_selftest_property_string(); pr_info("end of selftest - %s\n", selftest_passed ? 
"PASS" : "FAIL"); return 0; } diff --git a/drivers/parport/parport_serial.c b/drivers/parport/parport_serial.c index 1b8bdb7..72b7365 100644 --- a/drivers/parport/parport_serial.c +++ b/drivers/parport/parport_serial.c @@ -62,6 +62,7 @@ enum parport_pc_pci_cards { timedia_9079a, timedia_9079b, timedia_9079c, + wch_ch353_1s1p, wch_ch353_2s1p, sunix_2s1p, }; @@ -148,6 +149,7 @@ static struct parport_pc_pci cards[] = { /* timedia_9079a */ { 1, { { 2, 3 }, } }, /* timedia_9079b */ { 1, { { 2, 3 }, } }, /* timedia_9079c */ { 1, { { 2, 3 }, } }, + /* wch_ch353_1s1p*/ { 1, { { 1, -1}, } }, /* wch_ch353_2s1p*/ { 1, { { 2, -1}, } }, /* sunix_2s1p */ { 1, { { 3, -1 }, } }, }; @@ -253,6 +255,7 @@ static struct pci_device_id parport_serial_pci_tbl[] = { { 0x1409, 0x7168, 0x1409, 0xd079, 0, 0, timedia_9079c }, /* WCH CARDS */ + { 0x4348, 0x5053, PCI_ANY_ID, PCI_ANY_ID, 0, 0, wch_ch353_1s1p}, { 0x4348, 0x7053, 0x4348, 0x3253, 0, 0, wch_ch353_2s1p}, /* @@ -479,6 +482,12 @@ static struct pciserial_board pci_parport_serial_boards[] = { .base_baud = 921600, .uart_offset = 8, }, + [wch_ch353_1s1p] = { + .flags = FL_BASE0|FL_BASE_BARS, + .num_ports = 1, + .base_baud = 115200, + .uart_offset = 8, + }, [wch_ch353_2s1p] = { .flags = FL_BASE0|FL_BASE_BARS, .num_ports = 2, diff --git a/drivers/pci/host/pci-mvebu.c b/drivers/pci/host/pci-mvebu.c index 8efd11d..1324c3b 100644 --- a/drivers/pci/host/pci-mvebu.c +++ b/drivers/pci/host/pci-mvebu.c @@ -297,7 +297,7 @@ static void mvebu_pcie_handle_iobase_change(struct mvebu_pcie_port *port) port->iowin_base = port->pcie->io.start + iobase; port->iowin_size = ((0xFFF | ((port->bridge.iolimit & 0xF0) << 8) | (port->bridge.iolimitupper << 16)) - - iobase); + iobase) + 1; mvebu_mbus_add_window_remap_by_id(port->io_target, port->io_attr, port->iowin_base, port->iowin_size, @@ -331,7 +331,7 @@ static void mvebu_pcie_handle_membase_change(struct mvebu_pcie_port *port) port->memwin_base = ((port->bridge.membase & 0xFFF0) << 16); port->memwin_size = 
(((port->bridge.memlimit & 0xFFF0) << 16) | 0xFFFFF) - - port->memwin_base; + port->memwin_base + 1; mvebu_mbus_add_window_by_id(port->mem_target, port->mem_attr, port->memwin_base, port->memwin_size); @@ -747,9 +747,9 @@ static int mvebu_get_tgt_attr(struct device_node *np, int devfn, rangesz = pna + na + ns; nranges = rlen / sizeof(__be32) / rangesz; - for (i = 0; i < nranges; i++) { + for (i = 0; i < nranges; i++, range += rangesz) { u32 flags = of_read_number(range, 1); - u32 slot = of_read_number(range, 2); + u32 slot = of_read_number(range + 1, 1); u64 cpuaddr = of_read_number(range + na, pna); unsigned long rtype; @@ -757,14 +757,14 @@ static int mvebu_get_tgt_attr(struct device_node *np, int devfn, rtype = IORESOURCE_IO; else if (DT_FLAGS_TO_TYPE(flags) == DT_TYPE_MEM32) rtype = IORESOURCE_MEM; + else + continue; if (slot == PCI_SLOT(devfn) && type == rtype) { *tgt = DT_CPUADDR_TO_TARGET(cpuaddr); *attr = DT_CPUADDR_TO_ATTR(cpuaddr); return 0; } - - range += rangesz; } return -ENOENT; diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c index 127d6e6..d023af8 100644 --- a/drivers/pci/hotplug/rpaphp_core.c +++ b/drivers/pci/hotplug/rpaphp_core.c @@ -223,16 +223,16 @@ int rpaphp_get_drc_props(struct device_node *dn, int *drc_index, type_tmp = (char *) &types[1]; /* Iterate through parent properties, looking for my-drc-index */ - for (i = 0; i < indexes[0]; i++) { + for (i = 0; i < be32_to_cpu(indexes[0]); i++) { if ((unsigned int) indexes[i + 1] == *my_index) { if (drc_name) *drc_name = name_tmp; if (drc_type) *drc_type = type_tmp; if (drc_index) - *drc_index = *my_index; + *drc_index = be32_to_cpu(*my_index); if (drc_power_domain) - *drc_power_domain = domains[i+1]; + *drc_power_domain = be32_to_cpu(domains[i+1]); return 0; } name_tmp += (strlen(name_tmp) + 1); @@ -321,16 +321,19 @@ int rpaphp_add_slot(struct device_node *dn) /* register PCI devices */ name = (char *) &names[1]; type = (char *) &types[1]; - for (i = 0; i < 
indexes[0]; i++) { + for (i = 0; i < be32_to_cpu(indexes[0]); i++) { + int index; - slot = alloc_slot_struct(dn, indexes[i + 1], name, power_domains[i + 1]); + index = be32_to_cpu(indexes[i + 1]); + slot = alloc_slot_struct(dn, index, name, + be32_to_cpu(power_domains[i + 1])); if (!slot) return -ENOMEM; slot->type = simple_strtoul(type, NULL, 10); dbg("Found drc-index:0x%x drc-name:%s drc-type:%s\n", - indexes[i + 1], name, type); + index, name, type); retval = rpaphp_enable_slot(slot); if (!retval) diff --git a/drivers/pci/hotplug/shpchp_ctrl.c b/drivers/pci/hotplug/shpchp_ctrl.c index 5849927..6efc2ec 100644 --- a/drivers/pci/hotplug/shpchp_ctrl.c +++ b/drivers/pci/hotplug/shpchp_ctrl.c @@ -282,8 +282,8 @@ static int board_added(struct slot *p_slot) return WRONG_BUS_FREQUENCY; } - bsp = ctrl->pci_dev->bus->cur_bus_speed; - msp = ctrl->pci_dev->bus->max_bus_speed; + bsp = ctrl->pci_dev->subordinate->cur_bus_speed; + msp = ctrl->pci_dev->subordinate->max_bus_speed; /* Check if there are other slots or devices on the same bus */ if (!list_empty(&ctrl->pci_dev->subordinate->devices)) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 8312492..cda687d 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -567,6 +567,20 @@ out_unroll: return ret; } +static int msi_verify_entries(struct pci_dev *dev) +{ + struct msi_desc *entry; + + list_for_each_entry(entry, &dev->msi_list, list) { + if (!dev->no_64bit_msi || !entry->msg.address_hi) + continue; + dev_err(&dev->dev, "Device has broken 64-bit MSI but arch" + " tried to assign one above 4G\n"); + return -EIO; + } + return 0; +} + /** * msi_capability_init - configure device's MSI capability structure * @dev: pointer to the pci_dev data structure of MSI device function @@ -620,6 +634,13 @@ static int msi_capability_init(struct pci_dev *dev, int nvec) return ret; } + ret = msi_verify_entries(dev); + if (ret) { + msi_mask_irq(entry, mask, ~mask); + free_msi_irqs(dev); + return ret; + } + ret = 
populate_msi_sysfs(dev); if (ret) { msi_mask_irq(entry, mask, ~mask); @@ -733,7 +754,12 @@ static int msix_capability_init(struct pci_dev *dev, ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX); if (ret) - goto error; + goto out_avail; + + /* Check if all MSI entries honor device restrictions */ + ret = msi_verify_entries(dev); + if (ret) + goto out_free; /* * Some devices require MSI-X to be enabled before we can touch the @@ -746,10 +772,8 @@ static int msix_capability_init(struct pci_dev *dev, msix_program_entries(dev, entries); ret = populate_msi_sysfs(dev); - if (ret) { - ret = 0; - goto error; - } + if (ret) + goto out_free; /* Set MSI-X enabled bits and unmask the function */ pci_intx_for_msi(dev, 0); @@ -760,7 +784,7 @@ static int msix_capability_init(struct pci_dev *dev, return 0; -error: +out_avail: if (ret < 0) { /* * If we had some success, report the number of irqs @@ -777,6 +801,7 @@ error: ret = avail; } +out_free: free_msi_irqs(dev); return ret; diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 7128cfd..7919b7f 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -175,7 +175,7 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, { struct pci_dev *pci_dev = to_pci_dev(dev); - return sprintf(buf, "pci:v%08Xd%08Xsv%08Xsd%08Xbc%02Xsc%02Xi%02x\n", + return sprintf(buf, "pci:v%08Xd%08Xsv%08Xsd%08Xbc%02Xsc%02Xi%02X\n", pci_dev->vendor, pci_dev->device, pci_dev->subsystem_vendor, pci_dev->subsystem_device, (u8)(pci_dev->class >> 16), (u8)(pci_dev->class >> 8), diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 9fc3f1f..2d16354 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -782,12 +782,6 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) if (!__pci_complete_power_transition(dev, state)) error = 0; - /* - * When aspm_policy is "powersave" this call ensures - * that ASPM is configured. 
- */ - if (!error && dev->bus->self) - pcie_aspm_powersave_config_link(dev->bus->self); return error; } @@ -1120,12 +1114,18 @@ EXPORT_SYMBOL_GPL(pci_load_and_free_saved_state); static int do_pci_enable_device(struct pci_dev *dev, int bars) { int err; + struct pci_dev *bridge; u16 cmd; u8 pin; err = pci_set_power_state(dev, PCI_D0); if (err < 0 && err != -EIO) return err; + + bridge = pci_upstream_bridge(dev); + if (bridge) + pcie_aspm_powersave_config_link(bridge); + err = pcibios_enable_device(dev, bars); if (err < 0) return err; @@ -4135,7 +4135,7 @@ int pci_set_vga_state(struct pci_dev *dev, bool decode, u16 cmd; int rc; - WARN_ON((flags & PCI_VGA_STATE_CHANGE_DECODES) & (command_bits & ~(PCI_COMMAND_IO|PCI_COMMAND_MEMORY))); + WARN_ON((flags & PCI_VGA_STATE_CHANGE_DECODES) && (command_bits & ~(PCI_COMMAND_IO|PCI_COMMAND_MEMORY))); /* ARCH specific VGA enables */ rc = pci_set_vga_state_arch(dev, decode, command_bits, flags); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 7ef0f86..16b3bd6 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -214,14 +214,17 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, res->flags |= IORESOURCE_SIZEALIGN; if (res->flags & IORESOURCE_IO) { l &= PCI_BASE_ADDRESS_IO_MASK; + sz &= PCI_BASE_ADDRESS_IO_MASK; mask = PCI_BASE_ADDRESS_IO_MASK & (u32) IO_SPACE_LIMIT; } else { l &= PCI_BASE_ADDRESS_MEM_MASK; + sz &= PCI_BASE_ADDRESS_MEM_MASK; mask = (u32)PCI_BASE_ADDRESS_MEM_MASK; } } else { res->flags |= (l & IORESOURCE_ROM_ENABLE); l &= PCI_ROM_ADDRESS_MASK; + sz &= PCI_ROM_ADDRESS_MASK; mask = (u32)PCI_ROM_ADDRESS_MASK; } diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index f6c31fa..a7b7eea 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -28,6 +28,7 @@ #include <linux/ioport.h> #include <linux/sched.h> #include <linux/ktime.h> +#include <linux/mm.h> #include <asm/dma.h> /* isa_dma_bridge_buggy */ #include "pci.h" @@ -291,6 +292,25 @@ static void quirk_citrine(struct 
pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CITRINE, quirk_citrine); +/* On IBM Crocodile ipr SAS adapters, expand BAR to system page size */ +static void quirk_extend_bar_to_page(struct pci_dev *dev) +{ + int i; + + for (i = 0; i < PCI_STD_RESOURCE_END; i++) { + struct resource *r = &dev->resource[i]; + + if (r->flags & IORESOURCE_MEM && resource_size(r) < PAGE_SIZE) { + r->end = PAGE_SIZE - 1; + r->start = 0; + r->flags |= IORESOURCE_UNSET; + dev_info(&dev->dev, "expanded BAR %d to page size: %pR\n", + i, r); + } + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_IBM, 0x034a, quirk_extend_bar_to_page); + /* * S3 868 and 968 chips report region size equal to 32M, but they decode 64M. * If it's needed, re-allocate the region. @@ -2953,6 +2973,7 @@ static void disable_igfx_irq(struct pci_dev *dev) } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0102, disable_igfx_irq); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x010a, disable_igfx_irq); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0152, disable_igfx_irq); /* * Some devices may pass our check in pci_intx_mask_supported if diff --git a/drivers/pcmcia/Kconfig b/drivers/pcmcia/Kconfig index 0c657d6..51cf808 100644 --- a/drivers/pcmcia/Kconfig +++ b/drivers/pcmcia/Kconfig @@ -202,6 +202,7 @@ config PCMCIA_SA1111 depends on ARM && SA1111 && PCMCIA select PCMCIA_SOC_COMMON select PCMCIA_SA11XX_BASE if ARCH_SA1100 + select PCMCIA_PXA2XX if ARCH_LUBBOCK && SA1111 help Say Y here to include support for SA1111-based PCMCIA or CF sockets, found on the Jornada 720, Graphicsmaster and other @@ -217,7 +218,6 @@ config PCMCIA_PXA2XX || ARCOM_PCMCIA || ARCH_PXA_ESERIES || MACH_STARGATE2 \ || MACH_VPAC270 || MACH_BALLOON3 || MACH_COLIBRI \ || MACH_COLIBRI320 || MACH_H4700) - select PCMCIA_SA1111 if ARCH_LUBBOCK && SA1111 select PCMCIA_SOC_COMMON help Say Y here to include support for the PXA2xx PCMCIA controller diff --git a/drivers/pcmcia/Makefile b/drivers/pcmcia/Makefile index 7745b51..fd55a69 
100644 --- a/drivers/pcmcia/Makefile +++ b/drivers/pcmcia/Makefile @@ -49,6 +49,7 @@ sa1100_cs-y += sa1100_generic.o sa1100_cs-$(CONFIG_SA1100_ASSABET) += sa1100_assabet.o sa1100_cs-$(CONFIG_SA1100_CERF) += sa1100_cerf.o sa1100_cs-$(CONFIG_SA1100_COLLIE) += pxa2xx_sharpsl.o +sa1100_cs-$(CONFIG_SA1100_H3100) += sa1100_h3600.o sa1100_cs-$(CONFIG_SA1100_H3600) += sa1100_h3600.o sa1100_cs-$(CONFIG_SA1100_NANOENGINE) += sa1100_nanoengine.o sa1100_cs-$(CONFIG_SA1100_SHANNON) += sa1100_shannon.o diff --git a/drivers/pcmcia/at91_cf.c b/drivers/pcmcia/at91_cf.c index b8f5acf..de24232 100644 --- a/drivers/pcmcia/at91_cf.c +++ b/drivers/pcmcia/at91_cf.c @@ -245,7 +245,7 @@ static int at91_cf_dt_init(struct platform_device *pdev) } #endif -static int __init at91_cf_probe(struct platform_device *pdev) +static int at91_cf_probe(struct platform_device *pdev) { struct at91_cf_socket *cf; struct at91_cf_data *board = pdev->dev.platform_data; @@ -354,7 +354,7 @@ fail0a: return status; } -static int __exit at91_cf_remove(struct platform_device *pdev) +static int at91_cf_remove(struct platform_device *pdev) { struct at91_cf_socket *cf = platform_get_drvdata(pdev); @@ -404,14 +404,13 @@ static struct platform_driver at91_cf_driver = { .owner = THIS_MODULE, .of_match_table = of_match_ptr(at91_cf_dt_ids), }, - .remove = __exit_p(at91_cf_remove), + .probe = at91_cf_probe, + .remove = at91_cf_remove, .suspend = at91_cf_suspend, .resume = at91_cf_resume, }; -/*--------------------------------------------------------------------------*/ - -module_platform_driver_probe(at91_cf_driver, at91_cf_probe); +module_platform_driver(at91_cf_driver); MODULE_DESCRIPTION("AT91 Compact Flash Driver"); MODULE_AUTHOR("David Brownell"); diff --git a/drivers/pcmcia/sa1111_jornada720.c b/drivers/pcmcia/sa1111_jornada720.c index 3baa3ef..40e0403 100644 --- a/drivers/pcmcia/sa1111_jornada720.c +++ b/drivers/pcmcia/sa1111_jornada720.c @@ -9,6 +9,7 @@ #include <linux/device.h> #include <linux/errno.h> #include 
<linux/init.h> +#include <linux/io.h> #include <mach/hardware.h> #include <asm/hardware/sa1111.h> @@ -94,6 +95,7 @@ static struct pcmcia_low_level jornada720_pcmcia_ops = { int pcmcia_jornada720_init(struct device *dev) { int ret = -ENODEV; + struct sa1111_dev *sadev = SA1111_DEV(dev); if (machine_is_jornada720()) { unsigned int pin = GPIO_A0 | GPIO_A1 | GPIO_A2 | GPIO_A3; @@ -101,12 +103,12 @@ int pcmcia_jornada720_init(struct device *dev) GRER |= 0x00000002; /* Set GPIO_A<3:1> to be outputs for PCMCIA/CF power controller: */ - sa1111_set_io_dir(dev, pin, 0, 0); - sa1111_set_io(dev, pin, 0); - sa1111_set_sleep_io(dev, pin, 0); + sa1111_set_io_dir(sadev, pin, 0, 0); + sa1111_set_io(sadev, pin, 0); + sa1111_set_sleep_io(sadev, pin, 0); sa11xx_drv_pcmcia_ops(&jornada720_pcmcia_ops); - ret = sa1111_pcmcia_add(dev, &jornada720_pcmcia_ops, + ret = sa1111_pcmcia_add(sadev, &jornada720_pcmcia_ops, sa11xx_drv_pcmcia_add_one); } diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index a1ffae4..260a255 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -1796,14 +1796,15 @@ void pinctrl_unregister(struct pinctrl_dev *pctldev) if (pctldev == NULL) return; - mutex_lock(&pinctrldev_list_mutex); mutex_lock(&pctldev->mutex); - pinctrl_remove_device_debugfs(pctldev); + mutex_unlock(&pctldev->mutex); if (!IS_ERR(pctldev->p)) pinctrl_put(pctldev->p); + mutex_lock(&pinctrldev_list_mutex); + mutex_lock(&pctldev->mutex); /* TODO: check that no pinmuxes are still active? 
*/ list_del(&pctldev->node); /* Destroy descriptor tree */ diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c index c9076bd..59a8d32 100644 --- a/drivers/platform/x86/acer-wmi.c +++ b/drivers/platform/x86/acer-wmi.c @@ -572,6 +572,17 @@ static const struct dmi_system_id video_vendor_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5750"), }, }, + { + /* + * Note no video_set_backlight_video_vendor, we must use the + * acer interface, as there is no native backlight interface. + */ + .ident = "Acer KAV80", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "KAV80"), + }, + }, {} }; diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c index fa9a217..b264d8f 100644 --- a/drivers/platform/x86/dell-wmi.c +++ b/drivers/platform/x86/dell-wmi.c @@ -163,18 +163,24 @@ static void dell_wmi_notify(u32 value, void *context) const struct key_entry *key; int reported_key; u16 *buffer_entry = (u16 *)obj->buffer.pointer; + int buffer_size = obj->buffer.length/2; - if (dell_new_hk_type && (buffer_entry[1] != 0x10)) { + if (buffer_size >= 2 && dell_new_hk_type && buffer_entry[1] != 0x10) { pr_info("Received unknown WMI event (0x%x)\n", buffer_entry[1]); kfree(obj); return; } - if (dell_new_hk_type || buffer_entry[1] == 0x0) + if (buffer_size >= 3 && (dell_new_hk_type || buffer_entry[1] == 0x0)) reported_key = (int)buffer_entry[2]; - else + else if (buffer_size >= 2) reported_key = (int)buffer_entry[1] & 0xffff; + else { + pr_info("Received unknown WMI event\n"); + kfree(obj); + return; + } key = sparse_keymap_entry_from_scancode(dell_wmi_input_dev, reported_key); diff --git a/drivers/platform/x86/hp_accel.c b/drivers/platform/x86/hp_accel.c index 0ed96df..3458eb6 100644 --- a/drivers/platform/x86/hp_accel.c +++ b/drivers/platform/x86/hp_accel.c @@ -237,6 +237,7 @@ static struct dmi_system_id lis3lv02d_dmi_ids[] = { AXIS_DMI_MATCH("HPB64xx", "HP ProBook 64", xy_swap), AXIS_DMI_MATCH("HPB64xx", "HP 
EliteBook 84", xy_swap), AXIS_DMI_MATCH("HPB65xx", "HP ProBook 65", x_inverted), + AXIS_DMI_MATCH("HPZBook15", "HP ZBook 15", x_inverted), { NULL, } /* Laptop models without axis info (yet): * "NC6910" "HP Compaq 6910" diff --git a/drivers/platform/x86/pvpanic.c b/drivers/platform/x86/pvpanic.c index 47ae0c4..469e182 100644 --- a/drivers/platform/x86/pvpanic.c +++ b/drivers/platform/x86/pvpanic.c @@ -71,6 +71,7 @@ pvpanic_panic_notify(struct notifier_block *nb, unsigned long code, static struct notifier_block pvpanic_panic_nb = { .notifier_call = pvpanic_panic_notify, + .priority = 1, /* let this called before broken drm_fb_helper */ }; diff --git a/drivers/rapidio/devices/tsi721_dma.c b/drivers/rapidio/devices/tsi721_dma.c index 91245f5..47257b6 100644 --- a/drivers/rapidio/devices/tsi721_dma.c +++ b/drivers/rapidio/devices/tsi721_dma.c @@ -287,6 +287,12 @@ struct tsi721_tx_desc *tsi721_desc_get(struct tsi721_bdma_chan *bdma_chan) "desc %p not ACKed\n", tx_desc); } + if (ret == NULL) { + dev_dbg(bdma_chan->dchan.device->dev, + "%s: unable to obtain tx descriptor\n", __func__); + goto err_out; + } + i = bdma_chan->wr_count_next % bdma_chan->bd_num; if (i == bdma_chan->bd_num - 1) { i = 0; @@ -297,7 +303,7 @@ struct tsi721_tx_desc *tsi721_desc_get(struct tsi721_bdma_chan *bdma_chan) tx_desc->txd.phys = bdma_chan->bd_phys + i * sizeof(struct tsi721_dma_desc); tx_desc->hw_desc = &((struct tsi721_dma_desc *)bdma_chan->bd_base)[i]; - +err_out: spin_unlock_bh(&bdma_chan->lock); return ret; diff --git a/drivers/regulator/arizona-ldo1.c b/drivers/regulator/arizona-ldo1.c index 81d8681..406e50e 100644 --- a/drivers/regulator/arizona-ldo1.c +++ b/drivers/regulator/arizona-ldo1.c @@ -141,8 +141,6 @@ static struct regulator_ops arizona_ldo1_ops = { .map_voltage = regulator_map_voltage_linear, .get_voltage_sel = regulator_get_voltage_sel_regmap, .set_voltage_sel = regulator_set_voltage_sel_regmap, - .get_bypass = regulator_get_bypass_regmap, - .set_bypass = 
regulator_set_bypass_regmap, }; static const struct regulator_desc arizona_ldo1 = { @@ -153,11 +151,9 @@ static const struct regulator_desc arizona_ldo1 = { .vsel_reg = ARIZONA_LDO1_CONTROL_1, .vsel_mask = ARIZONA_LDO1_VSEL_MASK, - .bypass_reg = ARIZONA_LDO1_CONTROL_1, - .bypass_mask = ARIZONA_LDO1_BYPASS, .min_uV = 900000, - .uV_step = 50000, - .n_voltages = 7, + .uV_step = 25000, + .n_voltages = 13, .enable_time = 500, .owner = THIS_MODULE, @@ -203,6 +199,7 @@ static int arizona_ldo1_probe(struct platform_device *pdev) */ switch (arizona->type) { case WM5102: + case WM8997: desc = &arizona_ldo1_hc; ldo1->init_data = arizona_ldo1_dvfs; break; diff --git a/drivers/regulator/max77693.c b/drivers/regulator/max77693.c index ce4b96c..85a54b3 100644 --- a/drivers/regulator/max77693.c +++ b/drivers/regulator/max77693.c @@ -231,7 +231,7 @@ static int max77693_pmic_probe(struct platform_device *pdev) struct max77693_pmic_dev *max77693_pmic; struct max77693_regulator_data *rdata = NULL; int num_rdata, i, ret; - struct regulator_config config; + struct regulator_config config = { }; num_rdata = max77693_pmic_init_rdata(&pdev->dev, &rdata); if (!rdata || num_rdata <= 0) { diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 72c5cdb..ff20d90 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -290,7 +290,8 @@ int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) dev_dbg(&rtc->dev, "alarm rollover: %s\n", "year"); do { alarm->time.tm_year++; - } while (rtc_valid_tm(&alarm->time) != 0); + } while (!is_leap_year(alarm->time.tm_year + 1900) + && rtc_valid_tm(&alarm->time) != 0); break; default: @@ -298,7 +299,16 @@ int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) } done: - return 0; + err = rtc_valid_tm(&alarm->time); + + if (err) { + dev_warn(&rtc->dev, "invalid alarm value: %d-%d-%d %d:%d:%d\n", + alarm->time.tm_year + 1900, alarm->time.tm_mon + 1, + alarm->time.tm_mday, alarm->time.tm_hour, 
alarm->time.tm_min, + alarm->time.tm_sec); + } + + return err; } int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c index b86eec3..8ebf09f 100644 --- a/drivers/rtc/rtc-at91rm9200.c +++ b/drivers/rtc/rtc-at91rm9200.c @@ -48,6 +48,7 @@ struct at91_rtc_config { static const struct at91_rtc_config *at91_rtc_config; static DECLARE_COMPLETION(at91_rtc_updated); +static DECLARE_COMPLETION(at91_rtc_upd_rdy); static unsigned int at91_alarm_year = AT91_RTC_EPOCH; static void __iomem *at91_rtc_regs; static int irq; @@ -161,6 +162,8 @@ static int at91_rtc_settime(struct device *dev, struct rtc_time *tm) 1900 + tm->tm_year, tm->tm_mon, tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec); + wait_for_completion(&at91_rtc_upd_rdy); + /* Stop Time/Calendar from counting */ cr = at91_rtc_read(AT91_RTC_CR); at91_rtc_write(AT91_RTC_CR, cr | AT91_RTC_UPDCAL | AT91_RTC_UPDTIM); @@ -183,7 +186,9 @@ static int at91_rtc_settime(struct device *dev, struct rtc_time *tm) /* Restart Time/Calendar */ cr = at91_rtc_read(AT91_RTC_CR); + at91_rtc_write(AT91_RTC_SCCR, AT91_RTC_SECEV); at91_rtc_write(AT91_RTC_CR, cr & ~(AT91_RTC_UPDCAL | AT91_RTC_UPDTIM)); + at91_rtc_write_ier(AT91_RTC_SECEV); return 0; } @@ -290,8 +295,10 @@ static irqreturn_t at91_rtc_interrupt(int irq, void *dev_id) if (rtsr) { /* this interrupt is shared! Is it ours? */ if (rtsr & AT91_RTC_ALARM) events |= (RTC_AF | RTC_IRQF); - if (rtsr & AT91_RTC_SECEV) - events |= (RTC_UF | RTC_IRQF); + if (rtsr & AT91_RTC_SECEV) { + complete(&at91_rtc_upd_rdy); + at91_rtc_write_idr(AT91_RTC_SECEV); + } if (rtsr & AT91_RTC_ACKUPD) complete(&at91_rtc_updated); @@ -414,6 +421,11 @@ static int __init at91_rtc_probe(struct platform_device *pdev) } platform_set_drvdata(pdev, rtc); + /* enable SECEV interrupt in order to initialize at91_rtc_upd_rdy + * completion. 
+ */ + at91_rtc_write_ier(AT91_RTC_SECEV); + dev_info(&pdev->dev, "AT91 Real Time Clock driver.\n"); return 0; diff --git a/drivers/rtc/rtc-efi.c b/drivers/rtc/rtc-efi.c index 797aa02..8225b89 100644 --- a/drivers/rtc/rtc-efi.c +++ b/drivers/rtc/rtc-efi.c @@ -17,6 +17,7 @@ #include <linux/kernel.h> #include <linux/module.h> +#include <linux/stringify.h> #include <linux/time.h> #include <linux/platform_device.h> #include <linux/rtc.h> @@ -35,7 +36,7 @@ static inline int compute_yday(efi_time_t *eft) { /* efi_time_t.month is in the [1-12] so, we need -1 */ - return rtc_year_days(eft->day - 1, eft->month - 1, eft->year); + return rtc_year_days(eft->day, eft->month - 1, eft->year); } /* * returns day of the week [0-6] 0=Sunday @@ -48,8 +49,8 @@ compute_wday(efi_time_t *eft) int y; int ndays = 0; - if (eft->year < 1998) { - pr_err("EFI year < 1998, invalid date\n"); + if (eft->year < EFI_RTC_EPOCH) { + pr_err("EFI year < " __stringify(EFI_RTC_EPOCH) ", invalid date\n"); return -1; } @@ -78,19 +79,36 @@ convert_to_efi_time(struct rtc_time *wtime, efi_time_t *eft) eft->timezone = EFI_UNSPECIFIED_TIMEZONE; } -static void +static bool convert_from_efi_time(efi_time_t *eft, struct rtc_time *wtime) { memset(wtime, 0, sizeof(*wtime)); + + if (eft->second >= 60) + return false; wtime->tm_sec = eft->second; + + if (eft->minute >= 60) + return false; wtime->tm_min = eft->minute; + + if (eft->hour >= 24) + return false; wtime->tm_hour = eft->hour; + + if (!eft->day || eft->day > 31) + return false; wtime->tm_mday = eft->day; + + if (!eft->month || eft->month > 12) + return false; wtime->tm_mon = eft->month - 1; wtime->tm_year = eft->year - 1900; /* day of the week [0-6], Sunday=0 */ wtime->tm_wday = compute_wday(eft); + if (wtime->tm_wday < 0) + return false; /* day in the year [1-365]*/ wtime->tm_yday = compute_yday(eft); @@ -106,6 +124,8 @@ convert_from_efi_time(efi_time_t *eft, struct rtc_time *wtime) default: wtime->tm_isdst = -1; } + + return true; } static int 
efi_read_alarm(struct device *dev, struct rtc_wkalrm *wkalrm) @@ -122,7 +142,8 @@ static int efi_read_alarm(struct device *dev, struct rtc_wkalrm *wkalrm) if (status != EFI_SUCCESS) return -EINVAL; - convert_from_efi_time(&eft, &wkalrm->time); + if (!convert_from_efi_time(&eft, &wkalrm->time)) + return -EIO; return rtc_valid_tm(&wkalrm->time); } @@ -163,7 +184,8 @@ static int efi_read_time(struct device *dev, struct rtc_time *tm) return -EINVAL; } - convert_from_efi_time(&eft, tm); + if (!convert_from_efi_time(&eft, tm)) + return -EIO; return rtc_valid_tm(tm); } diff --git a/drivers/rtc/rtc-sirfsoc.c b/drivers/rtc/rtc-sirfsoc.c index 63460cf..3b13401 100644 --- a/drivers/rtc/rtc-sirfsoc.c +++ b/drivers/rtc/rtc-sirfsoc.c @@ -290,14 +290,6 @@ static int sirfsoc_rtc_probe(struct platform_device *pdev) rtc_div = ((32768 / RTC_HZ) / 2) - 1; sirfsoc_rtc_iobrg_writel(rtc_div, rtcdrv->rtc_base + RTC_DIV); - rtcdrv->rtc = rtc_device_register(pdev->name, &(pdev->dev), - &sirfsoc_rtc_ops, THIS_MODULE); - if (IS_ERR(rtcdrv->rtc)) { - err = PTR_ERR(rtcdrv->rtc); - dev_err(&pdev->dev, "can't register RTC device\n"); - return err; - } - /* 0x3 -> RTC_CLK */ sirfsoc_rtc_iobrg_writel(SIRFSOC_RTC_CLK, rtcdrv->rtc_base + RTC_CLOCK_SWITCH); @@ -312,6 +304,14 @@ static int sirfsoc_rtc_probe(struct platform_device *pdev) rtcdrv->overflow_rtc = sirfsoc_rtc_iobrg_readl(rtcdrv->rtc_base + RTC_SW_VALUE); + rtcdrv->rtc = rtc_device_register(pdev->name, &(pdev->dev), + &sirfsoc_rtc_ops, THIS_MODULE); + if (IS_ERR(rtcdrv->rtc)) { + err = PTR_ERR(rtcdrv->rtc); + dev_err(&pdev->dev, "can't register RTC device\n"); + return err; + } + rtcdrv->irq = platform_get_irq(pdev, 0); err = devm_request_irq( &pdev->dev, diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c index eb5d227..19915c5 100644 --- a/drivers/s390/char/con3215.c +++ b/drivers/s390/char/con3215.c @@ -288,12 +288,16 @@ static void raw3215_timeout(unsigned long __data) unsigned long flags; 
spin_lock_irqsave(get_ccwdev_lock(raw->cdev), flags); - if (raw->flags & RAW3215_TIMER_RUNS) { - del_timer(&raw->timer); - raw->flags &= ~RAW3215_TIMER_RUNS; - if (!(raw->port.flags & ASYNC_SUSPENDED)) { - raw3215_mk_write_req(raw); - raw3215_start_io(raw); + raw->flags &= ~RAW3215_TIMER_RUNS; + if (!(raw->port.flags & ASYNC_SUSPENDED)) { + raw3215_mk_write_req(raw); + raw3215_start_io(raw); + if ((raw->queued_read || raw->queued_write) && + !(raw->flags & RAW3215_WORKING) && + !(raw->flags & RAW3215_TIMER_RUNS)) { + raw->timer.expires = RAW3215_TIMEOUT + jiffies; + add_timer(&raw->timer); + raw->flags |= RAW3215_TIMER_RUNS; } } spin_unlock_irqrestore(get_ccwdev_lock(raw->cdev), flags); @@ -317,17 +321,15 @@ static inline void raw3215_try_io(struct raw3215_info *raw) (raw->flags & RAW3215_FLUSHING)) { /* execute write requests bigger than minimum size */ raw3215_start_io(raw); - if (raw->flags & RAW3215_TIMER_RUNS) { - del_timer(&raw->timer); - raw->flags &= ~RAW3215_TIMER_RUNS; - } - } else if (!(raw->flags & RAW3215_TIMER_RUNS)) { - /* delay small writes */ - raw->timer.expires = RAW3215_TIMEOUT + jiffies; - add_timer(&raw->timer); - raw->flags |= RAW3215_TIMER_RUNS; } } + if ((raw->queued_read || raw->queued_write) && + !(raw->flags & RAW3215_WORKING) && + !(raw->flags & RAW3215_TIMER_RUNS)) { + raw->timer.expires = RAW3215_TIMEOUT + jiffies; + add_timer(&raw->timer); + raw->flags |= RAW3215_TIMER_RUNS; + } } /* @@ -922,7 +924,7 @@ static int __init con3215_init(void) raw3215_freelist = req; } - cdev = ccw_device_probe_console(); + cdev = ccw_device_probe_console(&raw3215_ccw_driver); if (IS_ERR(cdev)) return -ENODEV; @@ -1027,12 +1029,26 @@ static int tty3215_write(struct tty_struct * tty, const unsigned char *buf, int count) { struct raw3215_info *raw; + int i, written; if (!tty) return 0; raw = (struct raw3215_info *) tty->driver_data; - raw3215_write(raw, buf, count); - return count; + written = count; + while (count > 0) { + for (i = 0; i < count; i++) + if 
(buf[i] == '\t' || buf[i] == '\n') + break; + raw3215_write(raw, buf, i); + count -= i; + buf += i; + if (count > 0) { + raw3215_putchar(raw, *buf); + count--; + buf++; + } + } + return written; } /* @@ -1180,7 +1196,7 @@ static int __init tty3215_init(void) driver->subtype = SYSTEM_TYPE_TTY; driver->init_termios = tty_std_termios; driver->init_termios.c_iflag = IGNBRK | IGNPAR; - driver->init_termios.c_oflag = ONLCR | XTABS; + driver->init_termios.c_oflag = ONLCR; driver->init_termios.c_lflag = ISIG; driver->flags = TTY_DRIVER_REAL_RAW; tty_set_operations(driver, &tty3215_ops); diff --git a/drivers/s390/char/con3270.c b/drivers/s390/char/con3270.c index 699fd3e..bb6b0df 100644 --- a/drivers/s390/char/con3270.c +++ b/drivers/s390/char/con3270.c @@ -576,7 +576,6 @@ static struct console con3270 = { static int __init con3270_init(void) { - struct ccw_device *cdev; struct raw3270 *rp; void *cbuf; int i; @@ -591,10 +590,7 @@ con3270_init(void) cpcmd("TERM AUTOCR OFF", NULL, 0, NULL); } - cdev = ccw_device_probe_console(); - if (IS_ERR(cdev)) - return -ENODEV; - rp = raw3270_setup_console(cdev); + rp = raw3270_setup_console(); if (IS_ERR(rp)) return PTR_ERR(rp); diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c index 24a08e8..651d1f5 100644 --- a/drivers/s390/char/raw3270.c +++ b/drivers/s390/char/raw3270.c @@ -776,16 +776,24 @@ raw3270_setup_device(struct ccw_device *cdev, struct raw3270 *rp, char *ascebc) } #ifdef CONFIG_TN3270_CONSOLE +/* Tentative definition - see below for actual definition. */ +static struct ccw_driver raw3270_ccw_driver; + /* * Setup 3270 device configured as console. 
*/ -struct raw3270 __init *raw3270_setup_console(struct ccw_device *cdev) +struct raw3270 __init *raw3270_setup_console(void) { + struct ccw_device *cdev; unsigned long flags; struct raw3270 *rp; char *ascebc; int rc; + cdev = ccw_device_probe_console(&raw3270_ccw_driver); + if (IS_ERR(cdev)) + return ERR_CAST(cdev); + rp = kzalloc(sizeof(struct raw3270), GFP_KERNEL | GFP_DMA); ascebc = kzalloc(256, GFP_KERNEL); rc = raw3270_setup_device(cdev, rp, ascebc); diff --git a/drivers/s390/char/raw3270.h b/drivers/s390/char/raw3270.h index 7b73ff8..359276a 100644 --- a/drivers/s390/char/raw3270.h +++ b/drivers/s390/char/raw3270.h @@ -190,7 +190,7 @@ raw3270_put_view(struct raw3270_view *view) wake_up(&raw3270_wait_queue); } -struct raw3270 *raw3270_setup_console(struct ccw_device *cdev); +struct raw3270 *raw3270_setup_console(void); void raw3270_wait_cons_dev(struct raw3270 *); /* Notifier for device addition/removal */ diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 13299f9..ec0951a 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -560,18 +560,27 @@ static void chsc_process_sei_nt0(struct chsc_sei_nt0_area *sei_area) static void chsc_process_event_information(struct chsc_sei *sei, u64 ntsm) { - do { + static int ntsm_unsupported; + + while (true) { memset(sei, 0, sizeof(*sei)); sei->request.length = 0x0010; sei->request.code = 0x000e; - sei->ntsm = ntsm; + if (!ntsm_unsupported) + sei->ntsm = ntsm; if (chsc(sei)) break; if (sei->response.code != 0x0001) { - CIO_CRW_EVENT(2, "chsc: sei failed (rc=%04x)\n", - sei->response.code); + CIO_CRW_EVENT(2, "chsc: sei failed (rc=%04x, ntsm=%llx)\n", + sei->response.code, sei->ntsm); + + if (sei->response.code == 3 && sei->ntsm) { + /* Fallback for old firmware. 
*/ + ntsm_unsupported = 1; + continue; + } break; } @@ -587,7 +596,10 @@ static void chsc_process_event_information(struct chsc_sei *sei, u64 ntsm) CIO_CRW_EVENT(2, "chsc: unhandled nt: %d\n", sei->nt); break; } - } while (sei->u.nt0_area.flags & 0x80); + + if (!(sei->u.nt0_area.flags & 0x80)) + break; + } } /* diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index e4a7ab2..3a33be6 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -1610,7 +1610,7 @@ out_unlock: return rc; } -struct ccw_device *ccw_device_probe_console(void) +struct ccw_device *ccw_device_probe_console(struct ccw_driver *drv) { struct io_subchannel_private *io_priv; struct ccw_device *cdev; @@ -1632,6 +1632,7 @@ struct ccw_device *ccw_device_probe_console(void) kfree(io_priv); return cdev; } + cdev->drv = drv; set_io_private(sch, io_priv); ret = ccw_device_console_enable(cdev, sch); if (ret) { diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 02300dc..2efa66c 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -44,6 +44,7 @@ #include <linux/hrtimer.h> #include <linux/ktime.h> #include <asm/facility.h> +#include <linux/crypto.h> #include "ap_bus.h" @@ -71,7 +72,7 @@ MODULE_AUTHOR("IBM Corporation"); MODULE_DESCRIPTION("Adjunct Processor Bus driver, " \ "Copyright IBM Corp. 
2006, 2012"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("z90crypt"); +MODULE_ALIAS_CRYPTO("z90crypt"); /* * Module parameter diff --git a/drivers/sbus/char/bbc_envctrl.c b/drivers/sbus/char/bbc_envctrl.c index 160e751..0787b97 100644 --- a/drivers/sbus/char/bbc_envctrl.c +++ b/drivers/sbus/char/bbc_envctrl.c @@ -452,6 +452,9 @@ static void attach_one_temp(struct bbc_i2c_bus *bp, struct platform_device *op, if (!tp) return; + INIT_LIST_HEAD(&tp->bp_list); + INIT_LIST_HEAD(&tp->glob_list); + tp->client = bbc_i2c_attach(bp, op); if (!tp->client) { kfree(tp); @@ -497,6 +500,9 @@ static void attach_one_fan(struct bbc_i2c_bus *bp, struct platform_device *op, if (!fp) return; + INIT_LIST_HEAD(&fp->bp_list); + INIT_LIST_HEAD(&fp->glob_list); + fp->client = bbc_i2c_attach(bp, op); if (!fp->client) { kfree(fp); diff --git a/drivers/sbus/char/bbc_i2c.c b/drivers/sbus/char/bbc_i2c.c index c1441ed..e0e6cd6 100644 --- a/drivers/sbus/char/bbc_i2c.c +++ b/drivers/sbus/char/bbc_i2c.c @@ -301,13 +301,18 @@ static struct bbc_i2c_bus * attach_one_i2c(struct platform_device *op, int index if (!bp) return NULL; + INIT_LIST_HEAD(&bp->temps); + INIT_LIST_HEAD(&bp->fans); + bp->i2c_control_regs = of_ioremap(&op->resource[0], 0, 0x2, "bbc_i2c_regs"); if (!bp->i2c_control_regs) goto fail; - bp->i2c_bussel_reg = of_ioremap(&op->resource[1], 0, 0x1, "bbc_i2c_bussel"); - if (!bp->i2c_bussel_reg) - goto fail; + if (op->num_resources == 2) { + bp->i2c_bussel_reg = of_ioremap(&op->resource[1], 0, 0x1, "bbc_i2c_bussel"); + if (!bp->i2c_bussel_reg) + goto fail; + } bp->waiting = 0; init_waitqueue_head(&bp->wq); diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c index 1e9d6ad..7563b3d 100644 --- a/drivers/scsi/NCR5380.c +++ b/drivers/scsi/NCR5380.c @@ -2655,14 +2655,14 @@ static void NCR5380_dma_complete(NCR5380_instance * instance) { * * Purpose : abort a command * - * Inputs : cmd - the Scsi_Cmnd to abort, code - code to set the - * host byte of the result field to, if zero DID_ABORTED is + * 
Inputs : cmd - the Scsi_Cmnd to abort, code - code to set the + * host byte of the result field to, if zero DID_ABORTED is * used. * - * Returns : 0 - success, -1 on failure. + * Returns : SUCCESS - success, FAILED on failure. * - * XXX - there is no way to abort the command that is currently - * connected, you have to wait for it to complete. If this is + * XXX - there is no way to abort the command that is currently + * connected, you have to wait for it to complete. If this is * a problem, we could implement longjmp() / setjmp(), setjmp() * called where the loop started in NCR5380_main(). * @@ -2712,7 +2712,7 @@ static int NCR5380_abort(Scsi_Cmnd * cmd) { * aborted flag and get back into our main loop. */ - return 0; + return SUCCESS; } #endif diff --git a/drivers/scsi/aha1740.c b/drivers/scsi/aha1740.c index 5f31017..31ace4b 100644 --- a/drivers/scsi/aha1740.c +++ b/drivers/scsi/aha1740.c @@ -531,7 +531,7 @@ static int aha1740_eh_abort_handler (Scsi_Cmnd *dummy) * quiet as possible... 
*/ - return 0; + return SUCCESS; } static struct scsi_host_template aha1740_template = { diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c index 278c9fa..1822cb9 100644 --- a/drivers/scsi/arcmsr/arcmsr_hba.c +++ b/drivers/scsi/arcmsr/arcmsr_hba.c @@ -2501,16 +2501,15 @@ static int arcmsr_polling_ccbdone(struct AdapterControlBlock *acb, static int arcmsr_iop_confirm(struct AdapterControlBlock *acb) { uint32_t cdb_phyaddr, cdb_phyaddr_hi32; - dma_addr_t dma_coherent_handle; + /* ******************************************************************** ** here we need to tell iop 331 our freeccb.HighPart ** if freeccb.HighPart is not zero ******************************************************************** */ - dma_coherent_handle = acb->dma_coherent_handle; - cdb_phyaddr = (uint32_t)(dma_coherent_handle); - cdb_phyaddr_hi32 = (uint32_t)((cdb_phyaddr >> 16) >> 16); + cdb_phyaddr = lower_32_bits(acb->dma_coherent_handle); + cdb_phyaddr_hi32 = upper_32_bits(acb->dma_coherent_handle); acb->cdb_phyaddr_hi32 = cdb_phyaddr_hi32; /* *********************************************************************** diff --git a/drivers/scsi/atari_NCR5380.c b/drivers/scsi/atari_NCR5380.c index 0f3cdbc..30073d4 100644 --- a/drivers/scsi/atari_NCR5380.c +++ b/drivers/scsi/atari_NCR5380.c @@ -2613,7 +2613,7 @@ static void NCR5380_reselect(struct Scsi_Host *instance) * host byte of the result field to, if zero DID_ABORTED is * used. * - * Returns : 0 - success, -1 on failure. + * Returns : SUCCESS - success, FAILED on failure. * * XXX - there is no way to abort the command that is currently * connected, you have to wait for it to complete. 
If this is diff --git a/drivers/scsi/be2iscsi/be_mgmt.c b/drivers/scsi/be2iscsi/be_mgmt.c index 245a959..ef0a78b 100644 --- a/drivers/scsi/be2iscsi/be_mgmt.c +++ b/drivers/scsi/be2iscsi/be_mgmt.c @@ -812,17 +812,20 @@ mgmt_static_ip_modify(struct beiscsi_hba *phba, if (ip_action == IP_ACTION_ADD) { memcpy(req->ip_params.ip_record.ip_addr.addr, ip_param->value, - ip_param->len); + sizeof(req->ip_params.ip_record.ip_addr.addr)); if (subnet_param) memcpy(req->ip_params.ip_record.ip_addr.subnet_mask, - subnet_param->value, subnet_param->len); + subnet_param->value, + sizeof(req->ip_params.ip_record.ip_addr.subnet_mask)); } else { memcpy(req->ip_params.ip_record.ip_addr.addr, - if_info->ip_addr.addr, ip_param->len); + if_info->ip_addr.addr, + sizeof(req->ip_params.ip_record.ip_addr.addr)); memcpy(req->ip_params.ip_record.ip_addr.subnet_mask, - if_info->ip_addr.subnet_mask, ip_param->len); + if_info->ip_addr.subnet_mask, + sizeof(req->ip_params.ip_record.ip_addr.subnet_mask)); } rc = mgmt_exec_nonemb_cmd(phba, &nonemb_cmd, NULL, 0); @@ -850,7 +853,7 @@ static int mgmt_modify_gateway(struct beiscsi_hba *phba, uint8_t *gt_addr, req->action = gtway_action; req->ip_addr.ip_type = BE2_IPV4; - memcpy(req->ip_addr.addr, gt_addr, param_len); + memcpy(req->ip_addr.addr, gt_addr, sizeof(req->ip_addr.addr)); return mgmt_exec_nonemb_cmd(phba, &nonemb_cmd, NULL, 0); } diff --git a/drivers/scsi/bfa/bfa_ioc.h b/drivers/scsi/bfa/bfa_ioc.h index 90814fe..d5b3f66 100644 --- a/drivers/scsi/bfa/bfa_ioc.h +++ b/drivers/scsi/bfa/bfa_ioc.h @@ -72,7 +72,7 @@ struct bfa_sge_s { } while (0) #define bfa_swap_words(_x) ( \ - ((_x) << 32) | ((_x) >> 32)) + ((u64)(_x) << 32) | ((u64)(_x) >> 32)) #ifdef __BIG_ENDIAN #define bfa_sge_to_be(_x) diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index 69ac554..aad5535 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -411,6 +411,7 @@ static int bnx2fc_rcv(struct sk_buff *skb, struct 
net_device *dev, struct fc_frame_header *fh; struct fcoe_rcv_info *fr; struct fcoe_percpu_s *bg; + struct sk_buff *tmp_skb; unsigned short oxid; interface = container_of(ptype, struct bnx2fc_interface, @@ -423,6 +424,12 @@ static int bnx2fc_rcv(struct sk_buff *skb, struct net_device *dev, goto err; } + tmp_skb = skb_share_check(skb, GFP_ATOMIC); + if (!tmp_skb) + goto err; + + skb = tmp_skb; + if (unlikely(eth_hdr(skb)->h_proto != htons(ETH_P_FCOE))) { printk(KERN_ERR PFX "bnx2fc_rcv: Wrong FC type frame\n"); goto err; diff --git a/drivers/scsi/bnx2fc/bnx2fc_hwi.c b/drivers/scsi/bnx2fc/bnx2fc_hwi.c index 46a3765..f819cd1 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_hwi.c +++ b/drivers/scsi/bnx2fc/bnx2fc_hwi.c @@ -2023,7 +2023,7 @@ static int bnx2fc_allocate_hash_table(struct bnx2fc_hba *hba) dma_segment_array = kzalloc(dma_segment_array_size, GFP_KERNEL); if (!dma_segment_array) { printk(KERN_ERR PFX "hash table pointers (dma) alloc failed\n"); - return -ENOMEM; + goto cleanup_ht; } for (i = 0; i < segment_count; ++i) { @@ -2034,15 +2034,7 @@ static int bnx2fc_allocate_hash_table(struct bnx2fc_hba *hba) GFP_KERNEL); if (!hba->hash_tbl_segments[i]) { printk(KERN_ERR PFX "hash segment alloc failed\n"); - while (--i >= 0) { - dma_free_coherent(&hba->pcidev->dev, - BNX2FC_HASH_TBL_CHUNK_SIZE, - hba->hash_tbl_segments[i], - dma_segment_array[i]); - hba->hash_tbl_segments[i] = NULL; - } - kfree(dma_segment_array); - return -ENOMEM; + goto cleanup_dma; } memset(hba->hash_tbl_segments[i], 0, BNX2FC_HASH_TBL_CHUNK_SIZE); @@ -2054,8 +2046,7 @@ static int bnx2fc_allocate_hash_table(struct bnx2fc_hba *hba) GFP_KERNEL); if (!hba->hash_tbl_pbl) { printk(KERN_ERR PFX "hash table pbl alloc failed\n"); - kfree(dma_segment_array); - return -ENOMEM; + goto cleanup_dma; } memset(hba->hash_tbl_pbl, 0, PAGE_SIZE); @@ -2080,6 +2071,22 @@ static int bnx2fc_allocate_hash_table(struct bnx2fc_hba *hba) } kfree(dma_segment_array); return 0; + +cleanup_dma: + for (i = 0; i < segment_count; ++i) { + 
if (hba->hash_tbl_segments[i]) + dma_free_coherent(&hba->pcidev->dev, + BNX2FC_HASH_TBL_CHUNK_SIZE, + hba->hash_tbl_segments[i], + dma_segment_array[i]); + } + + kfree(dma_segment_array); + +cleanup_ht: + kfree(hba->hash_tbl_segments); + hba->hash_tbl_segments = NULL; + return -ENOMEM; } /** diff --git a/drivers/scsi/esas2r/esas2r_main.c b/drivers/scsi/esas2r/esas2r_main.c index 4abf127..5718b1f 100644 --- a/drivers/scsi/esas2r/esas2r_main.c +++ b/drivers/scsi/esas2r/esas2r_main.c @@ -1057,7 +1057,7 @@ int esas2r_eh_abort(struct scsi_cmnd *cmd) cmd->scsi_done(cmd); - return 0; + return SUCCESS; } spin_lock_irqsave(&a->queue_lock, flags); diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 0eb0940..d535e75 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -3140,7 +3140,7 @@ static int hpsa_big_passthru_ioctl(struct ctlr_info *h, void __user *argp) } if (ioc->Request.Type.Direction == XFER_WRITE) { if (copy_from_user(buff[sg_used], data_ptr, sz)) { - status = -ENOMEM; + status = -EFAULT; goto cleanup1; } } else diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index fa76440..c5bb0e0 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -185,6 +185,11 @@ static struct viosrp_crq *crq_queue_next_crq(struct crq_queue *queue) if (crq->valid & 0x80) { if (++queue->cur == queue->size) queue->cur = 0; + + /* Ensure the read of the valid bit occurs before reading any + * other bits of the CRQ entry + */ + rmb(); } else crq = NULL; spin_unlock_irqrestore(&queue->lock, flags); @@ -203,6 +208,11 @@ static int ibmvscsi_send_crq(struct ibmvscsi_host_data *hostdata, { struct vio_dev *vdev = to_vio_dev(hostdata->dev); + /* + * Ensure the command buffer is flushed to memory before handing it + * over to the VIOS to prevent it from fetching any stale data. 
+ */ + mb(); return plpar_hcall_norets(H_SEND_CRQ, vdev->unit_address, word1, word2); } @@ -797,7 +807,8 @@ static void purge_requests(struct ibmvscsi_host_data *hostdata, int error_code) evt->hostdata->dev); if (evt->cmnd_done) evt->cmnd_done(evt->cmnd); - } else if (evt->done) + } else if (evt->done && evt->crq.format != VIOSRP_MAD_FORMAT && + evt->iu.srp.login_req.opcode != SRP_LOGIN_REQ) evt->done(evt); free_event_struct(&evt->hostdata->pool, evt); spin_lock_irqsave(hostdata->host->host_lock, flags); diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 573f412..5f84165 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -683,6 +683,7 @@ static void ipr_init_ipr_cmnd(struct ipr_cmnd *ipr_cmd, ipr_reinit_ipr_cmnd(ipr_cmd); ipr_cmd->u.scratch = 0; ipr_cmd->sibling = NULL; + ipr_cmd->eh_comp = NULL; ipr_cmd->fast_done = fast_done; init_timer(&ipr_cmd->timer); } @@ -848,6 +849,8 @@ static void ipr_scsi_eh_done(struct ipr_cmnd *ipr_cmd) scsi_dma_unmap(ipr_cmd->scsi_cmd); scsi_cmd->scsi_done(scsi_cmd); + if (ipr_cmd->eh_comp) + complete(ipr_cmd->eh_comp); list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); } @@ -4805,6 +4808,84 @@ static int ipr_slave_alloc(struct scsi_device *sdev) return rc; } +/** + * ipr_match_lun - Match function for specified LUN + * @ipr_cmd: ipr command struct + * @device: device to match (sdev) + * + * Returns: + * 1 if command matches sdev / 0 if command does not match sdev + **/ +static int ipr_match_lun(struct ipr_cmnd *ipr_cmd, void *device) +{ + if (ipr_cmd->scsi_cmd && ipr_cmd->scsi_cmd->device == device) + return 1; + return 0; +} + +/** + * ipr_wait_for_ops - Wait for matching commands to complete + * @ipr_cmd: ipr command struct + * @device: device to match (sdev) + * @match: match function to use + * + * Returns: + * SUCCESS / FAILED + **/ +static int ipr_wait_for_ops(struct ipr_ioa_cfg *ioa_cfg, void *device, + int (*match)(struct ipr_cmnd *, void *)) +{ + struct ipr_cmnd *ipr_cmd; + int wait; + unsigned long 
flags; + struct ipr_hrr_queue *hrrq; + signed long timeout = IPR_ABORT_TASK_TIMEOUT; + DECLARE_COMPLETION_ONSTACK(comp); + + ENTER; + do { + wait = 0; + + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock_irqsave(hrrq->lock, flags); + list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) { + if (match(ipr_cmd, device)) { + ipr_cmd->eh_comp = &comp; + wait++; + } + } + spin_unlock_irqrestore(hrrq->lock, flags); + } + + if (wait) { + timeout = wait_for_completion_timeout(&comp, timeout); + + if (!timeout) { + wait = 0; + + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock_irqsave(hrrq->lock, flags); + list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) { + if (match(ipr_cmd, device)) { + ipr_cmd->eh_comp = NULL; + wait++; + } + } + spin_unlock_irqrestore(hrrq->lock, flags); + } + + if (wait) + dev_err(&ioa_cfg->pdev->dev, "Timed out waiting for aborted commands\n"); + LEAVE; + return wait ? FAILED : SUCCESS; + } + } + } while (wait); + + LEAVE; + return SUCCESS; +} + static int ipr_eh_host_reset(struct scsi_cmnd *cmd) { struct ipr_ioa_cfg *ioa_cfg; @@ -5023,11 +5104,17 @@ static int __ipr_eh_dev_reset(struct scsi_cmnd *scsi_cmd) static int ipr_eh_dev_reset(struct scsi_cmnd *cmd) { int rc; + struct ipr_ioa_cfg *ioa_cfg; + + ioa_cfg = (struct ipr_ioa_cfg *) cmd->device->host->hostdata; spin_lock_irq(cmd->device->host->host_lock); rc = __ipr_eh_dev_reset(cmd); spin_unlock_irq(cmd->device->host->host_lock); + if (rc == SUCCESS) + rc = ipr_wait_for_ops(ioa_cfg, cmd->device, ipr_match_lun); + return rc; } @@ -5205,13 +5292,18 @@ static int ipr_eh_abort(struct scsi_cmnd *scsi_cmd) { unsigned long flags; int rc; + struct ipr_ioa_cfg *ioa_cfg; ENTER; + ioa_cfg = (struct ipr_ioa_cfg *) scsi_cmd->device->host->hostdata; + spin_lock_irqsave(scsi_cmd->device->host->host_lock, flags); rc = ipr_cancel_op(scsi_cmd); spin_unlock_irqrestore(scsi_cmd->device->host->host_lock, flags); + if (rc == SUCCESS) + rc = ipr_wait_for_ops(ioa_cfg, scsi_cmd->device, ipr_match_lun); LEAVE; return rc; } @@ 
-10005,6 +10097,12 @@ static struct pci_device_id ipr_pci_table[] = { PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57EF, 0, 0, 0 }, { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE, PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57F0, 0, 0, 0 }, + { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE, + PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_2CCA, 0, 0, 0 }, + { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE, + PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_2CD2, 0, 0, 0 }, + { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE, + PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_2CCD, 0, 0, 0 }, { } }; MODULE_DEVICE_TABLE(pci, ipr_pci_table); diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h index cad1483..c5f2e9a 100644 --- a/drivers/scsi/ipr.h +++ b/drivers/scsi/ipr.h @@ -107,6 +107,9 @@ #define IPR_SUBS_DEV_ID_57EE 0x049A #define IPR_SUBS_DEV_ID_57EF 0x049B #define IPR_SUBS_DEV_ID_57F0 0x049C +#define IPR_SUBS_DEV_ID_2CCA 0x04C7 +#define IPR_SUBS_DEV_ID_2CD2 0x04C8 +#define IPR_SUBS_DEV_ID_2CCD 0x04C9 #define IPR_NAME "ipr" /* @@ -1585,6 +1588,7 @@ struct ipr_cmnd { struct scsi_device *sdev; } u; + struct completion *eh_comp; struct ipr_hrr_queue *hrrq; struct ipr_ioa_cfg *ioa_cfg; }; diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index e399561..b69b233 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -717,11 +717,21 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, return NULL; } + if (data_size > ISCSI_DEF_MAX_RECV_SEG_LEN) { + iscsi_conn_printk(KERN_ERR, conn, "Invalid buffer len of %u for login task. 
Max len is %u\n", data_size, ISCSI_DEF_MAX_RECV_SEG_LEN); + return NULL; + } + task = conn->login_task; } else { if (session->state != ISCSI_STATE_LOGGED_IN) return NULL; + if (data_size != 0) { + iscsi_conn_printk(KERN_ERR, conn, "Can not send data buffer of len %u for op 0x%x\n", data_size, opcode); + return NULL; + } + BUG_ON(conn->c_stage == ISCSI_CONN_INITIAL_STAGE); BUG_ON(conn->c_stage == ISCSI_CONN_STOPPED); diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c index 816db12..52587ce 100644 --- a/drivers/scsi/megaraid.c +++ b/drivers/scsi/megaraid.c @@ -1967,7 +1967,7 @@ megaraid_abort_and_reset(adapter_t *adapter, Scsi_Cmnd *cmd, int aor) cmd->device->id, cmd->device->lun); if(list_empty(&adapter->pending_list)) - return FALSE; + return FAILED; list_for_each_safe(pos, next, &adapter->pending_list) { @@ -1990,7 +1990,7 @@ megaraid_abort_and_reset(adapter_t *adapter, Scsi_Cmnd *cmd, int aor) (aor==SCB_ABORT) ? "ABORTING":"RESET", scb->idx); - return FALSE; + return FAILED; } else { @@ -2015,12 +2015,12 @@ megaraid_abort_and_reset(adapter_t *adapter, Scsi_Cmnd *cmd, int aor) list_add_tail(SCSI_LIST(cmd), &adapter->completed_list); - return TRUE; + return SUCCESS; } } } - return FALSE; + return FAILED; } static inline int diff --git a/drivers/scsi/megaraid/megaraid_mm.c b/drivers/scsi/megaraid/megaraid_mm.c index dfffd0f..a706927 100644 --- a/drivers/scsi/megaraid/megaraid_mm.c +++ b/drivers/scsi/megaraid/megaraid_mm.c @@ -486,6 +486,8 @@ mimd_to_kioc(mimd_t __user *umimd, mraid_mmadp_t *adp, uioc_t *kioc) pthru32->dataxferaddr = kioc->buf_paddr; if (kioc->data_dir & UIOC_WR) { + if (pthru32->dataxferlen > kioc->xferlen) + return -EINVAL; if (copy_from_user(kioc->buf_vaddr, kioc->user_data, pthru32->dataxferlen)) { return (-EFAULT); diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index a59a552..855dc7c 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ 
b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -953,7 +953,7 @@ megasas_issue_blocked_abort_cmd(struct megasas_instance *instance, cpu_to_le32(upper_32_bits(cmd_to_abort->frame_phys_addr)); cmd->sync_cmd = 1; - cmd->cmd_status = 0xFF; + cmd->cmd_status = ENODATA; instance->instancet->issue_dcmd(instance, cmd); diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c index 7f0af4f..6fd7d40 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c +++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c @@ -8293,7 +8293,6 @@ _scsih_suspend(struct pci_dev *pdev, pm_message_t state) mpt2sas_base_free_resources(ioc); pci_save_state(pdev); - pci_disable_device(pdev); pci_set_power_state(pdev, device_state); return 0; } diff --git a/drivers/scsi/mpt2sas/mpt2sas_transport.c b/drivers/scsi/mpt2sas/mpt2sas_transport.c index 9d26637..396d78e 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_transport.c +++ b/drivers/scsi/mpt2sas/mpt2sas_transport.c @@ -1006,12 +1006,9 @@ mpt2sas_transport_update_links(struct MPT2SAS_ADAPTER *ioc, &mpt2sas_phy->remote_identify); _transport_add_phy_to_an_existing_port(ioc, sas_node, mpt2sas_phy, mpt2sas_phy->remote_identify.sas_address); - } else { + } else memset(&mpt2sas_phy->remote_identify, 0 , sizeof(struct sas_identify)); - _transport_del_phy_from_an_existing_port(ioc, sas_node, - mpt2sas_phy); - } if (mpt2sas_phy->phy) mpt2sas_phy->phy->negotiated_linkrate = diff --git a/drivers/scsi/mpt3sas/mpt3sas_transport.c b/drivers/scsi/mpt3sas/mpt3sas_transport.c index e771a88..dcadd56 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_transport.c +++ b/drivers/scsi/mpt3sas/mpt3sas_transport.c @@ -1003,12 +1003,9 @@ mpt3sas_transport_update_links(struct MPT3SAS_ADAPTER *ioc, &mpt3sas_phy->remote_identify); _transport_add_phy_to_an_existing_port(ioc, sas_node, mpt3sas_phy, mpt3sas_phy->remote_identify.sas_address); - } else { + } else memset(&mpt3sas_phy->remote_identify, 0 , sizeof(struct sas_identify)); - _transport_del_phy_from_an_existing_port(ioc, 
sas_node, - mpt3sas_phy); - } if (mpt3sas_phy->phy) mpt3sas_phy->phy->negotiated_linkrate = diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 9f01bbb..36d62fd 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -2616,7 +2616,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) ha->flags.enable_64bit_addressing ? "enable" : "disable"); ret = qla2x00_mem_alloc(ha, req_length, rsp_length, &req, &rsp); - if (!ret) { + if (ret) { ql_log_pci(ql_log_fatal, pdev, 0x0031, "Failed to allocate memory for adapter, aborting.\n"); @@ -3541,10 +3541,10 @@ qla2x00_mem_alloc(struct qla_hw_data *ha, uint16_t req_len, uint16_t rsp_len, else { qla2x00_set_reserved_loop_ids(ha); ql_dbg_pci(ql_dbg_init, ha->pdev, 0x0123, - "loop_id_map=%p. \n", ha->loop_id_map); + "loop_id_map=%p.\n", ha->loop_id_map); } - return 1; + return 0; fail_async_pd: dma_pool_free(ha->s_dma_pool, ha->ex_init_cb, ha->ex_init_cb_dma); diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index e4fa6fb..3078832 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1361,12 +1361,10 @@ static inline void qlt_unmap_sg(struct scsi_qla_host *vha, static int qlt_check_reserve_free_req(struct scsi_qla_host *vha, uint32_t req_cnt) { - struct qla_hw_data *ha = vha->hw; - device_reg_t __iomem *reg = ha->iobase; uint32_t cnt; if (vha->req->cnt < (req_cnt + 2)) { - cnt = (uint16_t)RD_REG_DWORD(&reg->isp24.req_q_out); + cnt = (uint16_t)RD_REG_DWORD(vha->req->req_q_out); ql_dbg(ql_dbg_tgt, vha, 0xe00a, "Request ring circled: cnt=%d, vha->->ring_index=%d, " diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index f85b9e5..80a1f9f 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -740,7 +740,16 @@ static void tcm_qla2xxx_clear_nacl_from_fcport_map(struct qla_tgt_sess *sess) pr_debug("fc_rport domain: port_id 
0x%06x\n", nacl->nport_id); node = btree_remove32(&lport->lport_fcport_map, nacl->nport_id); - WARN_ON(node && (node != se_nacl)); + if (WARN_ON(node && (node != se_nacl))) { + /* + * The nacl no longer matches what we think it should be. + * Most likely a new dynamic acl has been added while + * someone dropped the hardware lock. It clearly is a + * bug elsewhere, but this bit can't make things worse. + */ + btree_insert32(&lport->lport_fcport_map, nacl->nport_id, + node, GFP_ATOMIC); + } pr_debug("Removed from fcport_map: %p for WWNN: 0x%016LX, port_id: 0x%06x\n", se_nacl, nacl->nport_wwnn, nacl->nport_id); diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index f969aca..262ab83 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -202,6 +202,7 @@ static struct { {"IOMEGA", "Io20S *F", NULL, BLIST_KEY}, {"INSITE", "Floptical F*8I", NULL, BLIST_KEY}, {"INSITE", "I325VM", NULL, BLIST_KEY}, + {"Intel", "Multi-Flex", NULL, BLIST_NO_RSOC}, {"iRiver", "iFP Mass Driver", NULL, BLIST_NOT_LOCKABLE | BLIST_INQUIRY_36}, {"LASOUND", "CDX7405", "3.10", BLIST_MAX5LUN | BLIST_SINGLELUN}, {"MATSHITA", "PD-1", NULL, BLIST_FORCELUN | BLIST_SINGLELUN}, @@ -210,6 +211,7 @@ static struct { {"Medion", "Flash XL MMC/SD", "2.6D", BLIST_FORCELUN}, {"MegaRAID", "LD", NULL, BLIST_FORCELUN}, {"MICROP", "4110", NULL, BLIST_NOTQ}, + {"MSFT", "Virtual HD", NULL, BLIST_NO_RSOC}, {"MYLEX", "DACARMRB", "*", BLIST_REPORTLUN2}, {"nCipher", "Fastness Crypto", NULL, BLIST_FORCELUN}, {"NAKAMICH", "MJ-4.8S", NULL, BLIST_FORCELUN | BLIST_SINGLELUN}, @@ -222,6 +224,7 @@ static struct { {"PIONEER", "CD-ROM DRM-602X", NULL, BLIST_FORCELUN | BLIST_SINGLELUN}, {"PIONEER", "CD-ROM DRM-604X", NULL, BLIST_FORCELUN | BLIST_SINGLELUN}, {"PIONEER", "CD-ROM DRM-624X", NULL, BLIST_FORCELUN | BLIST_SINGLELUN}, + {"Promise", "VTrak E610f", NULL, BLIST_SPARSELUN | BLIST_NO_RSOC}, {"Promise", "", NULL, BLIST_SPARSELUN}, {"QUANTUM", "XP34301", "1071", BLIST_NOTQ}, {"REGAL", 
"CDC-4X", NULL, BLIST_MAX5LUN | BLIST_SINGLELUN}, diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 83e591b..066e319 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -143,7 +143,7 @@ enum blk_eh_timer_return scsi_times_out(struct request *req) else if (host->hostt->eh_timed_out) rtn = host->hostt->eh_timed_out(scmd); - scmd->result |= DID_TIME_OUT << 16; + set_host_byte(scmd, DID_TIME_OUT); if (unlikely(rtn == BLK_EH_NOT_HANDLED && !scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) @@ -1746,8 +1746,10 @@ static void scsi_restart_operations(struct Scsi_Host *shost) * is no point trying to lock the door of an off-line device. */ shost_for_each_device(sdev, shost) { - if (scsi_device_online(sdev) && sdev->locked) + if (scsi_device_online(sdev) && sdev->was_reset && sdev->locked) { scsi_eh_lock_door(sdev); + sdev->was_reset = 0; + } } /* diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index d1549b7..ad43b98 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -831,6 +831,14 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) scsi_next_command(cmd); return; } + } else if (blk_rq_bytes(req) == 0 && result && !sense_deferred) { + /* + * Certain non BLOCK_PC requests are commands that don't + * actually transfer anything (FLUSH), so cannot use + * good_bytes != blk_rq_bytes(req) as the signal for an error. + * This sets the error explicitly for the problem case. 
+ */ + error = __scsi_error_from_host_byte(cmd, result); } /* no bidi support for !REQ_TYPE_BLOCK_PC yet */ diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c index fe30ea9..109802f 100644 --- a/drivers/scsi/scsi_netlink.c +++ b/drivers/scsi/scsi_netlink.c @@ -77,7 +77,7 @@ scsi_nl_rcv_msg(struct sk_buff *skb) goto next_msg; } - if (!capable(CAP_SYS_ADMIN)) { + if (!netlink_capable(skb, CAP_SYS_ADMIN)) { err = -EPERM; goto next_msg; } diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 307a811..054ec2c 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -320,6 +320,7 @@ static void scsi_target_destroy(struct scsi_target *starget) struct Scsi_Host *shost = dev_to_shost(dev->parent); unsigned long flags; + starget->state = STARGET_DEL; transport_destroy_device(dev); spin_lock_irqsave(shost->host_lock, flags); if (shost->hostt->target_destroy) @@ -371,6 +372,37 @@ static struct scsi_target *__scsi_find_target(struct device *parent, } /** + * scsi_target_reap_ref_release - remove target from visibility + * @kref: the reap_ref in the target being released + * + * Called on last put of reap_ref, which is the indication that no device + * under this target is visible anymore, so render the target invisible in + * sysfs. Note: we have to be in user context here because the target reaps + * should be done in places where the scsi device visibility is being removed. + */ +static void scsi_target_reap_ref_release(struct kref *kref) +{ + struct scsi_target *starget + = container_of(kref, struct scsi_target, reap_ref); + + /* + * if we get here and the target is still in the CREATED state that + * means it was allocated but never made visible (because a scan + * turned up no LUNs), so don't call device_del() on it. 
+ */ + if (starget->state != STARGET_CREATED) { + transport_remove_device(&starget->dev); + device_del(&starget->dev); + } + scsi_target_destroy(starget); +} + +static void scsi_target_reap_ref_put(struct scsi_target *starget) +{ + kref_put(&starget->reap_ref, scsi_target_reap_ref_release); +} + +/** * scsi_alloc_target - allocate a new or find an existing target * @parent: parent of the target (need not be a scsi host) * @channel: target channel number (zero if no channels) @@ -392,7 +424,7 @@ static struct scsi_target *scsi_alloc_target(struct device *parent, + shost->transportt->target_size; struct scsi_target *starget; struct scsi_target *found_target; - int error; + int error, ref_got; starget = kzalloc(size, GFP_KERNEL); if (!starget) { @@ -401,7 +433,7 @@ static struct scsi_target *scsi_alloc_target(struct device *parent, } dev = &starget->dev; device_initialize(dev); - starget->reap_ref = 1; + kref_init(&starget->reap_ref); dev->parent = get_device(parent); dev_set_name(dev, "target%d:%d:%d", shost->host_no, channel, id); dev->bus = &scsi_bus_type; @@ -441,29 +473,36 @@ static struct scsi_target *scsi_alloc_target(struct device *parent, return starget; found: - found_target->reap_ref++; + /* + * release routine already fired if kref is zero, so if we can still + * take the reference, the target must be alive. If we can't, it must + * be dying and we need to wait for a new target + */ + ref_got = kref_get_unless_zero(&found_target->reap_ref); + spin_unlock_irqrestore(shost->host_lock, flags); - if (found_target->state != STARGET_DEL) { + if (ref_got) { put_device(dev); return found_target; } - /* Unfortunately, we found a dying target; need to - * wait until it's dead before we can get a new one */ + /* + * Unfortunately, we found a dying target; need to wait until it's + * dead before we can get a new one. There is an anomaly here. We + * *should* call scsi_target_reap() to balance the kref_get() of the + * reap_ref above. 
However, since the target being released, it's + * already invisible and the reap_ref is irrelevant. If we call + * scsi_target_reap() we might spuriously do another device_del() on + * an already invisible target. + */ put_device(&found_target->dev); - flush_scheduled_work(); + /* + * length of time is irrelevant here, we just want to yield the CPU + * for a tick to avoid busy waiting for the target to die. + */ + msleep(1); goto retry; } -static void scsi_target_reap_usercontext(struct work_struct *work) -{ - struct scsi_target *starget = - container_of(work, struct scsi_target, ew.work); - - transport_remove_device(&starget->dev); - device_del(&starget->dev); - scsi_target_destroy(starget); -} - /** * scsi_target_reap - check to see if target is in use and destroy if not * @starget: target to be checked @@ -474,28 +513,13 @@ static void scsi_target_reap_usercontext(struct work_struct *work) */ void scsi_target_reap(struct scsi_target *starget) { - struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); - unsigned long flags; - enum scsi_target_state state; - int empty = 0; - - spin_lock_irqsave(shost->host_lock, flags); - state = starget->state; - if (--starget->reap_ref == 0 && list_empty(&starget->devices)) { - empty = 1; - starget->state = STARGET_DEL; - } - spin_unlock_irqrestore(shost->host_lock, flags); - - if (!empty) - return; - - BUG_ON(state == STARGET_DEL); - if (state == STARGET_CREATED) - scsi_target_destroy(starget); - else - execute_in_process_context(scsi_target_reap_usercontext, - &starget->ew); + /* + * serious problem if this triggers: STARGET_DEL is only set in the if + * the reap_ref drops to zero, so we're trying to do another final put + * on an already released kref + */ + BUG_ON(starget->state == STARGET_DEL); + scsi_target_reap_ref_put(starget); } /** @@ -898,6 +922,12 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result, if (*bflags & BLIST_USE_10_BYTE_MS) sdev->use_10_for_ms = 1; + /* some devices don't 
like REPORT SUPPORTED OPERATION CODES + * and will simply timeout causing sd_mod init to take a very + * very long time */ + if (*bflags & BLIST_NO_RSOC) + sdev->no_report_opcodes = 1; + /* set the device running here so that slave configure * may do I/O */ ret = scsi_device_set_state(sdev, SDEV_RUNNING); @@ -926,7 +956,9 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result, sdev->eh_timeout = SCSI_DEFAULT_EH_TIMEOUT; - if (*bflags & BLIST_SKIP_VPD_PAGES) + if (*bflags & BLIST_TRY_VPD_PAGES) + sdev->try_vpd_pages = 1; + else if (*bflags & BLIST_SKIP_VPD_PAGES) sdev->skip_vpd_pages = 1; transport_configure_device(&sdev->sdev_gendev); @@ -1212,6 +1244,12 @@ static void scsi_sequential_lun_scan(struct scsi_target *starget, max_dev_lun = min(8U, max_dev_lun); /* + * Stop scanning at 255 unless BLIST_SCSI3LUN + */ + if (!(bflags & BLIST_SCSI3LUN)) + max_dev_lun = min(256U, max_dev_lun); + + /* * We have already scanned LUN 0, so start at LUN 1. Keep scanning * until we reach the max, or no LUN is found and we are not * sparse_lun. @@ -1532,6 +1570,10 @@ struct scsi_device *__scsi_add_device(struct Scsi_Host *shost, uint channel, } mutex_unlock(&shost->scan_mutex); scsi_autopm_put_target(starget); + /* + * paired with scsi_alloc_target(). 
Target will be destroyed unless + * scsi_probe_and_add_lun made an underlying device visible + */ scsi_target_reap(starget); put_device(&starget->dev); @@ -1612,8 +1654,10 @@ static void __scsi_scan_target(struct device *parent, unsigned int channel, out_reap: scsi_autopm_put_target(starget); - /* now determine if the target has any children at all - * and if not, nuke it */ + /* + * paired with scsi_alloc_target(): determine if the target has + * any children at all and if not, nuke it + */ scsi_target_reap(starget); put_device(&starget->dev); diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 40c6394..dfb007c 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -332,17 +332,14 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work) { struct scsi_device *sdev; struct device *parent; - struct scsi_target *starget; struct list_head *this, *tmp; unsigned long flags; sdev = container_of(work, struct scsi_device, ew.work); parent = sdev->sdev_gendev.parent; - starget = to_scsi_target(parent); spin_lock_irqsave(sdev->host->host_lock, flags); - starget->reap_ref++; list_del(&sdev->siblings); list_del(&sdev->same_target_siblings); list_del(&sdev->starved_entry); @@ -362,8 +359,6 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work) /* NULL queue means the device can't be used */ sdev->request_queue = NULL; - scsi_target_reap(scsi_target(sdev)); - kfree(sdev->inquiry); kfree(sdev); @@ -1018,6 +1013,13 @@ void __scsi_remove_device(struct scsi_device *sdev) sdev->host->hostt->slave_destroy(sdev); transport_destroy_device(dev); + /* + * Paired with the kref_get() in scsi_sysfs_initialize(). We have + * remoed sysfs visibility from the device, so make the target + * invisible if this was the last device underneath it. 
+ */ + scsi_target_reap(scsi_target(sdev)); + put_device(dev); } @@ -1080,7 +1082,7 @@ void scsi_remove_target(struct device *dev) continue; if (starget->dev.parent == dev || &starget->dev == dev) { /* assuming new targets arrive at the end */ - starget->reap_ref++; + kref_get(&starget->reap_ref); spin_unlock_irqrestore(shost->host_lock, flags); if (last) scsi_target_reap(last); @@ -1164,6 +1166,12 @@ void scsi_sysfs_device_initialize(struct scsi_device *sdev) list_add_tail(&sdev->same_target_siblings, &starget->devices); list_add_tail(&sdev->siblings, &shost->__devices); spin_unlock_irqrestore(shost->host_lock, flags); + /* + * device can now only be removed via __scsi_remove_device() so hold + * the target. Target will be held in CREATED state until something + * beneath it becomes visible (in which case it moves to RUNNING) + */ + kref_get(&starget->reap_ref); } int scsi_is_sdev_device(const struct device *dev) diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 1b68142..c341f85 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -1621,8 +1621,6 @@ void sas_rphy_free(struct sas_rphy *rphy) list_del(&rphy->list); mutex_unlock(&sas_host->lock); - sas_bsg_remove(shost, rphy); - transport_destroy_device(dev); put_device(dev); @@ -1681,6 +1679,7 @@ sas_rphy_remove(struct sas_rphy *rphy) } sas_rphy_unlink(rphy); + sas_bsg_remove(NULL, rphy); transport_remove_device(dev); device_del(dev); } diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index dbc024b..69d2a70 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2667,6 +2667,11 @@ static void sd_read_write_same(struct scsi_disk *sdkp, unsigned char *buffer) static int sd_try_extended_inquiry(struct scsi_device *sdp) { + /* Attempt VPD inquiry if the device blacklist explicitly calls + * for it. 
+ */ + if (sdp->try_vpd_pages) + return 1; /* * Although VPD inquiries can go to SCSI-2 type devices, * some USB ones crash on receiving them, and the pages diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 9969fa1..86b0515 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -33,6 +33,7 @@ #include <linux/device.h> #include <linux/hyperv.h> #include <linux/mempool.h> +#include <linux/blkdev.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_host.h> @@ -330,17 +331,17 @@ static int storvsc_timeout = 180; static void storvsc_on_channel_callback(void *context); -/* - * In Hyper-V, each port/path/target maps to 1 scsi host adapter. In - * reality, the path/target is not used (ie always set to 0) so our - * scsi host adapter essentially has 1 bus with 1 target that contains - * up to 256 luns. - */ -#define STORVSC_MAX_LUNS_PER_TARGET 64 -#define STORVSC_MAX_TARGETS 1 -#define STORVSC_MAX_CHANNELS 1 +#define STORVSC_MAX_LUNS_PER_TARGET 255 +#define STORVSC_MAX_TARGETS 2 +#define STORVSC_MAX_CHANNELS 8 +#define STORVSC_FC_MAX_LUNS_PER_TARGET 255 +#define STORVSC_FC_MAX_TARGETS 128 +#define STORVSC_FC_MAX_CHANNELS 8 +#define STORVSC_IDE_MAX_LUNS_PER_TARGET 64 +#define STORVSC_IDE_MAX_TARGETS 1 +#define STORVSC_IDE_MAX_CHANNELS 1 struct storvsc_cmd_request { struct list_head entry; @@ -1017,6 +1018,13 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb, case ATA_12: set_host_byte(scmnd, DID_PASSTHROUGH); break; + /* + * On Some Windows hosts TEST_UNIT_READY command can return + * SRB_STATUS_ERROR, let the upper level code deal with it + * based on the sense information. + */ + case TEST_UNIT_READY: + break; default: set_host_byte(scmnd, DID_TARGET_FAILURE); } @@ -1518,6 +1526,16 @@ static int storvsc_host_reset_handler(struct scsi_cmnd *scmnd) return SUCCESS; } +/* + * The host guarantees to respond to each command, although I/O latencies might + * be unbounded on Azure. 
Reset the timer unconditionally to give the host a + * chance to perform EH. + */ +static enum blk_eh_timer_return storvsc_eh_timed_out(struct scsi_cmnd *scmnd) +{ + return BLK_EH_RESET_TIMER; +} + static bool storvsc_scsi_cmd_ok(struct scsi_cmnd *scmnd) { bool allowed = true; @@ -1553,9 +1571,19 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd) struct vmscsi_request *vm_srb; struct stor_mem_pools *memp = scmnd->device->hostdata; - if (!storvsc_scsi_cmd_ok(scmnd)) { - scmnd->scsi_done(scmnd); - return 0; + if (vmstor_current_major <= VMSTOR_WIN8_MAJOR) { + /* + * On legacy hosts filter unimplemented commands. + * Future hosts are expected to correctly handle + * unsupported commands. Furthermore, it is + * possible that some of the currently + * unsupported commands maybe supported in + * future versions of the host. + */ + if (!storvsc_scsi_cmd_ok(scmnd)) { + scmnd->scsi_done(scmnd); + return 0; + } } request_size = sizeof(struct storvsc_cmd_request); @@ -1580,26 +1608,24 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd) vm_srb = &cmd_request->vstor_packet.vm_srb; vm_srb->win8_extension.time_out_value = 60; + vm_srb->win8_extension.srb_flags |= + (SRB_FLAGS_QUEUE_ACTION_ENABLE | + SRB_FLAGS_DISABLE_SYNCH_TRANSFER); /* Build the SRB */ switch (scmnd->sc_data_direction) { case DMA_TO_DEVICE: vm_srb->data_in = WRITE_TYPE; vm_srb->win8_extension.srb_flags |= SRB_FLAGS_DATA_OUT; - vm_srb->win8_extension.srb_flags |= - (SRB_FLAGS_QUEUE_ACTION_ENABLE | - SRB_FLAGS_DISABLE_SYNCH_TRANSFER); break; case DMA_FROM_DEVICE: vm_srb->data_in = READ_TYPE; vm_srb->win8_extension.srb_flags |= SRB_FLAGS_DATA_IN; - vm_srb->win8_extension.srb_flags |= - (SRB_FLAGS_QUEUE_ACTION_ENABLE | - SRB_FLAGS_DISABLE_SYNCH_TRANSFER); break; default: vm_srb->data_in = UNKNOWN_TYPE; - vm_srb->win8_extension.srb_flags = 0; + vm_srb->win8_extension.srb_flags |= (SRB_FLAGS_DATA_IN | + SRB_FLAGS_DATA_OUT); break; } @@ -1664,13 +1690,12 
@@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd) if (ret == -EAGAIN) { /* no more space */ - if (cmd_request->bounce_sgl_count) { + if (cmd_request->bounce_sgl_count) destroy_bounce_buffer(cmd_request->bounce_sgl, cmd_request->bounce_sgl_count); - ret = SCSI_MLQUEUE_DEVICE_BUSY; - goto queue_error; - } + ret = SCSI_MLQUEUE_DEVICE_BUSY; + goto queue_error; } return 0; @@ -1687,11 +1712,11 @@ static struct scsi_host_template scsi_driver = { .bios_param = storvsc_get_chs, .queuecommand = storvsc_queuecommand, .eh_host_reset_handler = storvsc_host_reset_handler, + .eh_timed_out = storvsc_eh_timed_out, .slave_alloc = storvsc_device_alloc, .slave_destroy = storvsc_device_destroy, .slave_configure = storvsc_device_configure, - .cmd_per_lun = 1, - /* 64 max_queue * 1 target */ + .cmd_per_lun = 255, .can_queue = STORVSC_MAX_IO_REQUESTS*STORVSC_MAX_TARGETS, .this_id = -1, /* no use setting to 0 since ll_blk_rw reset it to 1 */ @@ -1743,19 +1768,25 @@ static int storvsc_probe(struct hv_device *device, * set state to properly communicate with the host. 
*/ - if (vmbus_proto_version == VERSION_WIN8) { - sense_buffer_size = POST_WIN7_STORVSC_SENSE_BUFFER_SIZE; - vmscsi_size_delta = 0; - vmstor_current_major = VMSTOR_WIN8_MAJOR; - vmstor_current_minor = VMSTOR_WIN8_MINOR; - } else { + switch (vmbus_proto_version) { + case VERSION_WS2008: + case VERSION_WIN7: sense_buffer_size = PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE; vmscsi_size_delta = sizeof(struct vmscsi_win8_extension); vmstor_current_major = VMSTOR_WIN7_MAJOR; vmstor_current_minor = VMSTOR_WIN7_MINOR; + break; + default: + sense_buffer_size = POST_WIN7_STORVSC_SENSE_BUFFER_SIZE; + vmscsi_size_delta = 0; + vmstor_current_major = VMSTOR_WIN8_MAJOR; + vmstor_current_minor = VMSTOR_WIN8_MINOR; + break; } - + if (dev_id->driver_data == SFC_GUID) + scsi_driver.can_queue = (STORVSC_MAX_IO_REQUESTS * + STORVSC_FC_MAX_TARGETS); host = scsi_host_alloc(&scsi_driver, sizeof(struct hv_host_device)); if (!host) @@ -1789,12 +1820,25 @@ static int storvsc_probe(struct hv_device *device, host_dev->path = stor_device->path_id; host_dev->target = stor_device->target_id; - /* max # of devices per target */ - host->max_lun = STORVSC_MAX_LUNS_PER_TARGET; - /* max # of targets per channel */ - host->max_id = STORVSC_MAX_TARGETS; - /* max # of channels */ - host->max_channel = STORVSC_MAX_CHANNELS - 1; + switch (dev_id->driver_data) { + case SFC_GUID: + host->max_lun = STORVSC_FC_MAX_LUNS_PER_TARGET; + host->max_id = STORVSC_FC_MAX_TARGETS; + host->max_channel = STORVSC_FC_MAX_CHANNELS - 1; + break; + + case SCSI_GUID: + host->max_lun = STORVSC_MAX_LUNS_PER_TARGET; + host->max_id = STORVSC_MAX_TARGETS; + host->max_channel = STORVSC_MAX_CHANNELS - 1; + break; + + default: + host->max_lun = STORVSC_IDE_MAX_LUNS_PER_TARGET; + host->max_id = STORVSC_IDE_MAX_TARGETS; + host->max_channel = STORVSC_IDE_MAX_CHANNELS - 1; + break; + } /* max cmd length */ host->max_cmd_len = STORVSC_MAX_CMD_LEN; diff --git a/drivers/scsi/sun3_NCR5380.c b/drivers/scsi/sun3_NCR5380.c index 636bbe0..fc57c8a 100644 --- 
a/drivers/scsi/sun3_NCR5380.c +++ b/drivers/scsi/sun3_NCR5380.c @@ -2597,15 +2597,15 @@ static void NCR5380_reselect (struct Scsi_Host *instance) * Purpose : abort a command * * Inputs : cmd - the struct scsi_cmnd to abort, code - code to set the - * host byte of the result field to, if zero DID_ABORTED is + * host byte of the result field to, if zero DID_ABORTED is * used. * - * Returns : 0 - success, -1 on failure. + * Returns : SUCCESS - success, FAILED on failure. * - * XXX - there is no way to abort the command that is currently - * connected, you have to wait for it to complete. If this is + * XXX - there is no way to abort the command that is currently + * connected, you have to wait for it to complete. If this is * a problem, we could implement longjmp() / setjmp(), setjmp() - * called where the loop started in NCR5380_main(). + * called where the loop started in NCR5380_main(). */ static int NCR5380_abort(struct scsi_cmnd *cmd) diff --git a/drivers/scsi/sym53c8xx_2/sym_hipd.c b/drivers/scsi/sym53c8xx_2/sym_hipd.c index d92fe40..6b349e3 100644 --- a/drivers/scsi/sym53c8xx_2/sym_hipd.c +++ b/drivers/scsi/sym53c8xx_2/sym_hipd.c @@ -3000,7 +3000,11 @@ sym_dequeue_from_squeue(struct sym_hcb *np, int i, int target, int lun, int task if ((target == -1 || cp->target == target) && (lun == -1 || cp->lun == lun) && (task == -1 || cp->tag == task)) { +#ifdef SYM_OPT_HANDLE_DEVICE_QUEUEING sym_set_cam_status(cp->cmd, DID_SOFT_ERROR); +#else + sym_set_cam_status(cp->cmd, DID_REQUEUE); +#endif sym_remque(&cp->link_ccbq); sym_insque_tail(&cp->link_ccbq, &np->comp_ccbq); } diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index b26f1a5..11f5326 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -270,6 +270,16 @@ static void virtscsi_req_done(struct virtqueue *vq) virtscsi_vq_done(vscsi, req_vq, virtscsi_complete_cmd); }; +static void virtscsi_poll_requests(struct virtio_scsi *vscsi) +{ + int i, num_vqs; + + num_vqs = 
vscsi->num_queues; + for (i = 0; i < num_vqs; i++) + virtscsi_vq_done(vscsi, &vscsi->req_vqs[i], + virtscsi_complete_cmd); +} + static void virtscsi_complete_free(struct virtio_scsi *vscsi, void *buf) { struct virtio_scsi_cmd *cmd = buf; @@ -288,6 +298,8 @@ static void virtscsi_ctrl_done(struct virtqueue *vq) virtscsi_vq_done(vscsi, &vscsi->ctrl_vq, virtscsi_complete_free); }; +static void virtscsi_handle_event(struct work_struct *work); + static int virtscsi_kick_event(struct virtio_scsi *vscsi, struct virtio_scsi_event_node *event_node) { @@ -295,6 +307,7 @@ static int virtscsi_kick_event(struct virtio_scsi *vscsi, struct scatterlist sg; unsigned long flags; + INIT_WORK(&event_node->work, virtscsi_handle_event); sg_init_one(&sg, &event_node->event, sizeof(struct virtio_scsi_event)); spin_lock_irqsave(&vscsi->event_vq.vq_lock, flags); @@ -412,7 +425,6 @@ static void virtscsi_complete_event(struct virtio_scsi *vscsi, void *buf) { struct virtio_scsi_event_node *event_node = buf; - INIT_WORK(&event_node->work, virtscsi_handle_event); schedule_work(&event_node->work); } @@ -602,6 +614,18 @@ static int virtscsi_tmf(struct virtio_scsi *vscsi, struct virtio_scsi_cmd *cmd) cmd->resp.tmf.response == VIRTIO_SCSI_S_FUNCTION_SUCCEEDED) ret = SUCCESS; + /* + * The spec guarantees that all requests related to the TMF have + * been completed, but the callback might not have run yet if + * we're using independent interrupts (e.g. MSI). Poll the + * virtqueues once. + * + * In the abort case, sc->scsi_done will do nothing, because + * the block layer must have detected a timeout and as a result + * REQ_ATOM_COMPLETE has been set. 
+ */ + virtscsi_poll_requests(vscsi); + out: mempool_free(cmd, virtscsi_cmd_pool); return ret; @@ -751,8 +775,12 @@ static void __virtscsi_set_affinity(struct virtio_scsi *vscsi, bool affinity) vscsi->affinity_hint_set = true; } else { - for (i = 0; i < vscsi->num_queues; i++) + for (i = 0; i < vscsi->num_queues; i++) { + if (!vscsi->req_vqs[i].vq) + continue; + virtqueue_set_affinity(vscsi->req_vqs[i].vq, -1); + } vscsi->affinity_hint_set = false; } diff --git a/drivers/spi/spi-dw-mid.c b/drivers/spi/spi-dw-mid.c index b9f0192..0791c92 100644 --- a/drivers/spi/spi-dw-mid.c +++ b/drivers/spi/spi-dw-mid.c @@ -89,7 +89,13 @@ err_exit: static void mid_spi_dma_exit(struct dw_spi *dws) { + if (!dws->dma_inited) + return; + + dmaengine_terminate_all(dws->txchan); dma_release_channel(dws->txchan); + + dmaengine_terminate_all(dws->rxchan); dma_release_channel(dws->rxchan); } @@ -136,7 +142,7 @@ static int mid_spi_dma_transfer(struct dw_spi *dws, int cs_change) txconf.dst_addr = dws->dma_addr; txconf.dst_maxburst = LNW_DMA_MSIZE_16; txconf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; - txconf.dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES; + txconf.dst_addr_width = dws->dma_width; txconf.device_fc = false; txchan->device->device_control(txchan, DMA_SLAVE_CONFIG, @@ -159,7 +165,7 @@ static int mid_spi_dma_transfer(struct dw_spi *dws, int cs_change) rxconf.src_addr = dws->dma_addr; rxconf.src_maxburst = LNW_DMA_MSIZE_16; rxconf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; - rxconf.src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES; + rxconf.src_addr_width = dws->dma_width; rxconf.device_fc = false; rxchan->device->device_control(rxchan, DMA_SLAVE_CONFIG, diff --git a/drivers/spi/spi-dw.c b/drivers/spi/spi-dw.c index 79c958e..bb91bd8 100644 --- a/drivers/spi/spi-dw.c +++ b/drivers/spi/spi-dw.c @@ -394,9 +394,6 @@ static void pump_transfers(unsigned long data) chip = dws->cur_chip; spi = message->spi; - if (unlikely(!chip->clk_div)) - chip->clk_div = dws->max_freq / chip->speed_hz; - if 
(message->state == ERROR_STATE) { message->status = -EIO; goto early_exit; @@ -438,7 +435,7 @@ static void pump_transfers(unsigned long data) if (transfer->speed_hz) { speed = chip->speed_hz; - if (transfer->speed_hz != speed) { + if ((transfer->speed_hz != speed) || (!chip->clk_div)) { speed = transfer->speed_hz; if (speed > dws->max_freq) { printk(KERN_ERR "MRST SPI0: unsupported" @@ -658,7 +655,6 @@ static int dw_spi_setup(struct spi_device *spi) dev_err(&spi->dev, "No max speed HZ parameter\n"); return -EINVAL; } - chip->speed_hz = spi->max_speed_hz; chip->tmode = 0; /* Tx & Rx */ /* Default SPI mode is SCPOL = 0, SCPH = 0 */ diff --git a/drivers/spi/spi-efm32.c b/drivers/spi/spi-efm32.c index d428a40..6e06623 100644 --- a/drivers/spi/spi-efm32.c +++ b/drivers/spi/spi-efm32.c @@ -491,6 +491,9 @@ static int efm32_spi_remove(struct platform_device *pdev) static const struct of_device_id efm32_spi_dt_ids[] = { { + .compatible = "energymicro,efm32-spi", + }, { + /* doesn't follow the "vendor,device" scheme, don't use */ .compatible = "efm32,spi", }, { /* sentinel */ diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index a6df59e..864f4d5 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -46,7 +46,7 @@ #define SPI_TCR 0x08 -#define SPI_CTAR(x) (0x0c + (x * 4)) +#define SPI_CTAR(x) (0x0c + (((x) & 0x3) * 4)) #define SPI_CTAR_FMSZ(x) (((x) & 0x0000000f) << 27) #define SPI_CTAR_CPOL(x) ((x) << 26) #define SPI_CTAR_CPHA(x) ((x) << 25) @@ -72,7 +72,7 @@ #define SPI_PUSHR 0x34 #define SPI_PUSHR_CONT (1 << 31) -#define SPI_PUSHR_CTAS(x) (((x) & 0x00000007) << 28) +#define SPI_PUSHR_CTAS(x) (((x) & 0x00000003) << 28) #define SPI_PUSHR_EOQ (1 << 27) #define SPI_PUSHR_CTCNT (1 << 26) #define SPI_PUSHR_PCS(x) (((1 << x) & 0x0000003f) << 16) diff --git a/drivers/spi/spi-fsl-spi.c b/drivers/spi/spi-fsl-spi.c index 2129fcd..6ee3dc4 100644 --- a/drivers/spi/spi-fsl-spi.c +++ b/drivers/spi/spi-fsl-spi.c @@ -362,18 +362,28 @@ static int 
fsl_spi_bufs(struct spi_device *spi, struct spi_transfer *t, static void fsl_spi_do_one_msg(struct spi_message *m) { struct spi_device *spi = m->spi; - struct spi_transfer *t; + struct spi_transfer *t, *first; unsigned int cs_change; const int nsecs = 50; int status; - cs_change = 1; - status = 0; + /* Don't allow changes if CS is active */ + first = list_first_entry(&m->transfers, struct spi_transfer, + transfer_list); list_for_each_entry(t, &m->transfers, transfer_list) { - if (t->bits_per_word || t->speed_hz) { - /* Don't allow changes if CS is active */ + if ((first->bits_per_word != t->bits_per_word) || + (first->speed_hz != t->speed_hz)) { status = -EINVAL; + dev_err(&spi->dev, + "bits_per_word/speed_hz should be same for the same SPI transfer\n"); + return; + } + } + cs_change = 1; + status = -EINVAL; + list_for_each_entry(t, &m->transfers, transfer_list) { + if (t->bits_per_word || t->speed_hz) { if (cs_change) status = fsl_spi_setup_transfer(spi, t); if (status < 0) diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c index ed4af47..5f19cc9 100644 --- a/drivers/spi/spi-omap2-mcspi.c +++ b/drivers/spi/spi-omap2-mcspi.c @@ -314,7 +314,8 @@ static void omap2_mcspi_set_fifo(const struct spi_device *spi, disable_fifo: if (t->rx_buf != NULL) chconf &= ~OMAP2_MCSPI_CHCONF_FFER; - else + + if (t->tx_buf != NULL) chconf &= ~OMAP2_MCSPI_CHCONF_FFET; mcspi_write_chconf0(spi, chconf); diff --git a/drivers/spi/spi-orion.c b/drivers/spi/spi-orion.c index 1d1d321..72006e6 100644 --- a/drivers/spi/spi-orion.c +++ b/drivers/spi/spi-orion.c @@ -404,8 +404,6 @@ static int orion_spi_probe(struct platform_device *pdev) struct resource *r; unsigned long tclk_hz; int status = 0; - const u32 *iprop; - int size; master = spi_alloc_master(&pdev->dev, sizeof *spi); if (master == NULL) { @@ -416,10 +414,10 @@ static int orion_spi_probe(struct platform_device *pdev) if (pdev->id != -1) master->bus_num = pdev->id; if (pdev->dev.of_node) { - iprop = 
of_get_property(pdev->dev.of_node, "cell-index", - &size); - if (iprop && size == sizeof(*iprop)) - master->bus_num = *iprop; + u32 cell_index; + if (!of_property_read_u32(pdev->dev.of_node, "cell-index", + &cell_index)) + master->bus_num = cell_index; } /* we support only mode 0, and no options */ diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c index 9c511a9..b1a9ba8 100644 --- a/drivers/spi/spi-pl022.c +++ b/drivers/spi/spi-pl022.c @@ -1075,7 +1075,7 @@ err_rxdesc: pl022->sgt_tx.nents, DMA_TO_DEVICE); err_tx_sgmap: dma_unmap_sg(rxchan->device->dev, pl022->sgt_rx.sgl, - pl022->sgt_tx.nents, DMA_FROM_DEVICE); + pl022->sgt_rx.nents, DMA_FROM_DEVICE); err_rx_sgmap: sg_free_table(&pl022->sgt_tx); err_alloc_tx_sg: diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index 7b69e93..5b0e572 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -1082,6 +1082,7 @@ static struct acpi_device_id pxa2xx_spi_acpi_match[] = { { "INT3430", 0 }, { "INT3431", 0 }, { "80860F0E", 0 }, + { "8086228E", 0 }, { }, }; MODULE_DEVICE_TABLE(acpi, pxa2xx_spi_acpi_match); @@ -1286,7 +1287,9 @@ static int pxa2xx_spi_suspend(struct device *dev) if (status != 0) return status; write_SSCR0(0, drv_data->ioaddr); - clk_disable_unprepare(ssp->clk); + + if (!pm_runtime_suspended(dev)) + clk_disable_unprepare(ssp->clk); return 0; } @@ -1300,7 +1303,8 @@ static int pxa2xx_spi_resume(struct device *dev) pxa2xx_spi_dma_resume(drv_data); /* Enable the SSP clock */ - clk_prepare_enable(ssp->clk); + if (!pm_runtime_suspended(dev)) + clk_prepare_enable(ssp->clk); /* Start the queue running */ status = spi_master_resume(drv_data->master); diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 9e039c6..d254477 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1370,7 +1370,7 @@ EXPORT_SYMBOL_GPL(spi_busnum_to_master); */ int spi_setup(struct spi_device *spi) { - unsigned bad_bits; + unsigned bad_bits, ugly_bits; int status = 0; /* check mode to prevent that 
DUAL and QUAD set at the same time @@ -1390,6 +1390,15 @@ int spi_setup(struct spi_device *spi) * that aren't supported with their current master */ bad_bits = spi->mode & ~spi->master->mode_bits; + ugly_bits = bad_bits & + (SPI_TX_DUAL | SPI_TX_QUAD | SPI_RX_DUAL | SPI_RX_QUAD); + if (ugly_bits) { + dev_warn(&spi->dev, + "setup: ignoring unsupported mode bits %x\n", + ugly_bits); + spi->mode &= ~ugly_bits; + bad_bits &= ~ugly_bits; + } if (bad_bits) { dev_err(&spi->dev, "setup: unsupported mode bits %x\n", bad_bits); diff --git a/drivers/staging/comedi/drivers/ni_daq_700.c b/drivers/staging/comedi/drivers/ni_daq_700.c index 404f83d..40ca2be 100644 --- a/drivers/staging/comedi/drivers/ni_daq_700.c +++ b/drivers/staging/comedi/drivers/ni_daq_700.c @@ -118,6 +118,8 @@ static int daq700_ai_rinsn(struct comedi_device *dev, /* write channel to multiplexer */ /* set mask scan bit high to disable scanning */ outb(chan | 0x80, dev->iobase + CMD_R1); + /* mux needs 2us to really settle [Fred Brooks]. 
*/ + udelay(2); /* convert n samples */ for (n = 0; n < insn->n; n++) { diff --git a/drivers/staging/comedi/drivers/usbdux.c b/drivers/staging/comedi/drivers/usbdux.c index 701ad1a..4fbe303 100644 --- a/drivers/staging/comedi/drivers/usbdux.c +++ b/drivers/staging/comedi/drivers/usbdux.c @@ -494,7 +494,7 @@ static void usbduxsub_ao_isoc_irq(struct urb *urb) /* pointer to the DA */ *datap++ = val & 0xff; *datap++ = (val >> 8) & 0xff; - *datap++ = chan; + *datap++ = chan << 6; devpriv->ao_readback[chan] = val; s->async->events |= COMEDI_CB_BLOCK; @@ -1040,11 +1040,8 @@ static int usbdux_ao_cmd(struct comedi_device *dev, struct comedi_subdevice *s) /* set current channel of the running acquisition to zero */ s->async->cur_chan = 0; - for (i = 0; i < cmd->chanlist_len; ++i) { - unsigned int chan = CR_CHAN(cmd->chanlist[i]); - - devpriv->ao_chanlist[i] = chan << 6; - } + for (i = 0; i < cmd->chanlist_len; ++i) + devpriv->ao_chanlist[i] = CR_CHAN(cmd->chanlist[i]); /* we count in steps of 1ms (125us) */ /* 125us mode not used yet */ diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c index 6330af6..bc23d66 100644 --- a/drivers/staging/iio/impedance-analyzer/ad5933.c +++ b/drivers/staging/iio/impedance-analyzer/ad5933.c @@ -115,6 +115,7 @@ static const struct iio_chan_spec ad5933_channels[] = { .channel = 0, .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED), .address = AD5933_REG_TEMP_DATA, + .scan_index = -1, .scan_type = { .sign = 's', .realbits = 14, @@ -124,9 +125,7 @@ static const struct iio_chan_spec ad5933_channels[] = { .type = IIO_VOLTAGE, .indexed = 1, .channel = 0, - .extend_name = "real_raw", - .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | - BIT(IIO_CHAN_INFO_SCALE), + .extend_name = "real", .address = AD5933_REG_REAL_DATA, .scan_index = 0, .scan_type = { @@ -138,9 +137,7 @@ static const struct iio_chan_spec ad5933_channels[] = { .type = IIO_VOLTAGE, .indexed = 1, .channel = 0, - .extend_name = 
"imag_raw", - .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | - BIT(IIO_CHAN_INFO_SCALE), + .extend_name = "imag", .address = AD5933_REG_IMAG_DATA, .scan_index = 1, .scan_type = { @@ -746,14 +743,14 @@ static int ad5933_probe(struct i2c_client *client, indio_dev->name = id->name; indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->channels = ad5933_channels; - indio_dev->num_channels = 1; /* only register temp0_input */ + indio_dev->num_channels = ARRAY_SIZE(ad5933_channels); ret = ad5933_register_ring_funcs_and_init(indio_dev); if (ret) goto error_disable_reg; - /* skip temp0_input, register in0_(real|imag)_raw */ - ret = iio_buffer_register(indio_dev, &ad5933_channels[1], 2); + ret = iio_buffer_register(indio_dev, ad5933_channels, + ARRAY_SIZE(ad5933_channels)); if (ret) goto error_unreg_ring; diff --git a/drivers/staging/iio/light/tsl2x7x_core.c b/drivers/staging/iio/light/tsl2x7x_core.c index c99f890..64c73ad 100644 --- a/drivers/staging/iio/light/tsl2x7x_core.c +++ b/drivers/staging/iio/light/tsl2x7x_core.c @@ -672,9 +672,13 @@ static int tsl2x7x_chip_on(struct iio_dev *indio_dev) chip->tsl2x7x_config[TSL2X7X_PRX_COUNT] = chip->tsl2x7x_settings.prox_pulse_count; chip->tsl2x7x_config[TSL2X7X_PRX_MINTHRESHLO] = - chip->tsl2x7x_settings.prox_thres_low; + (chip->tsl2x7x_settings.prox_thres_low) & 0xFF; + chip->tsl2x7x_config[TSL2X7X_PRX_MINTHRESHHI] = + (chip->tsl2x7x_settings.prox_thres_low >> 8) & 0xFF; chip->tsl2x7x_config[TSL2X7X_PRX_MAXTHRESHLO] = - chip->tsl2x7x_settings.prox_thres_high; + (chip->tsl2x7x_settings.prox_thres_high) & 0xFF; + chip->tsl2x7x_config[TSL2X7X_PRX_MAXTHRESHHI] = + (chip->tsl2x7x_settings.prox_thres_high >> 8) & 0xFF; /* and make sure we're not already on */ if (chip->tsl2x7x_chip_status == TSL2X7X_CHIP_WORKING) { diff --git a/drivers/staging/iio/meter/ade7758.h b/drivers/staging/iio/meter/ade7758.h index 0731820..e8c98cf 100644 --- a/drivers/staging/iio/meter/ade7758.h +++ b/drivers/staging/iio/meter/ade7758.h @@ -119,7 +119,6 @@ struct 
ade7758_state { u8 *tx; u8 *rx; struct mutex buf_lock; - const struct iio_chan_spec *ade7758_ring_channels; struct spi_transfer ring_xfer[4]; struct spi_message ring_msg; /* diff --git a/drivers/staging/iio/meter/ade7758_core.c b/drivers/staging/iio/meter/ade7758_core.c index 6005d4a..6f0886f 100644 --- a/drivers/staging/iio/meter/ade7758_core.c +++ b/drivers/staging/iio/meter/ade7758_core.c @@ -630,9 +630,6 @@ static const struct iio_chan_spec ade7758_channels[] = { .type = IIO_VOLTAGE, .indexed = 1, .channel = 0, - .extend_name = "raw", - .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), - .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), .address = AD7758_WT(AD7758_PHASE_A, AD7758_VOLTAGE), .scan_index = 0, .scan_type = { @@ -644,9 +641,6 @@ static const struct iio_chan_spec ade7758_channels[] = { .type = IIO_CURRENT, .indexed = 1, .channel = 0, - .extend_name = "raw", - .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), - .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), .address = AD7758_WT(AD7758_PHASE_A, AD7758_CURRENT), .scan_index = 1, .scan_type = { @@ -658,9 +652,7 @@ static const struct iio_chan_spec ade7758_channels[] = { .type = IIO_POWER, .indexed = 1, .channel = 0, - .extend_name = "apparent_raw", - .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), - .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), + .extend_name = "apparent", .address = AD7758_WT(AD7758_PHASE_A, AD7758_APP_PWR), .scan_index = 2, .scan_type = { @@ -672,9 +664,7 @@ static const struct iio_chan_spec ade7758_channels[] = { .type = IIO_POWER, .indexed = 1, .channel = 0, - .extend_name = "active_raw", - .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), - .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), + .extend_name = "active", .address = AD7758_WT(AD7758_PHASE_A, AD7758_ACT_PWR), .scan_index = 3, .scan_type = { @@ -686,9 +676,7 @@ static const struct iio_chan_spec ade7758_channels[] = { .type = IIO_POWER, .indexed = 1, .channel = 0, - .extend_name = "reactive_raw", - 
.info_mask_separate = BIT(IIO_CHAN_INFO_RAW), - .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), + .extend_name = "reactive", .address = AD7758_WT(AD7758_PHASE_A, AD7758_REACT_PWR), .scan_index = 4, .scan_type = { @@ -700,9 +688,6 @@ static const struct iio_chan_spec ade7758_channels[] = { .type = IIO_VOLTAGE, .indexed = 1, .channel = 1, - .extend_name = "raw", - .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), - .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), .address = AD7758_WT(AD7758_PHASE_B, AD7758_VOLTAGE), .scan_index = 5, .scan_type = { @@ -714,9 +699,6 @@ static const struct iio_chan_spec ade7758_channels[] = { .type = IIO_CURRENT, .indexed = 1, .channel = 1, - .extend_name = "raw", - .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), - .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), .address = AD7758_WT(AD7758_PHASE_B, AD7758_CURRENT), .scan_index = 6, .scan_type = { @@ -728,9 +710,7 @@ static const struct iio_chan_spec ade7758_channels[] = { .type = IIO_POWER, .indexed = 1, .channel = 1, - .extend_name = "apparent_raw", - .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), - .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), + .extend_name = "apparent", .address = AD7758_WT(AD7758_PHASE_B, AD7758_APP_PWR), .scan_index = 7, .scan_type = { @@ -742,9 +722,7 @@ static const struct iio_chan_spec ade7758_channels[] = { .type = IIO_POWER, .indexed = 1, .channel = 1, - .extend_name = "active_raw", - .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), - .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), + .extend_name = "active", .address = AD7758_WT(AD7758_PHASE_B, AD7758_ACT_PWR), .scan_index = 8, .scan_type = { @@ -756,9 +734,7 @@ static const struct iio_chan_spec ade7758_channels[] = { .type = IIO_POWER, .indexed = 1, .channel = 1, - .extend_name = "reactive_raw", - .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), - .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), + .extend_name = "reactive", .address = AD7758_WT(AD7758_PHASE_B, AD7758_REACT_PWR), 
.scan_index = 9, .scan_type = { @@ -770,9 +746,6 @@ static const struct iio_chan_spec ade7758_channels[] = { .type = IIO_VOLTAGE, .indexed = 1, .channel = 2, - .extend_name = "raw", - .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), - .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), .address = AD7758_WT(AD7758_PHASE_C, AD7758_VOLTAGE), .scan_index = 10, .scan_type = { @@ -784,9 +757,6 @@ static const struct iio_chan_spec ade7758_channels[] = { .type = IIO_CURRENT, .indexed = 1, .channel = 2, - .extend_name = "raw", - .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), - .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), .address = AD7758_WT(AD7758_PHASE_C, AD7758_CURRENT), .scan_index = 11, .scan_type = { @@ -798,9 +768,7 @@ static const struct iio_chan_spec ade7758_channels[] = { .type = IIO_POWER, .indexed = 1, .channel = 2, - .extend_name = "apparent_raw", - .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), - .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), + .extend_name = "apparent", .address = AD7758_WT(AD7758_PHASE_C, AD7758_APP_PWR), .scan_index = 12, .scan_type = { @@ -812,9 +780,7 @@ static const struct iio_chan_spec ade7758_channels[] = { .type = IIO_POWER, .indexed = 1, .channel = 2, - .extend_name = "active_raw", - .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), - .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), + .extend_name = "active", .address = AD7758_WT(AD7758_PHASE_C, AD7758_ACT_PWR), .scan_index = 13, .scan_type = { @@ -826,9 +792,7 @@ static const struct iio_chan_spec ade7758_channels[] = { .type = IIO_POWER, .indexed = 1, .channel = 2, - .extend_name = "reactive_raw", - .info_mask_separate = BIT(IIO_CHAN_INFO_RAW), - .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), + .extend_name = "reactive", .address = AD7758_WT(AD7758_PHASE_C, AD7758_REACT_PWR), .scan_index = 14, .scan_type = { @@ -872,13 +836,14 @@ static int ade7758_probe(struct spi_device *spi) goto error_free_rx; } st->us = spi; - st->ade7758_ring_channels = 
&ade7758_channels[0]; mutex_init(&st->buf_lock); indio_dev->name = spi->dev.driver->name; indio_dev->dev.parent = &spi->dev; indio_dev->info = &ade7758_info; indio_dev->modes = INDIO_DIRECT_MODE; + indio_dev->channels = ade7758_channels; + indio_dev->num_channels = ARRAY_SIZE(ade7758_channels); ret = ade7758_configure_ring(indio_dev); if (ret) diff --git a/drivers/staging/iio/meter/ade7758_ring.c b/drivers/staging/iio/meter/ade7758_ring.c index 7d5db71..46eb15d 100644 --- a/drivers/staging/iio/meter/ade7758_ring.c +++ b/drivers/staging/iio/meter/ade7758_ring.c @@ -89,11 +89,10 @@ static irqreturn_t ade7758_trigger_handler(int irq, void *p) **/ static int ade7758_ring_preenable(struct iio_dev *indio_dev) { - struct ade7758_state *st = iio_priv(indio_dev); unsigned channel; int ret; - if (!bitmap_empty(indio_dev->active_scan_mask, indio_dev->masklength)) + if (bitmap_empty(indio_dev->active_scan_mask, indio_dev->masklength)) return -EINVAL; ret = iio_sw_buffer_preenable(indio_dev); @@ -104,7 +103,7 @@ static int ade7758_ring_preenable(struct iio_dev *indio_dev) indio_dev->masklength); ade7758_write_waveform_type(&indio_dev->dev, - st->ade7758_ring_channels[channel].address); + indio_dev->channels[channel].address); return 0; } diff --git a/drivers/staging/iio/meter/ade7758_trigger.c b/drivers/staging/iio/meter/ade7758_trigger.c index 7a94ddd..8c4f289 100644 --- a/drivers/staging/iio/meter/ade7758_trigger.c +++ b/drivers/staging/iio/meter/ade7758_trigger.c @@ -85,7 +85,7 @@ int ade7758_probe_trigger(struct iio_dev *indio_dev) ret = iio_trigger_register(st->trig); /* select default trigger */ - indio_dev->trig = st->trig; + indio_dev->trig = iio_trigger_get(st->trig); if (ret) goto error_free_irq; diff --git a/drivers/staging/lustre/lustre/Kconfig b/drivers/staging/lustre/lustre/Kconfig index 2156a44..3e0e607 100644 --- a/drivers/staging/lustre/lustre/Kconfig +++ b/drivers/staging/lustre/lustre/Kconfig @@ -57,4 +57,5 @@ config LUSTRE_TRANSLATE_ERRNOS config 
LUSTRE_LLITE_LLOOP bool "Lustre virtual block device" depends on LUSTRE_FS && BLOCK + depends on !PPC_64K_PAGES && !ARM64_64K_PAGES default m diff --git a/drivers/staging/lustre/lustre/llite/dcache.c b/drivers/staging/lustre/lustre/llite/dcache.c index e7629be..c6ca9ca 100644 --- a/drivers/staging/lustre/lustre/llite/dcache.c +++ b/drivers/staging/lustre/lustre/llite/dcache.c @@ -278,7 +278,7 @@ void ll_invalidate_aliases(struct inode *inode) inode->i_ino, inode->i_generation, inode); ll_lock_dcache(inode); - ll_d_hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { + ll_d_hlist_for_each_entry(dentry, p, &inode->i_dentry, d_u.d_alias) { CDEBUG(D_DENTRY, "dentry in drop %.*s (%p) parent %p " "inode %p flags %d\n", dentry->d_name.len, dentry->d_name.name, dentry, dentry->d_parent, diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c index b868c2b..0431329 100644 --- a/drivers/staging/lustre/lustre/llite/llite_lib.c +++ b/drivers/staging/lustre/lustre/llite/llite_lib.c @@ -665,7 +665,7 @@ void lustre_dump_dentry(struct dentry *dentry, int recur) return; list_for_each(tmp, &dentry->d_subdirs) { - struct dentry *d = list_entry(tmp, struct dentry, d_u.d_child); + struct dentry *d = list_entry(tmp, struct dentry, d_child); lustre_dump_dentry(d, recur - 1); } } diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c index 34815b5..cd90a65 100644 --- a/drivers/staging/lustre/lustre/llite/namei.c +++ b/drivers/staging/lustre/lustre/llite/namei.c @@ -175,14 +175,14 @@ static void ll_invalidate_negative_children(struct inode *dir) struct ll_d_hlist_node *p; ll_lock_dcache(dir); - ll_d_hlist_for_each_entry(dentry, p, &dir->i_dentry, d_alias) { + ll_d_hlist_for_each_entry(dentry, p, &dir->i_dentry, d_u.d_alias) { spin_lock(&dentry->d_lock); if (!list_empty(&dentry->d_subdirs)) { struct dentry *child; list_for_each_entry_safe(child, tmp_subdir, &dentry->d_subdirs, - 
d_u.d_child) { + d_child) { if (child->d_inode == NULL) d_lustre_invalidate(child, 1); } @@ -363,7 +363,7 @@ static struct dentry *ll_find_alias(struct inode *inode, struct dentry *dentry) discon_alias = invalid_alias = NULL; ll_lock_dcache(inode); - ll_d_hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) { + ll_d_hlist_for_each_entry(alias, p, &inode->i_dentry, d_u.d_alias) { LASSERT(alias != dentry); spin_lock(&alias->d_lock); @@ -953,7 +953,7 @@ static void ll_get_child_fid(struct inode * dir, struct qstr *name, { struct dentry *parent, *child; - parent = ll_d_hlist_entry(dir->i_dentry, struct dentry, d_alias); + parent = ll_d_hlist_entry(dir->i_dentry, struct dentry, d_u.d_alias); child = d_lookup(parent, name); if (child) { if (child->d_inode) diff --git a/drivers/staging/rtl8188eu/core/rtw_recv.c b/drivers/staging/rtl8188eu/core/rtw_recv.c index 2011657..33243ed 100644 --- a/drivers/staging/rtl8188eu/core/rtw_recv.c +++ b/drivers/staging/rtl8188eu/core/rtw_recv.c @@ -555,7 +555,7 @@ _func_exit_; /* set the security information in the recv_frame */ static union recv_frame *portctrl(struct adapter *adapter, union recv_frame *precv_frame) { - u8 *psta_addr = NULL, *ptr; + u8 *psta_addr, *ptr; uint auth_alg; struct recv_frame_hdr *pfhdr; struct sta_info *psta; @@ -569,7 +569,6 @@ static union recv_frame *portctrl(struct adapter *adapter, union recv_frame *pre _func_enter_; pstapriv = &adapter->stapriv; - psta = rtw_get_stainfo(pstapriv, psta_addr); auth_alg = adapter->securitypriv.dot11AuthAlgrthm; @@ -577,6 +576,7 @@ _func_enter_; pfhdr = &precv_frame->u.hdr; pattrib = &pfhdr->attrib; psta_addr = pattrib->ta; + psta = rtw_get_stainfo(pstapriv, psta_addr); prtnframe = NULL; diff --git a/drivers/staging/rtl8188eu/core/rtw_wlan_util.c b/drivers/staging/rtl8188eu/core/rtw_wlan_util.c index 8018edd..ce638d1 100644 --- a/drivers/staging/rtl8188eu/core/rtw_wlan_util.c +++ b/drivers/staging/rtl8188eu/core/rtw_wlan_util.c @@ -1607,13 +1607,18 @@ int 
update_sta_support_rate(struct adapter *padapter, u8 *pvar_ie, uint var_ie_l pIE = (struct ndis_802_11_var_ie *)rtw_get_ie(pvar_ie, _SUPPORTEDRATES_IE_, &ie_len, var_ie_len); if (pIE == NULL) return _FAIL; + if (ie_len > NDIS_802_11_LENGTH_RATES_EX) + return _FAIL; memcpy(pmlmeinfo->FW_sta_info[cam_idx].SupportedRates, pIE->data, ie_len); supportRateNum = ie_len; pIE = (struct ndis_802_11_var_ie *)rtw_get_ie(pvar_ie, _EXT_SUPPORTEDRATES_IE_, &ie_len, var_ie_len); - if (pIE) + if (pIE) { + if (supportRateNum + ie_len > NDIS_802_11_LENGTH_RATES_EX) + return _FAIL; memcpy((pmlmeinfo->FW_sta_info[cam_idx].SupportedRates + supportRateNum), pIE->data, ie_len); + } return _SUCCESS; } diff --git a/drivers/staging/rtl8188eu/os_dep/usb_intf.c b/drivers/staging/rtl8188eu/os_dep/usb_intf.c index 85f692d..6a2c8ab 100644 --- a/drivers/staging/rtl8188eu/os_dep/usb_intf.c +++ b/drivers/staging/rtl8188eu/os_dep/usb_intf.c @@ -53,9 +53,12 @@ static struct usb_device_id rtw_usb_id_tbl[] = { {USB_DEVICE(USB_VENDER_ID_REALTEK, 0x0179)}, /* 8188ETV */ /*=== Customer ID ===*/ /****** 8188EUS ********/ + {USB_DEVICE(0x056e, 0x4008)}, /* Elecom WDC-150SU2M */ {USB_DEVICE(0x07b8, 0x8179)}, /* Abocom - Abocom */ {USB_DEVICE(0x2001, 0x330F)}, /* DLink DWA-125 REV D1 */ {USB_DEVICE(0x2001, 0x3310)}, /* Dlink DWA-123 REV D1 */ + {USB_DEVICE(0x2001, 0x3311)}, /* DLink GO-USB-N150 REV B1 */ + {USB_DEVICE(0x0df6, 0x0076)}, /* Sitecom N150 v2 */ {} /* Terminating entry */ }; diff --git a/drivers/staging/rtl8712/rtl871x_recv.c b/drivers/staging/rtl8712/rtl871x_recv.c index 23ec684..274c359 100644 --- a/drivers/staging/rtl8712/rtl871x_recv.c +++ b/drivers/staging/rtl8712/rtl871x_recv.c @@ -254,7 +254,7 @@ union recv_frame *r8712_portctrl(struct _adapter *adapter, struct sta_info *psta; struct sta_priv *pstapriv; union recv_frame *prtnframe; - u16 ether_type = 0; + u16 ether_type; pstapriv = &adapter->stapriv; ptr = get_recvframe_data(precv_frame); @@ -263,15 +263,14 @@ union recv_frame 
*r8712_portctrl(struct _adapter *adapter, psta = r8712_get_stainfo(pstapriv, psta_addr); auth_alg = adapter->securitypriv.AuthAlgrthm; if (auth_alg == 2) { + /* get ether_type */ + ptr = ptr + pfhdr->attrib.hdrlen + LLC_HEADER_SIZE; + memcpy(&ether_type, ptr, 2); + ether_type = ntohs((unsigned short)ether_type); + if ((psta != NULL) && (psta->ieee8021x_blocked)) { /* blocked * only accept EAPOL frame */ - prtnframe = precv_frame; - /*get ether_type */ - ptr = ptr + pfhdr->attrib.hdrlen + - pfhdr->attrib.iv_len + LLC_HEADER_SIZE; - memcpy(&ether_type, ptr, 2); - ether_type = ntohs((unsigned short)ether_type); if (ether_type == 0x888e) prtnframe = precv_frame; else { diff --git a/drivers/staging/serqt_usb2/serqt_usb2.c b/drivers/staging/serqt_usb2/serqt_usb2.c index 73fc3cc..18d4856 100644 --- a/drivers/staging/serqt_usb2/serqt_usb2.c +++ b/drivers/staging/serqt_usb2/serqt_usb2.c @@ -725,7 +725,7 @@ static int qt_startup(struct usb_serial *serial) goto startup_error; } - switch (serial->dev->descriptor.idProduct) { + switch (le16_to_cpu(serial->dev->descriptor.idProduct)) { case QUATECH_DSU100: case QUATECH_QSU100: case QUATECH_ESU100A: diff --git a/drivers/staging/speakup/main.c b/drivers/staging/speakup/main.c index 14079c4..2239fdd 100644 --- a/drivers/staging/speakup/main.c +++ b/drivers/staging/speakup/main.c @@ -2220,6 +2220,7 @@ static void __exit speakup_exit(void) unregister_keyboard_notifier(&keyboard_notifier_block); unregister_vt_notifier(&vt_notifier_block); speakup_unregister_devsynth(); + speakup_cancel_paste(); del_timer(&cursor_timer); kthread_stop(speakup_task); speakup_task = NULL; diff --git a/drivers/staging/speakup/selection.c b/drivers/staging/speakup/selection.c index f0fb003..ca04d36 100644 --- a/drivers/staging/speakup/selection.c +++ b/drivers/staging/speakup/selection.c @@ -4,6 +4,10 @@ #include <linux/sched.h> #include <linux/device.h> /* for dev_warn */ #include <linux/selection.h> +#include <linux/workqueue.h> +#include <linux/tty.h>
+#include <linux/tty_flip.h> +#include <asm/cmpxchg.h> #include "speakup.h" @@ -121,31 +125,61 @@ int speakup_set_selection(struct tty_struct *tty) return 0; } -/* TODO: move to some helper thread, probably. That'd fix having to check for - * in_atomic(). */ -int speakup_paste_selection(struct tty_struct *tty) +struct speakup_paste_work { + struct work_struct work; + struct tty_struct *tty; +}; + +static void __speakup_paste_selection(struct work_struct *work) { + struct speakup_paste_work *spw = + container_of(work, struct speakup_paste_work, work); + struct tty_struct *tty = xchg(&spw->tty, NULL); struct vc_data *vc = (struct vc_data *) tty->driver_data; int pasted = 0, count; + struct tty_ldisc *ld; DECLARE_WAITQUEUE(wait, current); + + ld = tty_ldisc_ref_wait(tty); + tty_buffer_lock_exclusive(&vc->port); + add_wait_queue(&vc->paste_wait, &wait); while (sel_buffer && sel_buffer_lth > pasted) { set_current_state(TASK_INTERRUPTIBLE); if (test_bit(TTY_THROTTLED, &tty->flags)) { - if (in_atomic()) - /* if we are in an interrupt handler, abort */ - break; schedule(); continue; } count = sel_buffer_lth - pasted; - count = min_t(int, count, tty->receive_room); - tty->ldisc->ops->receive_buf(tty, sel_buffer + pasted, - NULL, count); + count = tty_ldisc_receive_buf(ld, sel_buffer + pasted, NULL, + count); pasted += count; } remove_wait_queue(&vc->paste_wait, &wait); current->state = TASK_RUNNING; + + tty_buffer_unlock_exclusive(&vc->port); + tty_ldisc_deref(ld); + tty_kref_put(tty); +} + +static struct speakup_paste_work speakup_paste_work = { + .work = __WORK_INITIALIZER(speakup_paste_work.work, + __speakup_paste_selection) +}; + +int speakup_paste_selection(struct tty_struct *tty) +{ + if (cmpxchg(&speakup_paste_work.tty, NULL, tty) != NULL) + return -EBUSY; + + tty_kref_get(tty); + schedule_work_on(WORK_CPU_UNBOUND, &speakup_paste_work.work); return 0; } +void speakup_cancel_paste(void) +{ + cancel_work_sync(&speakup_paste_work.work); + 
tty_kref_put(speakup_paste_work.tty); +} diff --git a/drivers/staging/speakup/speakup.h b/drivers/staging/speakup/speakup.h index 0126f71..74fe724 100644 --- a/drivers/staging/speakup/speakup.h +++ b/drivers/staging/speakup/speakup.h @@ -77,6 +77,7 @@ extern void synth_buffer_clear(void); extern void speakup_clear_selection(void); extern int speakup_set_selection(struct tty_struct *tty); extern int speakup_paste_selection(struct tty_struct *tty); +extern void speakup_cancel_paste(void); extern void speakup_register_devsynth(void); extern void speakup_unregister_devsynth(void); extern void synth_write(const char *buf, size_t count); diff --git a/drivers/staging/tidspbridge/core/dsp-clock.c b/drivers/staging/tidspbridge/core/dsp-clock.c index 2f084e18..a1aca44 100644 --- a/drivers/staging/tidspbridge/core/dsp-clock.c +++ b/drivers/staging/tidspbridge/core/dsp-clock.c @@ -226,7 +226,7 @@ int dsp_clk_enable(enum dsp_clk_id clk_id) case GPT_CLK: status = omap_dm_timer_start(timer[clk_id - 1]); break; -#ifdef CONFIG_OMAP_MCBSP +#ifdef CONFIG_SND_OMAP_SOC_MCBSP case MCBSP_CLK: omap_mcbsp_request(MCBSP_ID(clk_id)); omap2_mcbsp_set_clks_src(MCBSP_ID(clk_id), MCBSP_CLKS_PAD_SRC); @@ -302,7 +302,7 @@ int dsp_clk_disable(enum dsp_clk_id clk_id) case GPT_CLK: status = omap_dm_timer_stop(timer[clk_id - 1]); break; -#ifdef CONFIG_OMAP_MCBSP +#ifdef CONFIG_SND_OMAP_SOC_MCBSP case MCBSP_CLK: omap2_mcbsp_set_clks_src(MCBSP_ID(clk_id), MCBSP_CLKS_PRCM_SRC); omap_mcbsp_free(MCBSP_ID(clk_id)); diff --git a/drivers/staging/vt6655/bssdb.c b/drivers/staging/vt6655/bssdb.c index f983915..3496a77 100644 --- a/drivers/staging/vt6655/bssdb.c +++ b/drivers/staging/vt6655/bssdb.c @@ -1026,7 +1026,7 @@ start: pDevice->byERPFlag &= ~(WLAN_SET_ERP_USE_PROTECTION(1)); } - { + if (pDevice->eCommandState == WLAN_ASSOCIATE_WAIT) { pDevice->byReAssocCount++; if ((pDevice->byReAssocCount > 10) && (pDevice->bLinkPass != true)) { //10 sec timeout printk("Re-association timeout!!!\n"); diff --git 
a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c index 7f36a71..7268354 100644 --- a/drivers/staging/vt6655/device_main.c +++ b/drivers/staging/vt6655/device_main.c @@ -2434,6 +2434,7 @@ static irqreturn_t device_intr(int irq, void *dev_instance) { int handled = 0; unsigned char byData = 0; int ii = 0; + unsigned long flags; // unsigned char byRSSI; MACvReadISR(pDevice->PortOffset, &pDevice->dwIsr); @@ -2459,7 +2460,8 @@ static irqreturn_t device_intr(int irq, void *dev_instance) { handled = 1; MACvIntDisable(pDevice->PortOffset); - spin_lock_irq(&pDevice->lock); + + spin_lock_irqsave(&pDevice->lock, flags); //Make sure current page is 0 VNSvInPortB(pDevice->PortOffset + MAC_REG_PAGE1SEL, &byOrgPageSel); @@ -2700,7 +2702,8 @@ static irqreturn_t device_intr(int irq, void *dev_instance) { MACvSelectPage1(pDevice->PortOffset); } - spin_unlock_irq(&pDevice->lock); + spin_unlock_irqrestore(&pDevice->lock, flags); + MACvIntEnable(pDevice->PortOffset, IMR_MASK_VALUE); return IRQ_RETVAL(handled); diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index d02088f..162e01a 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -430,7 +430,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, } if (page_zero_filled(uncmem)) { - kunmap_atomic(user_mem); + if (user_mem) + kunmap_atomic(user_mem); /* Free memory associated with this sector now. 
*/ zram_free_page(zram, index); diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index b5e5746..c60277e 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -460,6 +460,7 @@ int iscsit_del_np(struct iscsi_np *np) spin_lock_bh(&np->np_thread_lock); np->np_exports--; if (np->np_exports) { + np->enabled = true; spin_unlock_bh(&np->np_thread_lock); return 0; } @@ -1304,7 +1305,7 @@ iscsit_check_dataout_hdr(struct iscsi_conn *conn, unsigned char *buf, if (cmd->data_direction != DMA_TO_DEVICE) { pr_err("Command ITT: 0x%08x received DataOUT for a" " NON-WRITE command.\n", cmd->init_task_tag); - return iscsit_reject_cmd(cmd, ISCSI_REASON_PROTOCOL_ERROR, buf); + return iscsit_dump_data_payload(conn, payload_length, 1); } se_cmd = &cmd->se_cmd; iscsit_mod_dataout_timer(cmd); @@ -1579,7 +1580,9 @@ int iscsit_process_nop_out(struct iscsi_conn *conn, struct iscsi_cmd *cmd, * Initiator is expecting a NopIN ping reply.. */ if (hdr->itt != RESERVED_ITT) { - BUG_ON(!cmd); + if (!cmd) + return iscsit_add_reject(conn, ISCSI_REASON_PROTOCOL_ERROR, + (unsigned char *)hdr); spin_lock_bh(&conn->cmd_lock); list_add_tail(&cmd->i_conn_node, &conn->conn_cmd_list); @@ -2476,6 +2479,7 @@ static void iscsit_build_conn_drop_async_message(struct iscsi_conn *conn) { struct iscsi_cmd *cmd; struct iscsi_conn *conn_p; + bool found = false; /* * Only send a Asynchronous Message on connections whos network @@ -2484,11 +2488,12 @@ static void iscsit_build_conn_drop_async_message(struct iscsi_conn *conn) list_for_each_entry(conn_p, &conn->sess->sess_conn_list, conn_list) { if (conn_p->conn_state == TARG_CONN_STATE_LOGGED_IN) { iscsit_inc_conn_usage_count(conn_p); + found = true; break; } } - if (!conn_p) + if (!found) return; cmd = iscsit_allocate_cmd(conn_p, GFP_ATOMIC); @@ -3373,7 +3378,9 @@ static bool iscsit_check_inaddr_any(struct iscsi_np *np) #define SENDTARGETS_BUF_LIMIT 32768U -static int 
iscsit_build_sendtargets_response(struct iscsi_cmd *cmd) +static int +iscsit_build_sendtargets_response(struct iscsi_cmd *cmd, + enum iscsit_transport_type network_transport) { char *payload = NULL; struct iscsi_conn *conn = cmd->conn; @@ -3445,6 +3452,9 @@ static int iscsit_build_sendtargets_response(struct iscsi_cmd *cmd) struct iscsi_np *np = tpg_np->tpg_np; bool inaddr_any = iscsit_check_inaddr_any(np); + if (np->np_network_transport != network_transport) + continue; + len = sprintf(buf, "TargetAddress=" "%s:%hu,%hu", (inaddr_any == false) ? @@ -3482,11 +3492,12 @@ eob: int iscsit_build_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn, - struct iscsi_text_rsp *hdr) + struct iscsi_text_rsp *hdr, + enum iscsit_transport_type network_transport) { int text_length, padding; - text_length = iscsit_build_sendtargets_response(cmd); + text_length = iscsit_build_sendtargets_response(cmd, network_transport); if (text_length < 0) return text_length; @@ -3524,7 +3535,7 @@ static int iscsit_send_text_rsp( u32 tx_size = 0; int text_length, iov_count = 0, rc; - rc = iscsit_build_text_rsp(cmd, conn, hdr); + rc = iscsit_build_text_rsp(cmd, conn, hdr, ISCSI_TCP); if (rc < 0) return rc; @@ -4198,8 +4209,6 @@ int iscsit_close_connection( if (conn->conn_transport->iscsit_wait_conn) conn->conn_transport->iscsit_wait_conn(conn); - iscsit_free_queue_reqs_for_conn(conn); - /* * During Connection recovery drop unacknowledged out of order * commands for this connection, and prepare the other commands @@ -4216,6 +4225,7 @@ int iscsit_close_connection( iscsit_clear_ooo_cmdsns_for_conn(conn); iscsit_release_commands_from_conn(conn); } + iscsit_free_queue_reqs_for_conn(conn); /* * Handle decrementing session or connection usage count if @@ -4501,6 +4511,7 @@ static void iscsit_logout_post_handler_diffcid( { struct iscsi_conn *l_conn; struct iscsi_session *sess = conn->sess; + bool conn_found = false; if (!sess) return; @@ -4509,12 +4520,13 @@ static void 
iscsit_logout_post_handler_diffcid( list_for_each_entry(l_conn, &sess->sess_conn_list, conn_list) { if (l_conn->cid == cid) { iscsit_inc_conn_usage_count(l_conn); + conn_found = true; break; } } spin_unlock_bh(&sess->conn_lock); - if (!l_conn) + if (!conn_found) return; if (l_conn->sock) diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c index 3e80188..b25bba5 100644 --- a/drivers/target/iscsi/iscsi_target_auth.c +++ b/drivers/target/iscsi/iscsi_target_auth.c @@ -314,6 +314,16 @@ static int chap_server_compute_md5( goto out; } /* + * During mutual authentication, the CHAP_C generated by the + * initiator must not match the original CHAP_C generated by + * the target. + */ + if (!memcmp(challenge_binhex, chap->challenge, CHAP_CHALLENGE_LENGTH)) { + pr_err("initiator CHAP_C matches target CHAP_C, failing" + " login attempt\n"); + goto out; + } + /* * Generate CHAP_N and CHAP_R for mutual authentication. */ tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); diff --git a/drivers/target/iscsi/iscsi_target_core.h b/drivers/target/iscsi/iscsi_target_core.h index 9a5721b..e2e1e63 100644 --- a/drivers/target/iscsi/iscsi_target_core.h +++ b/drivers/target/iscsi/iscsi_target_core.h @@ -777,6 +777,7 @@ struct iscsi_np { int np_ip_proto; int np_sock_type; enum np_thread_state_table np_thread_state; + bool enabled; enum iscsi_timer_flags_table np_login_timer_flags; u32 np_exports; enum np_flags_table np_flags; diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index f442a9c..0c15772 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -983,6 +983,7 @@ int iscsi_target_setup_login_socket( } np->np_transport = t; + np->enabled = true; return 0; } @@ -1187,6 +1188,9 @@ old_sess_out: conn->sock = NULL; } + if (conn->conn_transport->iscsit_wait_conn) + conn->conn_transport->iscsit_wait_conn(conn); + if 
(conn->conn_transport->iscsit_free_conn) conn->conn_transport->iscsit_free_conn(conn); @@ -1197,7 +1201,7 @@ old_sess_out: static int __iscsi_target_login_thread(struct iscsi_np *np) { u8 *buffer, zero_tsih = 0; - int ret = 0, rc, stop; + int ret = 0, rc; struct iscsi_conn *conn = NULL; struct iscsi_login *login; struct iscsi_portal_group *tpg = NULL; @@ -1211,6 +1215,9 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) if (np->np_thread_state == ISCSI_NP_THREAD_RESET) { np->np_thread_state = ISCSI_NP_THREAD_ACTIVE; complete(&np->np_restart_comp); + } else if (np->np_thread_state == ISCSI_NP_THREAD_SHUTDOWN) { + spin_unlock_bh(&np->np_thread_lock); + goto exit; } else { np->np_thread_state = ISCSI_NP_THREAD_ACTIVE; } @@ -1403,10 +1410,8 @@ old_sess_out: } out: - stop = kthread_should_stop(); - /* Wait for another socket.. */ - if (!stop) - return 1; + return 1; + exit: iscsi_stop_login_thread_timer(np); spin_lock_bh(&np->np_thread_lock); @@ -1423,7 +1428,7 @@ int iscsi_target_login_thread(void *arg) allow_signal(SIGINT); - while (!kthread_should_stop()) { + while (1) { ret = __iscsi_target_login_thread(np); /* * We break and exit here unless another sock_accept() call diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c index 4d2e23f..43b7e6a 100644 --- a/drivers/target/iscsi/iscsi_target_parameters.c +++ b/drivers/target/iscsi/iscsi_target_parameters.c @@ -601,7 +601,7 @@ int iscsi_copy_param_list( param_list = kzalloc(sizeof(struct iscsi_param_list), GFP_KERNEL); if (!param_list) { pr_err("Unable to allocate memory for struct iscsi_param_list.\n"); - goto err_out; + return -1; } INIT_LIST_HEAD(&param_list->param_list); INIT_LIST_HEAD(&param_list->extra_response_list); diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 3cf77c0..b713d63 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -184,6 +184,8 @@
static void iscsit_clear_tpg_np_login_thread( return; } + if (shutdown) + tpg_np->tpg_np->enabled = false; iscsit_reset_np_thread(tpg_np->tpg_np, tpg_np, tpg, shutdown); } diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index 1039de4..c5c9855 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -1294,6 +1294,8 @@ int iscsit_tx_login_rsp(struct iscsi_conn *conn, u8 status_class, u8 status_deta login->login_failed = 1; iscsit_collect_login_stats(conn, status_class, status_detail); + memset(&login->rsp[0], 0, ISCSI_HDR_LEN); + hdr = (struct iscsi_login_rsp *)&login->rsp[0]; hdr->opcode = ISCSI_OP_LOGIN_RSP; hdr->status_class = status_class; @@ -1353,15 +1355,15 @@ static int iscsit_do_tx_data( struct iscsi_conn *conn, struct iscsi_data_count *count) { - int data = count->data_length, total_tx = 0, tx_loop = 0, iov_len; + int ret, iov_len; struct kvec *iov_p; struct msghdr msg; if (!conn || !conn->sock || !conn->conn_ops) return -1; - if (data <= 0) { - pr_err("Data length is: %d\n", data); + if (count->data_length <= 0) { + pr_err("Data length is: %d\n", count->data_length); return -1; } @@ -1370,20 +1372,16 @@ static int iscsit_do_tx_data( iov_p = count->iov; iov_len = count->iov_count; - while (total_tx < data) { - tx_loop = kernel_sendmsg(conn->sock, &msg, iov_p, iov_len, - (data - total_tx)); - if (tx_loop <= 0) { - pr_debug("tx_loop: %d total_tx %d\n", - tx_loop, total_tx); - return tx_loop; - } - total_tx += tx_loop; - pr_debug("tx_loop: %d, total_tx: %d, data: %d\n", - tx_loop, total_tx, data); + ret = kernel_sendmsg(conn->sock, &msg, iov_p, iov_len, + count->data_length); + if (ret != count->data_length) { + pr_err("Unexpected ret: %d send data %d\n", + ret, count->data_length); + return -EPIPE; } + pr_debug("ret: %d, sent data: %d\n", ret, count->data_length); - return total_tx; + return ret; } int rx_data( diff --git a/drivers/target/target_core_alua.c 
b/drivers/target/target_core_alua.c index 4724410..792424f 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -409,7 +409,16 @@ static inline int core_alua_state_standby( case REPORT_LUNS: case RECEIVE_DIAGNOSTIC: case SEND_DIAGNOSTIC: + case READ_CAPACITY: return 0; + case SERVICE_ACTION_IN: + switch (cdb[1] & 0x1f) { + case SAI_READ_CAPACITY_16: + return 0; + default: + *alua_ascq = ASCQ_04H_ALUA_TG_PT_STANDBY; + return 1; + } case MAINTENANCE_IN: switch (cdb[1] & 0x1f) { case MI_REPORT_TARGET_PGS: diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 82e81c5..45d0867 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -2040,6 +2040,11 @@ static ssize_t target_core_alua_tg_pt_gp_store_attr_alua_access_state( " tg_pt_gp ID: %hu\n", tg_pt_gp->tg_pt_gp_valid_id); return -EINVAL; } + if (!(dev->dev_flags & DF_CONFIGURED)) { + pr_err("Unable to set alua_access_state while device is" + " not configured\n"); + return -ENODEV; + } ret = kstrtoul(page, 0, &tmp); if (ret < 0) { diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index e5e3965..a3ce912 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -617,6 +617,7 @@ void core_dev_unexport( dev->export_count--; spin_unlock(&hba->device_lock); + lun->lun_sep = NULL; lun->lun_se_dev = NULL; } @@ -799,10 +800,10 @@ int se_dev_set_emulate_write_cache(struct se_device *dev, int flag) pr_err("emulate_write_cache not supported for pSCSI\n"); return -EINVAL; } - if (dev->transport->get_write_cache) { - pr_warn("emulate_write_cache cannot be changed when underlying" - " HW reports WriteCacheEnabled, ignoring request\n"); - return 0; + if (flag && + dev->transport->get_write_cache) { + pr_err("emulate_write_cache not supported for this device\n"); + return -EINVAL; } dev->dev_attrib.emulate_write_cache = flag; @@ -1321,7 +1322,8 
@@ int core_dev_add_initiator_node_lun_acl( * Check to see if there are any existing persistent reservation APTPL * pre-registrations that need to be enabled for this LUN ACL.. */ - core_scsi3_check_aptpl_registration(lun->lun_se_dev, tpg, lun, lacl); + core_scsi3_check_aptpl_registration(lun->lun_se_dev, tpg, lun, nacl, + lacl->mapped_lun); return 0; } diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index dfe3db7..a1e1ecd 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -944,10 +944,10 @@ int core_scsi3_check_aptpl_registration( struct se_device *dev, struct se_portal_group *tpg, struct se_lun *lun, - struct se_lun_acl *lun_acl) + struct se_node_acl *nacl, + u32 mapped_lun) { - struct se_node_acl *nacl = lun_acl->se_lun_nacl; - struct se_dev_entry *deve = nacl->device_list[lun_acl->mapped_lun]; + struct se_dev_entry *deve = nacl->device_list[mapped_lun]; if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS) return 0; diff --git a/drivers/target/target_core_pr.h b/drivers/target/target_core_pr.h index ed75cdd..14a0a2e 100644 --- a/drivers/target/target_core_pr.h +++ b/drivers/target/target_core_pr.h @@ -55,7 +55,7 @@ extern int core_scsi3_alloc_aptpl_registration( unsigned char *, u16, u32, int, int, u8); extern int core_scsi3_check_aptpl_registration(struct se_device *, struct se_portal_group *, struct se_lun *, - struct se_lun_acl *); + struct se_node_acl *, u32); extern void core_scsi3_free_pr_reg_from_nacl(struct se_device *, struct se_node_acl *); extern void core_scsi3_free_all_registrations(struct se_device *); diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c index 131327a..9f6bede 100644 --- a/drivers/target/target_core_rd.c +++ b/drivers/target/target_core_rd.c @@ -179,7 +179,7 @@ static int rd_build_device_space(struct rd_dev *rd_dev) - 1; for (j = 0; j < sg_per_table; j++) { - pg = alloc_pages(GFP_KERNEL, 0); + pg = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0); 
if (!pg) { pr_err("Unable to allocate scatterlist" " pages for struct rd_dev_sg_table\n"); diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index d9b92b2..2145222 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -80,7 +80,7 @@ sbc_emulate_readcapacity(struct se_cmd *cmd) transport_kunmap_data_sg(cmd); } - target_complete_cmd(cmd, GOOD); + target_complete_cmd_with_length(cmd, GOOD, 8); return 0; } @@ -118,7 +118,7 @@ sbc_emulate_readcapacity_16(struct se_cmd *cmd) transport_kunmap_data_sg(cmd); } - target_complete_cmd(cmd, GOOD); + target_complete_cmd_with_length(cmd, GOOD, 32); return 0; } @@ -409,13 +409,14 @@ static sense_reason_t compare_and_write_callback(struct se_cmd *cmd) goto out; } - write_sg = kzalloc(sizeof(struct scatterlist) * cmd->t_data_nents, + write_sg = kmalloc(sizeof(struct scatterlist) * cmd->t_data_nents, GFP_KERNEL); if (!write_sg) { pr_err("Unable to allocate compare_and_write sg\n"); ret = TCM_OUT_OF_RESOURCES; goto out; } + sg_init_table(write_sg, cmd->t_data_nents); /* * Setup verify and write data payloads from total NumberLBAs. 
*/ diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index 0745395..ee400df 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -639,6 +639,7 @@ spc_emulate_inquiry(struct se_cmd *cmd) unsigned char buf[SE_INQUIRY_BUF]; sense_reason_t ret; int p; + int len = 0; memset(buf, 0, SE_INQUIRY_BUF); @@ -656,6 +657,7 @@ spc_emulate_inquiry(struct se_cmd *cmd) } ret = spc_emulate_inquiry_std(cmd, buf); + len = buf[4] + 5; goto out; } @@ -663,6 +665,7 @@ spc_emulate_inquiry(struct se_cmd *cmd) if (cdb[2] == evpd_handlers[p].page) { buf[1] = cdb[2]; ret = evpd_handlers[p].emulate(cmd, buf); + len = get_unaligned_be16(&buf[2]) + 4; goto out; } } @@ -678,7 +681,7 @@ out: } if (!ret) - target_complete_cmd(cmd, GOOD); + target_complete_cmd_with_length(cmd, GOOD, len); return ret; } @@ -996,7 +999,7 @@ set_length: transport_kunmap_data_sg(cmd); } - target_complete_cmd(cmd, GOOD); + target_complete_cmd_with_length(cmd, GOOD, length); return 0; } @@ -1173,7 +1176,7 @@ done: buf[3] = (lun_count & 0xff); transport_kunmap_data_sg(cmd); - target_complete_cmd(cmd, GOOD); + target_complete_cmd_with_length(cmd, GOOD, 8 + lun_count * 8); return 0; } EXPORT_SYMBOL(spc_emulate_report_luns); diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index b9a6ec0..d725835 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -40,6 +40,7 @@ #include <target/target_core_fabric.h> #include "target_core_internal.h" +#include "target_core_pr.h" extern struct se_device *g_lun0_dev; @@ -165,6 +166,13 @@ void core_tpg_add_node_to_devs( core_enable_device_list_for_node(lun, NULL, lun->unpacked_lun, lun_access, acl, tpg); + /* + * Check to see if there are any existing persistent reservation + * APTPL pre-registrations that need to be enabled for this dynamic + * LUN ACL now.. 
+ */ + core_scsi3_check_aptpl_registration(dev, tpg, lun, acl, + lun->unpacked_lun); spin_lock(&tpg->tpg_lun_lock); } spin_unlock(&tpg->tpg_lun_lock); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 0b0009b..3931b50 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -552,7 +552,7 @@ static int transport_cmd_check_stop(struct se_cmd *cmd, bool remove_from_lists, spin_unlock_irqrestore(&cmd->t_state_lock, flags); - complete(&cmd->t_transport_stop_comp); + complete_all(&cmd->t_transport_stop_comp); return 1; } @@ -674,7 +674,7 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status) if (cmd->transport_state & CMD_T_ABORTED && cmd->transport_state & CMD_T_STOP) { spin_unlock_irqrestore(&cmd->t_state_lock, flags); - complete(&cmd->t_transport_stop_comp); + complete_all(&cmd->t_transport_stop_comp); return; } else if (cmd->transport_state & CMD_T_FAILED) { INIT_WORK(&cmd->work, target_complete_failure_work); @@ -690,6 +690,23 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status) } EXPORT_SYMBOL(target_complete_cmd); +void target_complete_cmd_with_length(struct se_cmd *cmd, u8 scsi_status, int length) +{ + if (scsi_status == SAM_STAT_GOOD && length < cmd->data_length) { + if (cmd->se_cmd_flags & SCF_UNDERFLOW_BIT) { + cmd->residual_count += cmd->data_length - length; + } else { + cmd->se_cmd_flags |= SCF_UNDERFLOW_BIT; + cmd->residual_count = cmd->data_length - length; + } + + cmd->data_length = length; + } + + target_complete_cmd(cmd, scsi_status); +} +EXPORT_SYMBOL(target_complete_cmd_with_length); + static void target_add_to_state_list(struct se_cmd *cmd) { struct se_device *dev = cmd->se_dev; @@ -1103,6 +1120,7 @@ void transport_init_se_cmd( init_completion(&cmd->cmd_wait_comp); init_completion(&cmd->task_stop_comp); spin_lock_init(&cmd->t_state_lock); + kref_init(&cmd->cmd_kref); cmd->transport_state = CMD_T_DEV_ACTIVE; cmd->se_tfo = tfo; @@ -1748,7 
+1766,7 @@ void target_execute_cmd(struct se_cmd *cmd) cmd->se_tfo->get_task_tag(cmd)); spin_unlock_irq(&cmd->t_state_lock); - complete(&cmd->t_transport_stop_comp); + complete_all(&cmd->t_transport_stop_comp); return; } @@ -1838,8 +1856,7 @@ static void transport_complete_qf(struct se_cmd *cmd) if (cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) { trace_target_cmd_complete(cmd); ret = cmd->se_tfo->queue_status(cmd); - if (ret) - goto out; + goto out; } switch (cmd->data_direction) { @@ -2214,7 +2231,7 @@ transport_generic_new_cmd(struct se_cmd *cmd) * and let it call back once the write buffers are ready. */ target_add_to_state_list(cmd); - if (cmd->data_direction != DMA_TO_DEVICE) { + if (cmd->data_direction != DMA_TO_DEVICE || cmd->data_length == 0) { target_execute_cmd(cmd); return 0; } @@ -2293,7 +2310,6 @@ int target_get_sess_cmd(struct se_session *se_sess, struct se_cmd *se_cmd, unsigned long flags; int ret = 0; - kref_init(&se_cmd->cmd_kref); /* * Add a second kref if the fabric caller is expecting to handle * fabric acknowledgement that requires two target_put_sess_cmd() @@ -2343,6 +2359,10 @@ static void target_release_cmd_kref(struct kref *kref) */ int target_put_sess_cmd(struct se_session *se_sess, struct se_cmd *se_cmd) { + if (!se_sess) { + se_cmd->se_tfo->release_cmd(se_cmd); + return 1; + } return kref_put_spinlock_irqsave(&se_cmd->cmd_kref, target_release_cmd_kref, &se_sess->sess_cmd_lock); } @@ -2997,6 +3017,12 @@ static void target_tmr_work(struct work_struct *work) int transport_generic_handle_tmr( struct se_cmd *cmd) { + unsigned long flags; + + spin_lock_irqsave(&cmd->t_state_lock, flags); + cmd->transport_state |= CMD_T_ACTIVE; + spin_unlock_irqrestore(&cmd->t_state_lock, flags); + INIT_WORK(&cmd->work, target_tmr_work); queue_work(cmd->se_dev->tmr_wq, &cmd->work); return 0; diff --git a/drivers/target/tcm_fc/tfc_sess.c b/drivers/target/tcm_fc/tfc_sess.c index 4859505..639fdb3 100644 --- a/drivers/target/tcm_fc/tfc_sess.c +++ 
b/drivers/target/tcm_fc/tfc_sess.c @@ -68,6 +68,7 @@ static struct ft_tport *ft_tport_create(struct fc_lport *lport) if (tport) { tport->tpg = tpg; + tpg->tport = tport; return tport; } diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c index b40b37c..d54388a 100644 --- a/drivers/thermal/intel_powerclamp.c +++ b/drivers/thermal/intel_powerclamp.c @@ -426,7 +426,6 @@ static int clamp_thread(void *arg) * allowed. thus jiffies are updated properly. */ preempt_disable(); - tick_nohz_idle_enter(); /* mwait until target jiffies is reached */ while (time_before(jiffies, target_jiffies)) { unsigned long ecx = 1; @@ -444,7 +443,6 @@ static int clamp_thread(void *arg) start_critical_timings(); atomic_inc(&idle_wakeup_counter); } - tick_nohz_idle_exit(); preempt_enable_no_resched(); } del_timer_sync(&wakeup_timer); diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 4962a6a..4f35f1c 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -1747,10 +1747,10 @@ static int __init thermal_init(void) return 0; -unregister_governors: - thermal_unregister_governors(); unregister_class: class_unregister(&thermal_class); +unregister_governors: + thermal_unregister_governors(); error: idr_destroy(&thermal_tz_idr); idr_destroy(&thermal_cdev_idr); diff --git a/drivers/thermal/thermal_hwmon.c b/drivers/thermal/thermal_hwmon.c index fdb0719..1967bee 100644 --- a/drivers/thermal/thermal_hwmon.c +++ b/drivers/thermal/thermal_hwmon.c @@ -140,6 +140,12 @@ thermal_hwmon_lookup_temp(const struct thermal_hwmon_device *hwmon, return NULL; } +static bool thermal_zone_crit_temp_valid(struct thermal_zone_device *tz) +{ + unsigned long temp; + return tz->ops->get_crit_temp && !tz->ops->get_crit_temp(tz, &temp); +} + int thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) { struct thermal_hwmon_device *hwmon; @@ -189,21 +195,18 @@ int thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) if (result) goto 
free_temp_mem; - if (tz->ops->get_crit_temp) { - unsigned long temperature; - if (!tz->ops->get_crit_temp(tz, &temperature)) { - snprintf(temp->temp_crit.name, - sizeof(temp->temp_crit.name), + if (thermal_zone_crit_temp_valid(tz)) { + snprintf(temp->temp_crit.name, + sizeof(temp->temp_crit.name), "temp%d_crit", hwmon->count); - temp->temp_crit.attr.attr.name = temp->temp_crit.name; - temp->temp_crit.attr.attr.mode = 0444; - temp->temp_crit.attr.show = temp_crit_show; - sysfs_attr_init(&temp->temp_crit.attr.attr); - result = device_create_file(hwmon->device, - &temp->temp_crit.attr); - if (result) - goto unregister_input; - } + temp->temp_crit.attr.attr.name = temp->temp_crit.name; + temp->temp_crit.attr.attr.mode = 0444; + temp->temp_crit.attr.show = temp_crit_show; + sysfs_attr_init(&temp->temp_crit.attr.attr); + result = device_create_file(hwmon->device, + &temp->temp_crit.attr); + if (result) + goto unregister_input; } mutex_lock(&thermal_hwmon_list_lock); @@ -250,7 +253,7 @@ void thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz) } device_remove_file(hwmon->device, &temp->temp_input.attr); - if (tz->ops->get_crit_temp) + if (thermal_zone_crit_temp_valid(tz)) device_remove_file(hwmon->device, &temp->temp_crit.attr); mutex_lock(&thermal_hwmon_list_lock); diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c index 9eba119..e8e06d8 100644 --- a/drivers/tty/hvc/hvc_console.c +++ b/drivers/tty/hvc/hvc_console.c @@ -31,6 +31,7 @@ #include <linux/list.h> #include <linux/module.h> #include <linux/major.h> +#include <linux/atomic.h> #include <linux/sysrq.h> #include <linux/tty.h> #include <linux/tty_flip.h> @@ -70,6 +71,9 @@ static struct task_struct *hvc_task; /* Picks up late kicks after list walk but before schedule() */ static int hvc_kicked; +/* hvc_init is triggered from hvc_alloc, i.e. 
only when actually used */ +static atomic_t hvc_needs_init __read_mostly = ATOMIC_INIT(-1); + static int hvc_init(void); #ifdef CONFIG_MAGIC_SYSRQ @@ -186,7 +190,7 @@ static struct tty_driver *hvc_console_device(struct console *c, int *index) return hvc_driver; } -static int __init hvc_console_setup(struct console *co, char *options) +static int hvc_console_setup(struct console *co, char *options) { if (co->index < 0 || co->index >= MAX_NR_HVC_CONSOLES) return -ENODEV; @@ -851,7 +855,7 @@ struct hvc_struct *hvc_alloc(uint32_t vtermno, int data, int i; /* We wait until a driver actually comes along */ - if (!hvc_driver) { + if (atomic_inc_not_zero(&hvc_needs_init)) { int err = hvc_init(); if (err) return ERR_PTR(err); diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index bd73dc2..d711dbb 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -319,7 +319,8 @@ static void n_tty_check_unthrottle(struct tty_struct *tty) static inline void put_tty_queue(unsigned char c, struct n_tty_data *ldata) { - *read_buf_addr(ldata, ldata->read_head++) = c; + *read_buf_addr(ldata, ldata->read_head) = c; + ldata->read_head++; } /** @@ -1210,15 +1211,16 @@ static void n_tty_receive_parity_error(struct tty_struct *tty, unsigned char c) { struct n_tty_data *ldata = tty->disc_data; - if (I_IGNPAR(tty)) - return; - if (I_PARMRK(tty)) { - put_tty_queue('\377', ldata); - put_tty_queue('\0', ldata); - put_tty_queue(c, ldata); - } else if (I_INPCK(tty)) - put_tty_queue('\0', ldata); - else + if (I_INPCK(tty)) { + if (I_IGNPAR(tty)) + return; + if (I_PARMRK(tty)) { + put_tty_queue('\377', ldata); + put_tty_queue('\0', ldata); + put_tty_queue(c, ldata); + } else + put_tty_queue('\0', ldata); + } else put_tty_queue(c, ldata); wake_up_interruptible(&tty->read_wait); } @@ -2409,12 +2411,17 @@ static unsigned int n_tty_poll(struct tty_struct *tty, struct file *file, poll_wait(file, &tty->read_wait, wait); poll_wait(file, &tty->write_wait, wait); + if (test_bit(TTY_OTHER_CLOSED, 
&tty->flags)) + mask |= POLLHUP; if (input_available_p(tty, TIME_CHAR(tty) ? 0 : MIN_CHAR(tty))) mask |= POLLIN | POLLRDNORM; + else if (mask & POLLHUP) { + tty_flush_to_ldisc(tty); + if (input_available_p(tty, TIME_CHAR(tty) ? 0 : MIN_CHAR(tty))) + mask |= POLLIN | POLLRDNORM; + } if (tty->packet && tty->link->ctrl_status) mask |= POLLPRI | POLLIN | POLLRDNORM; - if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) - mask |= POLLHUP; if (tty_hung_up_p(file)) mask |= POLLHUP; if (!(mask & (POLLHUP | POLLIN | POLLRDNORM))) { diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 312a83f..d164304 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -573,7 +573,7 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep) */ if ((p->port.type == PORT_XR17V35X) || (p->port.type == PORT_XR17D15X)) { - serial_out(p, UART_EXAR_SLEEP, 0xff); + serial_out(p, UART_EXAR_SLEEP, sleep ? 0xff : 0); return; } @@ -1548,7 +1548,7 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir) status = serial8250_rx_chars(up, status); } serial8250_modem_status(up); - if (status & UART_LSR_THRE) + if (!up->dma && (status & UART_LSR_THRE)) serial8250_tx_chars(up); spin_unlock_irqrestore(&port->lock, flags); @@ -2384,7 +2384,7 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, port->read_status_mask = UART_LSR_OE | UART_LSR_THRE | UART_LSR_DR; if (termios->c_iflag & INPCK) port->read_status_mask |= UART_LSR_FE | UART_LSR_PE; - if (termios->c_iflag & (BRKINT | PARMRK)) + if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) port->read_status_mask |= UART_LSR_BI; /* diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c index 7046769..148ffe4 100644 --- a/drivers/tty/serial/8250/8250_dma.c +++ b/drivers/tty/serial/8250/8250_dma.c @@ -20,12 +20,15 @@ static void __dma_tx_complete(void *param) struct uart_8250_port *p = param; struct 
uart_8250_dma *dma = p->dma; struct circ_buf *xmit = &p->port.state->xmit; - - dma->tx_running = 0; + unsigned long flags; dma_sync_single_for_cpu(dma->txchan->device->dev, dma->tx_addr, UART_XMIT_SIZE, DMA_TO_DEVICE); + spin_lock_irqsave(&p->port.lock, flags); + + dma->tx_running = 0; + xmit->tail += dma->tx_size; xmit->tail &= UART_XMIT_SIZE - 1; p->port.icount.tx += dma->tx_size; @@ -35,6 +38,8 @@ static void __dma_tx_complete(void *param) if (!uart_circ_empty(xmit) && !uart_tx_stopped(&p->port)) serial8250_tx_dma(p); + + spin_unlock_irqrestore(&p->port.lock, flags); } static void __dma_rx_complete(void *param) @@ -187,21 +192,28 @@ int serial8250_request_dma(struct uart_8250_port *p) dma->rx_buf = dma_alloc_coherent(dma->rxchan->device->dev, dma->rx_size, &dma->rx_addr, GFP_KERNEL); - if (!dma->rx_buf) { - dma_release_channel(dma->rxchan); - dma_release_channel(dma->txchan); - return -ENOMEM; - } + if (!dma->rx_buf) + goto err; /* TX buffer */ dma->tx_addr = dma_map_single(dma->txchan->device->dev, p->port.state->xmit.buf, UART_XMIT_SIZE, DMA_TO_DEVICE); + if (dma_mapping_error(dma->txchan->device->dev, dma->tx_addr)) { + dma_free_coherent(dma->rxchan->device->dev, dma->rx_size, + dma->rx_buf, dma->rx_addr); + goto err; + } dev_dbg_ratelimited(p->port.dev, "got both dma channels\n"); return 0; +err: + dma_release_channel(dma->rxchan); + dma_release_channel(dma->txchan); + + return -ENOMEM; } EXPORT_SYMBOL_GPL(serial8250_request_dma); diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index 8b2accb..70ecf54 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -56,7 +56,6 @@ struct dw8250_data { - int last_lcr; int last_mcr; int line; struct clk *clk; @@ -76,17 +75,34 @@ static inline int dw8250_modify_msr(struct uart_port *p, int offset, int value) return value; } +static void dw8250_force_idle(struct uart_port *p) +{ + serial8250_clear_and_reinit_fifos(container_of + (p, struct uart_8250_port, 
port)); + (void)p->serial_in(p, UART_RX); +} + static void dw8250_serial_out(struct uart_port *p, int offset, int value) { struct dw8250_data *d = p->private_data; - if (offset == UART_LCR) - d->last_lcr = value; - if (offset == UART_MCR) d->last_mcr = value; writeb(value, p->membase + (offset << p->regshift)); + + /* Make sure LCR write wasn't ignored */ + if (offset == UART_LCR) { + int tries = 1000; + while (tries--) { + unsigned int lcr = p->serial_in(p, UART_LCR); + if ((value & ~UART_LCR_SPAR) == (lcr & ~UART_LCR_SPAR)) + return; + dw8250_force_idle(p); + writeb(value, p->membase + (UART_LCR << p->regshift)); + } + dev_err(p->dev, "Couldn't set LCR to %d\n", value); + } } static unsigned int dw8250_serial_in(struct uart_port *p, int offset) @@ -107,13 +123,23 @@ static void dw8250_serial_out32(struct uart_port *p, int offset, int value) { struct dw8250_data *d = p->private_data; - if (offset == UART_LCR) - d->last_lcr = value; - if (offset == UART_MCR) d->last_mcr = value; writel(value, p->membase + (offset << p->regshift)); + + /* Make sure LCR write wasn't ignored */ + if (offset == UART_LCR) { + int tries = 1000; + while (tries--) { + unsigned int lcr = p->serial_in(p, UART_LCR); + if ((value & ~UART_LCR_SPAR) == (lcr & ~UART_LCR_SPAR)) + return; + dw8250_force_idle(p); + writel(value, p->membase + (UART_LCR << p->regshift)); + } + dev_err(p->dev, "Couldn't set LCR to %d\n", value); + } } static unsigned int dw8250_serial_in32(struct uart_port *p, int offset) @@ -131,9 +157,8 @@ static int dw8250_handle_irq(struct uart_port *p) if (serial8250_handle_irq(p, iir)) { return 1; } else if ((iir & UART_IIR_BUSY) == UART_IIR_BUSY) { - /* Clear the USR and write the LCR again. 
*/ + /* Clear the USR */ (void)p->serial_in(p, d->usr_reg); - p->serial_out(p, UART_LCR, d->last_lcr); return 1; } diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index f5df8b7..ee1f7c5 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -1558,6 +1558,7 @@ pci_wch_ch353_setup(struct serial_private *priv, #define PCI_DEVICE_ID_WCH_CH352_2S 0x3253 #define PCI_DEVICE_ID_WCH_CH353_4S 0x3453 #define PCI_DEVICE_ID_WCH_CH353_2S1PF 0x5046 +#define PCI_DEVICE_ID_WCH_CH353_1S1P 0x5053 #define PCI_DEVICE_ID_WCH_CH353_2S1P 0x7053 #define PCI_VENDOR_ID_AGESTAR 0x5372 #define PCI_DEVICE_ID_AGESTAR_9375 0x6872 @@ -1567,6 +1568,7 @@ pci_wch_ch353_setup(struct serial_private *priv, #define PCI_DEVICE_ID_COMMTECH_4222PCIE 0x0022 #define PCI_DEVICE_ID_BROADCOM_TRUMANAGE 0x160a #define PCI_DEVICE_ID_AMCC_ADDIDATA_APCI7800 0x818e +#define PCI_DEVICE_ID_INTEL_QRK_UART 0x0936 #define PCI_VENDOR_ID_SUNIX 0x1fd4 #define PCI_DEVICE_ID_SUNIX_1999 0x1999 @@ -1875,6 +1877,13 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = { .setup = sbs_setup, .exit = sbs_exit, }, + { + .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_QRK_UART, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_default_setup, + }, /* * SBS Technologies, Inc., PMC-OCTALPRO 422 */ @@ -2151,6 +2160,14 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = { .subdevice = PCI_ANY_ID, .setup = pci_omegapci_setup, }, + /* WCH CH353 1S1P card (16550 clone) */ + { + .vendor = PCI_VENDOR_ID_WCH, + .device = PCI_DEVICE_ID_WCH_CH353_1S1P, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_wch_ch353_setup, + }, /* WCH CH353 2S1P card (16550 clone) */ { .vendor = PCI_VENDOR_ID_WCH, @@ -2450,6 +2467,7 @@ enum pci_board_num_t { pbn_ADDIDATA_PCIe_4_3906250, pbn_ADDIDATA_PCIe_8_3906250, pbn_ce4100_1_115200, + pbn_qrk, pbn_omegapci, pbn_NETMOS9900_2s_115200, pbn_brcm_trumanage, @@ -3186,6 
+3204,12 @@ static struct pciserial_board pci_boards[] = { .base_baud = 921600, .reg_shift = 2, }, + [pbn_qrk] = { + .flags = FL_BASE0, + .num_ports = 1, + .base_baud = 2764800, + .reg_shift = 2, + }, [pbn_omegapci] = { .flags = FL_BASE0, .num_ports = 8, @@ -3213,6 +3237,7 @@ static const struct pci_device_id blacklist[] = { /* multi-io cards handled by parport_serial */ { PCI_DEVICE(0x4348, 0x7053), }, /* WCH CH353 2S1P */ + { PCI_DEVICE(0x4348, 0x5053), }, /* WCH CH353 1S1P */ }; /* @@ -4854,6 +4879,12 @@ static struct pci_device_id serial_pci_tbl[] = { pbn_ce4100_1_115200 }, /* + * Intel Quark x1000 + */ + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_QRK_UART, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_qrk }, + /* * Cronyx Omega PCI */ { PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_CRONYX_OMEGA, diff --git a/drivers/tty/serial/altera_uart.c b/drivers/tty/serial/altera_uart.c index 501667e..3233766 100644 --- a/drivers/tty/serial/altera_uart.c +++ b/drivers/tty/serial/altera_uart.c @@ -185,6 +185,12 @@ static void altera_uart_set_termios(struct uart_port *port, uart_update_timeout(port, termios->c_cflag, baud); altera_uart_writel(port, baudclk, ALTERA_UART_DIVISOR_REG); spin_unlock_irqrestore(&port->lock, flags); + + /* + * FIXME: port->read_status_mask and port->ignore_status_mask + * need to be initialized based on termios settings for + * INPCK, IGNBRK, IGNPAR, PARMRK, BRKINT + */ } static void altera_uart_rx_chars(struct altera_uart *pp) diff --git a/drivers/tty/serial/amba-pl010.c b/drivers/tty/serial/amba-pl010.c index 8b90f0b..40bff81 100644 --- a/drivers/tty/serial/amba-pl010.c +++ b/drivers/tty/serial/amba-pl010.c @@ -420,7 +420,7 @@ pl010_set_termios(struct uart_port *port, struct ktermios *termios, uap->port.read_status_mask = UART01x_RSR_OE; if (termios->c_iflag & INPCK) uap->port.read_status_mask |= UART01x_RSR_FE | UART01x_RSR_PE; - if (termios->c_iflag & (BRKINT | PARMRK)) + if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) uap->port.read_status_mask |= 
UART01x_RSR_BE; /* diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 729198c..0cc0f6f 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -1731,7 +1731,7 @@ pl011_set_termios(struct uart_port *port, struct ktermios *termios, port->read_status_mask = UART011_DR_OE | 255; if (termios->c_iflag & INPCK) port->read_status_mask |= UART011_DR_FE | UART011_DR_PE; - if (termios->c_iflag & (BRKINT | PARMRK)) + if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) port->read_status_mask |= UART011_DR_BE; /* diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 41bb838..3b301a7 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -1794,7 +1794,7 @@ static void atmel_set_termios(struct uart_port *port, struct ktermios *termios, port->read_status_mask = ATMEL_US_OVRE; if (termios->c_iflag & INPCK) port->read_status_mask |= (ATMEL_US_FRAME | ATMEL_US_PARE); - if (termios->c_iflag & (BRKINT | PARMRK)) + if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) port->read_status_mask |= ATMEL_US_RXBRK; if (atmel_use_pdc_rx(port)) diff --git a/drivers/tty/serial/bcm63xx_uart.c b/drivers/tty/serial/bcm63xx_uart.c index 649d512..88b07ad 100644 --- a/drivers/tty/serial/bcm63xx_uart.c +++ b/drivers/tty/serial/bcm63xx_uart.c @@ -568,7 +568,7 @@ static void bcm_uart_set_termios(struct uart_port *port, port->read_status_mask |= UART_FIFO_FRAMEERR_MASK; port->read_status_mask |= UART_FIFO_PARERR_MASK; } - if (new->c_iflag & (BRKINT)) + if (new->c_iflag & (IGNBRK | BRKINT)) port->read_status_mask |= UART_FIFO_BRKDET_MASK; port->ignore_status_mask = 0; diff --git a/drivers/tty/serial/bfin_uart.c b/drivers/tty/serial/bfin_uart.c index 3c75e8e..8d30469 100644 --- a/drivers/tty/serial/bfin_uart.c +++ b/drivers/tty/serial/bfin_uart.c @@ -833,7 +833,7 @@ bfin_serial_set_termios(struct uart_port *port, struct ktermios *termios, port->read_status_mask = OE; if 
(termios->c_iflag & INPCK) port->read_status_mask |= (FE | PE); - if (termios->c_iflag & (BRKINT | PARMRK)) + if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) port->read_status_mask |= BI; /* diff --git a/drivers/tty/serial/dz.c b/drivers/tty/serial/dz.c index 2f2b2e5..cdbbc78 100644 --- a/drivers/tty/serial/dz.c +++ b/drivers/tty/serial/dz.c @@ -625,7 +625,7 @@ static void dz_set_termios(struct uart_port *uport, struct ktermios *termios, dport->port.read_status_mask = DZ_OERR; if (termios->c_iflag & INPCK) dport->port.read_status_mask |= DZ_FERR | DZ_PERR; - if (termios->c_iflag & (BRKINT | PARMRK)) + if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) dport->port.read_status_mask |= DZ_BREAK; /* characters to ignore */ diff --git a/drivers/tty/serial/efm32-uart.c b/drivers/tty/serial/efm32-uart.c index 0eb5b56..948f17b 100644 --- a/drivers/tty/serial/efm32-uart.c +++ b/drivers/tty/serial/efm32-uart.c @@ -407,7 +407,7 @@ static void efm32_uart_set_termios(struct uart_port *port, if (new->c_iflag & INPCK) port->read_status_mask |= UARTn_RXDATAX_FERR | UARTn_RXDATAX_PERR; - if (new->c_iflag & (BRKINT | PARMRK)) + if (new->c_iflag & (IGNBRK | BRKINT | PARMRK)) port->read_status_mask |= SW_UARTn_RXDATAX_BERR; port->ignore_status_mask = 0; diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 1c5d020..426bd5a 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -890,7 +890,7 @@ lpuart_set_termios(struct uart_port *port, struct ktermios *termios, sport->port.read_status_mask = 0; if (termios->c_iflag & INPCK) sport->port.read_status_mask |= (UARTSR1_FE | UARTSR1_PE); - if (termios->c_iflag & (BRKINT | PARMRK)) + if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) sport->port.read_status_mask |= UARTSR1_FE; /* characters to ignore */ diff --git a/drivers/tty/serial/ip22zilog.c b/drivers/tty/serial/ip22zilog.c index cb3c81e..a90f408 100644 --- a/drivers/tty/serial/ip22zilog.c +++ 
b/drivers/tty/serial/ip22zilog.c @@ -850,7 +850,7 @@ ip22zilog_convert_to_zs(struct uart_ip22zilog_port *up, unsigned int cflag, up->port.read_status_mask = Rx_OVR; if (iflag & INPCK) up->port.read_status_mask |= CRC_ERR | PAR_ERR; - if (iflag & (BRKINT | PARMRK)) + if (iflag & (IGNBRK | BRKINT | PARMRK)) up->port.read_status_mask |= BRK_ABRT; up->port.ignore_status_mask = 0; diff --git a/drivers/tty/serial/m32r_sio.c b/drivers/tty/serial/m32r_sio.c index 9cd9b4e..68f2c53 100644 --- a/drivers/tty/serial/m32r_sio.c +++ b/drivers/tty/serial/m32r_sio.c @@ -737,7 +737,7 @@ static void m32r_sio_set_termios(struct uart_port *port, up->port.read_status_mask = UART_LSR_OE | UART_LSR_THRE | UART_LSR_DR; if (termios->c_iflag & INPCK) up->port.read_status_mask |= UART_LSR_FE | UART_LSR_PE; - if (termios->c_iflag & (BRKINT | PARMRK)) + if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) up->port.read_status_mask |= UART_LSR_BI; /* diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index b2e707a..5183643 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -842,7 +842,7 @@ static void max310x_set_termios(struct uart_port *port, if (termios->c_iflag & INPCK) port->read_status_mask |= MAX310X_LSR_RXPAR_BIT | MAX310X_LSR_FRERR_BIT; - if (termios->c_iflag & (BRKINT | PARMRK)) + if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) port->read_status_mask |= MAX310X_LSR_RXBRK_BIT; /* Set status ignore mask */ diff --git a/drivers/tty/serial/mcf.c b/drivers/tty/serial/mcf.c index 0edfaf8..a6f0857 100644 --- a/drivers/tty/serial/mcf.c +++ b/drivers/tty/serial/mcf.c @@ -248,6 +248,12 @@ static void mcf_set_termios(struct uart_port *port, struct ktermios *termios, mr1 |= MCFUART_MR1_PARITYNONE; } + /* + * FIXME: port->read_status_mask and port->ignore_status_mask + * need to be initialized based on termios settings for + * INPCK, IGNBRK, IGNPAR, PARMRK, BRKINT + */ + if (termios->c_cflag & CSTOPB) mr2 |= MCFUART_MR2_STOP2; else diff --git 
a/drivers/tty/serial/mfd.c b/drivers/tty/serial/mfd.c index d3db042..81ad559 100644 --- a/drivers/tty/serial/mfd.c +++ b/drivers/tty/serial/mfd.c @@ -975,7 +975,7 @@ serial_hsu_set_termios(struct uart_port *port, struct ktermios *termios, up->port.read_status_mask = UART_LSR_OE | UART_LSR_THRE | UART_LSR_DR; if (termios->c_iflag & INPCK) up->port.read_status_mask |= UART_LSR_FE | UART_LSR_PE; - if (termios->c_iflag & (BRKINT | PARMRK)) + if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) up->port.read_status_mask |= UART_LSR_BI; /* Characters to ignore */ diff --git a/drivers/tty/serial/mpsc.c b/drivers/tty/serial/mpsc.c index 8d70267..76749f4 100644 --- a/drivers/tty/serial/mpsc.c +++ b/drivers/tty/serial/mpsc.c @@ -1458,7 +1458,7 @@ static void mpsc_set_termios(struct uart_port *port, struct ktermios *termios, pi->port.read_status_mask |= SDMA_DESC_CMDSTAT_PE | SDMA_DESC_CMDSTAT_FR; - if (termios->c_iflag & (BRKINT | PARMRK)) + if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) pi->port.read_status_mask |= SDMA_DESC_CMDSTAT_BR; /* Characters/events to ignore */ diff --git a/drivers/tty/serial/msm_serial.c b/drivers/tty/serial/msm_serial.c index b5d779c..c0f2b3e 100644 --- a/drivers/tty/serial/msm_serial.c +++ b/drivers/tty/serial/msm_serial.c @@ -570,7 +570,7 @@ static void msm_set_termios(struct uart_port *port, struct ktermios *termios, port->read_status_mask = 0; if (termios->c_iflag & INPCK) port->read_status_mask |= UART_SR_PAR_FRAME_ERR; - if (termios->c_iflag & (BRKINT | PARMRK)) + if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) port->read_status_mask |= UART_SR_RX_BREAK; uart_update_timeout(port, termios->c_cflag, baud); diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c index 10e9d70..ea96c39 100644 --- a/drivers/tty/serial/mxs-auart.c +++ b/drivers/tty/serial/mxs-auart.c @@ -600,7 +600,7 @@ static void mxs_auart_settermios(struct uart_port *u, if (termios->c_iflag & INPCK) u->read_status_mask |= AUART_STAT_PERR; - if 
(termios->c_iflag & (BRKINT | PARMRK)) + if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) u->read_status_mask |= AUART_STAT_BERR; /* diff --git a/drivers/tty/serial/netx-serial.c b/drivers/tty/serial/netx-serial.c index 0a4dd70..7a67456 100644 --- a/drivers/tty/serial/netx-serial.c +++ b/drivers/tty/serial/netx-serial.c @@ -419,7 +419,7 @@ netx_set_termios(struct uart_port *port, struct ktermios *termios, } port->read_status_mask = 0; - if (termios->c_iflag & (BRKINT | PARMRK)) + if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) port->read_status_mask |= SR_BE; if (termios->c_iflag & INPCK) port->read_status_mask |= SR_PE | SR_FE; diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c index 22c8149..59117ea 100644 --- a/drivers/tty/serial/omap-serial.c +++ b/drivers/tty/serial/omap-serial.c @@ -240,8 +240,16 @@ serial_omap_baud_is_mode16(struct uart_port *port, unsigned int baud) { unsigned int n13 = port->uartclk / (13 * baud); unsigned int n16 = port->uartclk / (16 * baud); - int baudAbsDiff13 = baud - (port->uartclk / (13 * n13)); - int baudAbsDiff16 = baud - (port->uartclk / (16 * n16)); + int baudAbsDiff13; + int baudAbsDiff16; + + if (n13 == 0) + n13 = 1; + if (n16 == 0) + n16 = 1; + + baudAbsDiff13 = baud - (port->uartclk / (13 * n13)); + baudAbsDiff16 = baud - (port->uartclk / (16 * n16)); if(baudAbsDiff13 < 0) baudAbsDiff13 = -baudAbsDiff13; if(baudAbsDiff16 < 0) diff --git a/drivers/tty/serial/pmac_zilog.c b/drivers/tty/serial/pmac_zilog.c index 5ba30e0..409d7ad 100644 --- a/drivers/tty/serial/pmac_zilog.c +++ b/drivers/tty/serial/pmac_zilog.c @@ -1090,7 +1090,7 @@ static void pmz_convert_to_zs(struct uart_pmac_port *uap, unsigned int cflag, uap->port.read_status_mask = Rx_OVR; if (iflag & INPCK) uap->port.read_status_mask |= CRC_ERR | PAR_ERR; - if (iflag & (BRKINT | PARMRK)) + if (iflag & (IGNBRK | BRKINT | PARMRK)) uap->port.read_status_mask |= BRK_ABRT; uap->port.ignore_status_mask = 0; diff --git 
a/drivers/tty/serial/pnx8xxx_uart.c b/drivers/tty/serial/pnx8xxx_uart.c index de6c05c..2ba24a4 100644 --- a/drivers/tty/serial/pnx8xxx_uart.c +++ b/drivers/tty/serial/pnx8xxx_uart.c @@ -477,7 +477,7 @@ pnx8xxx_set_termios(struct uart_port *port, struct ktermios *termios, sport->port.read_status_mask |= FIFO_TO_SM(PNX8XXX_UART_FIFO_RXFE) | FIFO_TO_SM(PNX8XXX_UART_FIFO_RXPAR); - if (termios->c_iflag & (BRKINT | PARMRK)) + if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) sport->port.read_status_mask |= ISTAT_TO_SM(PNX8XXX_UART_INT_BREAK); diff --git a/drivers/tty/serial/pxa.c b/drivers/tty/serial/pxa.c index f9f20f3..fc3f308 100644 --- a/drivers/tty/serial/pxa.c +++ b/drivers/tty/serial/pxa.c @@ -492,7 +492,7 @@ serial_pxa_set_termios(struct uart_port *port, struct ktermios *termios, up->port.read_status_mask = UART_LSR_OE | UART_LSR_THRE | UART_LSR_DR; if (termios->c_iflag & INPCK) up->port.read_status_mask |= UART_LSR_FE | UART_LSR_PE; - if (termios->c_iflag & (BRKINT | PARMRK)) + if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) up->port.read_status_mask |= UART_LSR_BI; /* diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c index f3dfa19..6b0adfb 100644 --- a/drivers/tty/serial/samsung.c +++ b/drivers/tty/serial/samsung.c @@ -537,11 +537,15 @@ static void s3c24xx_serial_pm(struct uart_port *port, unsigned int level, unsigned int old) { struct s3c24xx_uart_port *ourport = to_ourport(port); + int timeout = 10000; ourport->pm_level = level; switch (level) { case 3: + while (--timeout && !s3c24xx_serial_txempty_nofifo(port)) + udelay(100); + if (!IS_ERR(ourport->baudclk)) clk_disable_unprepare(ourport->baudclk); diff --git a/drivers/tty/serial/sb1250-duart.c b/drivers/tty/serial/sb1250-duart.c index a7cdec2..771f361 100644 --- a/drivers/tty/serial/sb1250-duart.c +++ b/drivers/tty/serial/sb1250-duart.c @@ -596,7 +596,7 @@ static void sbd_set_termios(struct uart_port *uport, struct ktermios *termios, if (termios->c_iflag & INPCK) 
uport->read_status_mask |= M_DUART_FRM_ERR | M_DUART_PARITY_ERR; - if (termios->c_iflag & (BRKINT | PARMRK)) + if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) uport->read_status_mask |= M_DUART_RCVD_BRK; uport->ignore_status_mask = 0; diff --git a/drivers/tty/serial/sccnxp.c b/drivers/tty/serial/sccnxp.c index 49e9bbf..0ea128a 100644 --- a/drivers/tty/serial/sccnxp.c +++ b/drivers/tty/serial/sccnxp.c @@ -667,7 +667,7 @@ static void sccnxp_set_termios(struct uart_port *port, port->read_status_mask = SR_OVR; if (termios->c_iflag & INPCK) port->read_status_mask |= SR_PE | SR_FE; - if (termios->c_iflag & (BRKINT | PARMRK)) + if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) port->read_status_mask |= SR_BRK; /* Set status ignore mask */ diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 0f02351..6015b6c 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -235,6 +235,9 @@ static void uart_shutdown(struct tty_struct *tty, struct uart_state *state) /* * Turn off DTR and RTS early. */ + if (uart_console(uport) && tty) + uport->cons->cflag = tty->termios.c_cflag; + if (!tty || (tty->termios.c_cflag & HUPCL)) uart_clear_mctrl(uport, TIOCM_DTR | TIOCM_RTS); @@ -350,7 +353,7 @@ uart_get_baud_rate(struct uart_port *port, struct ktermios *termios, * The spd_hi, spd_vhi, spd_shi, spd_warp kludge... * Die! Die! Die! 
*/ - if (baud == 38400) + if (try == 0 && baud == 38400) baud = altbaud; /* diff --git a/drivers/tty/serial/serial_ks8695.c b/drivers/tty/serial/serial_ks8695.c index e1caa99..5c79bda 100644 --- a/drivers/tty/serial/serial_ks8695.c +++ b/drivers/tty/serial/serial_ks8695.c @@ -437,7 +437,7 @@ static void ks8695uart_set_termios(struct uart_port *port, struct ktermios *term port->read_status_mask = URLS_URROE; if (termios->c_iflag & INPCK) port->read_status_mask |= (URLS_URFE | URLS_URPE); - if (termios->c_iflag & (BRKINT | PARMRK)) + if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) port->read_status_mask |= URLS_URBI; /* diff --git a/drivers/tty/serial/serial_txx9.c b/drivers/tty/serial/serial_txx9.c index 440a962..ce13f42 100644 --- a/drivers/tty/serial/serial_txx9.c +++ b/drivers/tty/serial/serial_txx9.c @@ -702,7 +702,7 @@ serial_txx9_set_termios(struct uart_port *port, struct ktermios *termios, TXX9_SIDISR_TDIS | TXX9_SIDISR_RDIS; if (termios->c_iflag & INPCK) up->port.read_status_mask |= TXX9_SIDISR_UFER | TXX9_SIDISR_UPER; - if (termios->c_iflag & (BRKINT | PARMRK)) + if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) up->port.read_status_mask |= TXX9_SIDISR_UBRK; /* diff --git a/drivers/tty/serial/sirfsoc_uart.c b/drivers/tty/serial/sirfsoc_uart.c index a72c33f..25aecf0 100644 --- a/drivers/tty/serial/sirfsoc_uart.c +++ b/drivers/tty/serial/sirfsoc_uart.c @@ -359,9 +359,11 @@ static irqreturn_t sirfsoc_uart_usp_cts_handler(int irq, void *dev_id) { struct sirfsoc_uart_port *sirfport = (struct sirfsoc_uart_port *)dev_id; struct uart_port *port = &sirfport->port; + spin_lock(&port->lock); if (gpio_is_valid(sirfport->cts_gpio) && sirfport->ms_enabled) uart_handle_cts_change(port, !gpio_get_value(sirfport->cts_gpio)); + spin_unlock(&port->lock); return IRQ_HANDLED; } @@ -429,10 +431,6 @@ sirfsoc_uart_pio_rx_chars(struct uart_port *port, unsigned int max_rx_count) sirfport->rx_io_count += rx_count; port->icount.rx += rx_count; - spin_unlock(&port->lock); - 
tty_flip_buffer_push(&port->state->port); - spin_lock(&port->lock); - return rx_count; } @@ -466,6 +464,7 @@ static void sirfsoc_uart_tx_dma_complete_callback(void *param) struct circ_buf *xmit = &port->state->xmit; unsigned long flags; + spin_lock_irqsave(&port->lock, flags); xmit->tail = (xmit->tail + sirfport->transfer_size) & (UART_XMIT_SIZE - 1); port->icount.tx += sirfport->transfer_size; @@ -474,10 +473,9 @@ static void sirfsoc_uart_tx_dma_complete_callback(void *param) if (sirfport->tx_dma_addr) dma_unmap_single(port->dev, sirfport->tx_dma_addr, sirfport->transfer_size, DMA_TO_DEVICE); - spin_lock_irqsave(&sirfport->tx_lock, flags); sirfport->tx_dma_state = TX_DMA_IDLE; sirfsoc_uart_tx_with_dma(sirfport); - spin_unlock_irqrestore(&sirfport->tx_lock, flags); + spin_unlock_irqrestore(&port->lock, flags); } static void sirfsoc_uart_insert_rx_buf_to_tty( @@ -490,7 +488,6 @@ static void sirfsoc_uart_insert_rx_buf_to_tty( inserted = tty_insert_flip_string(tport, sirfport->rx_dma_items[sirfport->rx_completed].xmit.buf, count); port->icount.rx += inserted; - tty_flip_buffer_push(tport); } static void sirfsoc_rx_submit_one_dma_desc(struct uart_port *port, int index) @@ -525,7 +522,7 @@ static void sirfsoc_rx_tmo_process_tl(unsigned long param) unsigned int count; unsigned long flags; - spin_lock_irqsave(&sirfport->rx_lock, flags); + spin_lock_irqsave(&port->lock, flags); while (sirfport->rx_completed != sirfport->rx_issued) { sirfsoc_uart_insert_rx_buf_to_tty(sirfport, SIRFSOC_RX_DMA_BUF_SIZE); @@ -540,12 +537,8 @@ static void sirfsoc_rx_tmo_process_tl(unsigned long param) wr_regl(port, ureg->sirfsoc_rx_dma_io_ctrl, rd_regl(port, ureg->sirfsoc_rx_dma_io_ctrl) | SIRFUART_IO_MODE); - spin_unlock_irqrestore(&sirfport->rx_lock, flags); - spin_lock(&port->lock); sirfsoc_uart_pio_rx_chars(port, 4 - sirfport->rx_io_count); - spin_unlock(&port->lock); if (sirfport->rx_io_count == 4) { - spin_lock_irqsave(&sirfport->rx_lock, flags); sirfport->rx_io_count = 0; wr_regl(port, 
ureg->sirfsoc_int_st_reg, uint_st->sirfsoc_rx_done); @@ -556,11 +549,8 @@ static void sirfsoc_rx_tmo_process_tl(unsigned long param) else wr_regl(port, SIRFUART_INT_EN_CLR, uint_en->sirfsoc_rx_done_en); - spin_unlock_irqrestore(&sirfport->rx_lock, flags); - sirfsoc_uart_start_next_rx_dma(port); } else { - spin_lock_irqsave(&sirfport->rx_lock, flags); wr_regl(port, ureg->sirfsoc_int_st_reg, uint_st->sirfsoc_rx_done); if (!sirfport->is_marco) @@ -570,8 +560,9 @@ static void sirfsoc_rx_tmo_process_tl(unsigned long param) else wr_regl(port, ureg->sirfsoc_int_en_reg, uint_en->sirfsoc_rx_done_en); - spin_unlock_irqrestore(&sirfport->rx_lock, flags); } + spin_unlock_irqrestore(&port->lock, flags); + tty_flip_buffer_push(&port->state->port); } static void sirfsoc_uart_handle_rx_tmo(struct sirfsoc_uart_port *sirfport) @@ -580,8 +571,6 @@ static void sirfsoc_uart_handle_rx_tmo(struct sirfsoc_uart_port *sirfport) struct sirfsoc_register *ureg = &sirfport->uart_reg->uart_reg; struct sirfsoc_int_en *uint_en = &sirfport->uart_reg->uart_int_en; struct dma_tx_state tx_state; - spin_lock(&sirfport->rx_lock); - dmaengine_tx_status(sirfport->rx_dma_chan, sirfport->rx_dma_items[sirfport->rx_issued].cookie, &tx_state); dmaengine_terminate_all(sirfport->rx_dma_chan); @@ -594,7 +583,6 @@ static void sirfsoc_uart_handle_rx_tmo(struct sirfsoc_uart_port *sirfport) else wr_regl(port, SIRFUART_INT_EN_CLR, uint_en->sirfsoc_rx_timeout_en); - spin_unlock(&sirfport->rx_lock); tasklet_schedule(&sirfport->rx_tmo_process_tasklet); } @@ -658,7 +646,6 @@ static irqreturn_t sirfsoc_uart_isr(int irq, void *dev_id) intr_status &= port->read_status_mask; uart_insert_char(port, intr_status, uint_en->sirfsoc_rx_oflow_en, 0, flag); - tty_flip_buffer_push(&state->port); } recv_char: if ((sirfport->uart_reg->uart_type == SIRF_REAL_UART) && @@ -683,6 +670,9 @@ recv_char: sirfsoc_uart_pio_rx_chars(port, SIRFSOC_UART_IO_RX_MAX_CNT); } + spin_unlock(&port->lock); + tty_flip_buffer_push(&state->port); + 
spin_lock(&port->lock); if (intr_status & uint_st->sirfsoc_txfifo_empty) { if (IS_DMA_CHAN_VALID(sirfport->tx_dma_no)) sirfsoc_uart_tx_with_dma(sirfport); @@ -701,6 +691,7 @@ recv_char: } } spin_unlock(&port->lock); + return IRQ_HANDLED; } @@ -709,24 +700,27 @@ static void sirfsoc_uart_rx_dma_complete_tl(unsigned long param) struct sirfsoc_uart_port *sirfport = (struct sirfsoc_uart_port *)param; struct uart_port *port = &sirfport->port; unsigned long flags; - spin_lock_irqsave(&sirfport->rx_lock, flags); + spin_lock_irqsave(&port->lock, flags); while (sirfport->rx_completed != sirfport->rx_issued) { sirfsoc_uart_insert_rx_buf_to_tty(sirfport, SIRFSOC_RX_DMA_BUF_SIZE); sirfsoc_rx_submit_one_dma_desc(port, sirfport->rx_completed++); sirfport->rx_completed %= SIRFSOC_RX_LOOP_BUF_CNT; } - spin_unlock_irqrestore(&sirfport->rx_lock, flags); + spin_unlock_irqrestore(&port->lock, flags); + tty_flip_buffer_push(&port->state->port); } static void sirfsoc_uart_rx_dma_complete_callback(void *param) { struct sirfsoc_uart_port *sirfport = (struct sirfsoc_uart_port *)param; - spin_lock(&sirfport->rx_lock); + unsigned long flags; + + spin_lock_irqsave(&sirfport->port.lock, flags); sirfport->rx_issued++; sirfport->rx_issued %= SIRFSOC_RX_LOOP_BUF_CNT; - spin_unlock(&sirfport->rx_lock); tasklet_schedule(&sirfport->rx_dma_complete_tasklet); + spin_unlock_irqrestore(&sirfport->port.lock, flags); } /* submit rx dma task into dmaengine */ @@ -735,18 +729,14 @@ static void sirfsoc_uart_start_next_rx_dma(struct uart_port *port) struct sirfsoc_uart_port *sirfport = to_sirfport(port); struct sirfsoc_register *ureg = &sirfport->uart_reg->uart_reg; struct sirfsoc_int_en *uint_en = &sirfport->uart_reg->uart_int_en; - unsigned long flags; int i; - spin_lock_irqsave(&sirfport->rx_lock, flags); sirfport->rx_io_count = 0; wr_regl(port, ureg->sirfsoc_rx_dma_io_ctrl, rd_regl(port, ureg->sirfsoc_rx_dma_io_ctrl) & ~SIRFUART_IO_MODE); - spin_unlock_irqrestore(&sirfport->rx_lock, flags); for (i = 0; i < 
SIRFSOC_RX_LOOP_BUF_CNT; i++) sirfsoc_rx_submit_one_dma_desc(port, i); sirfport->rx_completed = sirfport->rx_issued = 0; - spin_lock_irqsave(&sirfport->rx_lock, flags); if (!sirfport->is_marco) wr_regl(port, ureg->sirfsoc_int_en_reg, rd_regl(port, ureg->sirfsoc_int_en_reg) | @@ -754,7 +744,6 @@ static void sirfsoc_uart_start_next_rx_dma(struct uart_port *port) else wr_regl(port, ureg->sirfsoc_int_en_reg, SIRFUART_RX_DMA_INT_EN(port, uint_en)); - spin_unlock_irqrestore(&sirfport->rx_lock, flags); } static void sirfsoc_uart_start_rx(struct uart_port *port) @@ -897,7 +886,7 @@ static void sirfsoc_uart_set_termios(struct uart_port *port, if (termios->c_iflag & INPCK) port->read_status_mask |= uint_en->sirfsoc_frm_err_en; } - if (termios->c_iflag & (BRKINT | PARMRK)) + if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) port->read_status_mask |= uint_en->sirfsoc_rxd_brk_en; if (sirfport->uart_reg->uart_type == SIRF_REAL_UART) { if (termios->c_iflag & IGNPAR) @@ -1455,8 +1444,6 @@ usp_no_flow_control: ret = -EFAULT; goto err; } - spin_lock_init(&sirfport->rx_lock); - spin_lock_init(&sirfport->tx_lock); tasklet_init(&sirfport->rx_dma_complete_tasklet, sirfsoc_uart_rx_dma_complete_tl, (unsigned long)sirfport); tasklet_init(&sirfport->rx_tmo_process_tasklet, diff --git a/drivers/tty/serial/sirfsoc_uart.h b/drivers/tty/serial/sirfsoc_uart.h index fb8d0a0..38cb159 100644 --- a/drivers/tty/serial/sirfsoc_uart.h +++ b/drivers/tty/serial/sirfsoc_uart.h @@ -438,8 +438,6 @@ struct sirfsoc_uart_port { struct dma_chan *tx_dma_chan; dma_addr_t tx_dma_addr; struct dma_async_tx_descriptor *tx_dma_desc; - spinlock_t rx_lock; - spinlock_t tx_lock; struct tasklet_struct rx_dma_complete_tasklet; struct tasklet_struct rx_tmo_process_tasklet; unsigned int rx_io_count; diff --git a/drivers/tty/serial/st-asc.c b/drivers/tty/serial/st-asc.c index 21e6e84..0ee3175 100644 --- a/drivers/tty/serial/st-asc.c +++ b/drivers/tty/serial/st-asc.c @@ -547,7 +547,7 @@ static void asc_set_termios(struct 
uart_port *port, struct ktermios *termios, ascport->port.read_status_mask = ASC_RXBUF_DUMMY_OE; if (termios->c_iflag & INPCK) ascport->port.read_status_mask |= ASC_RXBUF_FE | ASC_RXBUF_PE; - if (termios->c_iflag & (BRKINT | PARMRK)) + if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) ascport->port.read_status_mask |= ASC_RXBUF_DUMMY_BE; /* diff --git a/drivers/tty/serial/sunsab.c b/drivers/tty/serial/sunsab.c index 1bf2774..d1773b0 100644 --- a/drivers/tty/serial/sunsab.c +++ b/drivers/tty/serial/sunsab.c @@ -157,6 +157,15 @@ receive_chars(struct uart_sunsab_port *up, (up->port.line == up->port.cons->index)) saw_console_brk = 1; + if (count == 0) { + if (unlikely(stat->sreg.isr1 & SAB82532_ISR1_BRK)) { + stat->sreg.isr0 &= ~(SAB82532_ISR0_PERR | + SAB82532_ISR0_FERR); + up->port.icount.brk++; + uart_handle_break(&up->port); + } + } + for (i = 0; i < count; i++) { unsigned char ch = buf[i], flag; @@ -719,7 +728,7 @@ static void sunsab_convert_to_sab(struct uart_sunsab_port *up, unsigned int cfla if (iflag & INPCK) up->port.read_status_mask |= (SAB82532_ISR0_PERR | SAB82532_ISR0_FERR); - if (iflag & (BRKINT | PARMRK)) + if (iflag & (IGNBRK | BRKINT | PARMRK)) up->port.read_status_mask |= (SAB82532_ISR1_BRK << 8); /* diff --git a/drivers/tty/serial/sunsu.c b/drivers/tty/serial/sunsu.c index d88fb63..3079c75 100644 --- a/drivers/tty/serial/sunsu.c +++ b/drivers/tty/serial/sunsu.c @@ -834,7 +834,7 @@ sunsu_change_speed(struct uart_port *port, unsigned int cflag, up->port.read_status_mask = UART_LSR_OE | UART_LSR_THRE | UART_LSR_DR; if (iflag & INPCK) up->port.read_status_mask |= UART_LSR_FE | UART_LSR_PE; - if (iflag & (BRKINT | PARMRK)) + if (iflag & (IGNBRK | BRKINT | PARMRK)) up->port.read_status_mask |= UART_LSR_BI; /* diff --git a/drivers/tty/serial/sunzilog.c b/drivers/tty/serial/sunzilog.c index 3103c3b..e7e0aad 100644 --- a/drivers/tty/serial/sunzilog.c +++ b/drivers/tty/serial/sunzilog.c @@ -915,7 +915,7 @@ sunzilog_convert_to_zs(struct uart_sunzilog_port *up, 
unsigned int cflag, up->port.read_status_mask = Rx_OVR; if (iflag & INPCK) up->port.read_status_mask |= CRC_ERR | PAR_ERR; - if (iflag & (BRKINT | PARMRK)) + if (iflag & (IGNBRK | BRKINT | PARMRK)) up->port.read_status_mask |= BRK_ABRT; up->port.ignore_status_mask = 0; diff --git a/drivers/tty/serial/ucc_uart.c b/drivers/tty/serial/ucc_uart.c index 2c9a87c..8e981ec 100644 --- a/drivers/tty/serial/ucc_uart.c +++ b/drivers/tty/serial/ucc_uart.c @@ -934,7 +934,7 @@ static void qe_uart_set_termios(struct uart_port *port, port->read_status_mask = BD_SC_EMPTY | BD_SC_OV; if (termios->c_iflag & INPCK) port->read_status_mask |= BD_SC_FR | BD_SC_PR; - if (termios->c_iflag & (BRKINT | PARMRK)) + if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) port->read_status_mask |= BD_SC_BR; /* diff --git a/drivers/tty/serial/vr41xx_siu.c b/drivers/tty/serial/vr41xx_siu.c index a63c14b..db0c8a4 100644 --- a/drivers/tty/serial/vr41xx_siu.c +++ b/drivers/tty/serial/vr41xx_siu.c @@ -559,7 +559,7 @@ static void siu_set_termios(struct uart_port *port, struct ktermios *new, port->read_status_mask = UART_LSR_THRE | UART_LSR_OE | UART_LSR_DR; if (c_iflag & INPCK) port->read_status_mask |= UART_LSR_FE | UART_LSR_PE; - if (c_iflag & (BRKINT | PARMRK)) + if (c_iflag & (IGNBRK | BRKINT | PARMRK)) port->read_status_mask |= UART_LSR_BI; port->ignore_status_mask = 0; diff --git a/drivers/tty/serial/zs.c b/drivers/tty/serial/zs.c index 6a16987..2b65bb7 100644 --- a/drivers/tty/serial/zs.c +++ b/drivers/tty/serial/zs.c @@ -923,7 +923,7 @@ static void zs_set_termios(struct uart_port *uport, struct ktermios *termios, uport->read_status_mask = Rx_OVR; if (termios->c_iflag & INPCK) uport->read_status_mask |= FRM_ERR | PAR_ERR; - if (termios->c_iflag & (BRKINT | PARMRK)) + if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) uport->read_status_mask |= Rx_BRK; uport->ignore_status_mask = 0; diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index 2b52d80..d9d216e 100644 --- 
a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -61,6 +61,7 @@ void tty_buffer_lock_exclusive(struct tty_port *port) atomic_inc(&buf->priority); mutex_lock(&buf->lock); } +EXPORT_SYMBOL_GPL(tty_buffer_lock_exclusive); void tty_buffer_unlock_exclusive(struct tty_port *port) { @@ -74,6 +75,7 @@ void tty_buffer_unlock_exclusive(struct tty_port *port) if (restart) queue_work(system_unbound_wq, &buf->work); } +EXPORT_SYMBOL_GPL(tty_buffer_unlock_exclusive); /** * tty_buffer_space_avail - return unused buffer space @@ -248,7 +250,11 @@ int tty_buffer_request_room(struct tty_port *port, size_t size) if ((n = tty_buffer_alloc(port, size)) != NULL) { buf->tail = n; b->commit = b->used; - smp_mb(); + /* paired w/ barrier in flush_to_ldisc(); ensures the + * latest commit value can be read before the head is + * advanced to the next buffer + */ + smp_wmb(); b->next = n; } else size = left; @@ -449,17 +455,24 @@ static void flush_to_ldisc(struct work_struct *work) while (1) { struct tty_buffer *head = buf->head; + struct tty_buffer *next; int count; /* Ldisc or user is trying to gain exclusive access */ if (atomic_read(&buf->priority)) break; + next = head->next; + /* paired w/ barrier in __tty_buffer_request_room(); + * ensures commit value read is not stale if the head + * is advancing to the next buffer + */ + smp_rmb(); count = head->commit - head->read; if (!count) { - if (head->next == NULL) + if (next == NULL) break; - buf->head = head->next; + buf->head = next; tty_buffer_free(port, head); continue; } diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index d3448a9..25d0741 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -1701,6 +1701,7 @@ int tty_release(struct inode *inode, struct file *filp) int pty_master, tty_closing, o_tty_closing, do_sleep; int idx; char buf[64]; + long timeout = 0; if (tty_paranoia_check(tty, inode, __func__)) return 0; @@ -1785,7 +1786,11 @@ int tty_release(struct inode *inode, struct file *filp) __func__, 
tty_name(tty, buf)); tty_unlock_pair(tty, o_tty); mutex_unlock(&tty_mutex); - schedule(); + schedule_timeout_killable(timeout); + if (timeout < 120 * HZ) + timeout = 2 * timeout + 1; + else + timeout = MAX_SCHEDULE_TIMEOUT; } /* diff --git a/drivers/usb/chipidea/ci_hdrc_msm.c b/drivers/usb/chipidea/ci_hdrc_msm.c index 2d51d85..ca1123d 100644 --- a/drivers/usb/chipidea/ci_hdrc_msm.c +++ b/drivers/usb/chipidea/ci_hdrc_msm.c @@ -20,13 +20,13 @@ static void ci_hdrc_msm_notify_event(struct ci_hdrc *ci, unsigned event) { struct device *dev = ci->gadget.dev.parent; - int val; switch (event) { case CI_HDRC_CONTROLLER_RESET_EVENT: dev_dbg(dev, "CI_HDRC_CONTROLLER_RESET_EVENT received\n"); writel(0, USB_AHBBURST); writel(0, USB_AHBMODE); + usb_phy_init(ci->transceiver); break; case CI_HDRC_CONTROLLER_STOPPED_EVENT: dev_dbg(dev, "CI_HDRC_CONTROLLER_STOPPED_EVENT received\n"); @@ -34,10 +34,7 @@ static void ci_hdrc_msm_notify_event(struct ci_hdrc *ci, unsigned event) * Put the transceiver in non-driving mode. Otherwise host * may not detect soft-disconnection. 
*/ - val = usb_phy_io_read(ci->transceiver, ULPI_FUNC_CTRL); - val &= ~ULPI_FUNC_CTRL_OPMODE_MASK; - val |= ULPI_FUNC_CTRL_OPMODE_NONDRIVING; - usb_phy_io_write(ci->transceiver, val, ULPI_FUNC_CTRL); + usb_phy_notify_disconnect(ci->transceiver, USB_SPEED_UNKNOWN); break; default: dev_dbg(dev, "unknown ci_hdrc event\n"); diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index 938426a..455e4e6 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -1178,8 +1178,8 @@ static int ep_enable(struct usb_ep *ep, if (hwep->type == USB_ENDPOINT_XFER_CONTROL) cap |= QH_IOS; - if (hwep->num) - cap |= QH_ZLT; + + cap |= QH_ZLT; cap |= (hwep->ep.maxpacket << __ffs(QH_MAX_PKT)) & QH_MAX_PKT; /* * For ISO-TX, we set mult at QH as the largest value, and use @@ -1325,6 +1325,7 @@ static int ep_dequeue(struct usb_ep *ep, struct usb_request *req) struct ci_hw_ep *hwep = container_of(ep, struct ci_hw_ep, ep); struct ci_hw_req *hwreq = container_of(req, struct ci_hw_req, req); unsigned long flags; + struct td_node *node, *tmpnode; if (ep == NULL || req == NULL || hwreq->req.status != -EALREADY || hwep->ep.desc == NULL || list_empty(&hwreq->queue) || @@ -1335,6 +1336,12 @@ static int ep_dequeue(struct usb_ep *ep, struct usb_request *req) hw_ep_flush(hwep->ci, hwep->num, hwep->dir); + list_for_each_entry_safe(node, tmpnode, &hwreq->tds, td) { + dma_pool_free(hwep->td_pool, node->ptr, node->dma); + list_del(&node->td); + kfree(node); + } + /* pop request */ list_del_init(&hwreq->queue); diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index e840431..2574b24 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -122,13 +122,23 @@ static void acm_release_minor(struct acm *acm) static int acm_ctrl_msg(struct acm *acm, int request, int value, void *buf, int len) { - int retval = usb_control_msg(acm->dev, usb_sndctrlpipe(acm->dev, 0), + int retval; + + retval = usb_autopm_get_interface(acm->control); + if 
(retval) + return retval; + + retval = usb_control_msg(acm->dev, usb_sndctrlpipe(acm->dev, 0), request, USB_RT_ACM, value, acm->control->altsetting[0].desc.bInterfaceNumber, buf, len, 5000); + dev_dbg(&acm->control->dev, "%s - rq 0x%02x, val %#x, len %#x, result %d\n", __func__, request, value, len, retval); + + usb_autopm_put_interface(acm->control); + return retval < 0 ? retval : 0; } @@ -484,6 +494,7 @@ static int acm_port_activate(struct tty_port *port, struct tty_struct *tty) { struct acm *acm = container_of(port, struct acm, port); int retval = -ENODEV; + int i; dev_dbg(&acm->control->dev, "%s\n", __func__); @@ -532,6 +543,8 @@ static int acm_port_activate(struct tty_port *port, struct tty_struct *tty) return 0; error_submit_read_urbs: + for (i = 0; i < acm->rx_buflimit; i++) + usb_kill_urb(acm->read_urbs[i]); acm->ctrlout = 0; acm_set_control(acm, acm->ctrlout); error_set_control: @@ -559,21 +572,35 @@ static void acm_port_destruct(struct tty_port *port) static void acm_port_shutdown(struct tty_port *port) { struct acm *acm = container_of(port, struct acm, port); + struct urb *urb; + struct acm_wb *wb; int i; + int pm_err; dev_dbg(&acm->control->dev, "%s\n", __func__); mutex_lock(&acm->mutex); if (!acm->disconnected) { - usb_autopm_get_interface(acm->control); + pm_err = usb_autopm_get_interface(acm->control); acm_set_control(acm, acm->ctrlout = 0); + + for (;;) { + urb = usb_get_from_anchor(&acm->delayed); + if (!urb) + break; + wb = urb->context; + wb->use = 0; + usb_autopm_put_interface_async(acm->control); + } + usb_kill_urb(acm->ctrlurb); for (i = 0; i < ACM_NW; i++) usb_kill_urb(acm->wb[i].urb); for (i = 0; i < acm->rx_buflimit; i++) usb_kill_urb(acm->read_urbs[i]); acm->control->needs_remote_wakeup = 0; - usb_autopm_put_interface(acm->control); + if (!pm_err) + usb_autopm_put_interface(acm->control); } mutex_unlock(&acm->mutex); } @@ -632,14 +659,17 @@ static int acm_tty_write(struct tty_struct *tty, memcpy(wb->buf, buf, count); wb->len = count; - 
usb_autopm_get_interface_async(acm->control); + stat = usb_autopm_get_interface_async(acm->control); + if (stat) { + wb->use = 0; + spin_unlock_irqrestore(&acm->write_lock, flags); + return stat; + } + if (acm->susp_count) { - if (!acm->delayed_wb) - acm->delayed_wb = wb; - else - usb_autopm_put_interface_async(acm->control); + usb_anchor_urb(wb->urb, &acm->delayed); spin_unlock_irqrestore(&acm->write_lock, flags); - return count; /* A white lie */ + return count; } usb_mark_last_busy(acm->dev); @@ -845,11 +875,12 @@ static void acm_tty_set_termios(struct tty_struct *tty, /* FIXME: Needs to clear unsupported bits in the termios */ acm->clocal = ((termios->c_cflag & CLOCAL) != 0); - if (!newline.dwDTERate) { + if (C_BAUD(tty) == B0) { newline.dwDTERate = acm->line.dwDTERate; newctrl &= ~ACM_CTRL_DTR; - } else + } else if (termios_old && (termios_old->c_cflag & CBAUD) == B0) { newctrl |= ACM_CTRL_DTR; + } if (newctrl != acm->ctrlout) acm_set_control(acm, acm->ctrlout = newctrl); @@ -1048,10 +1079,11 @@ next_desc: } else { control_interface = usb_ifnum_to_if(usb_dev, union_header->bMasterInterface0); data_interface = usb_ifnum_to_if(usb_dev, (data_interface_num = union_header->bSlaveInterface0)); - if (!control_interface || !data_interface) { - dev_dbg(&intf->dev, "no interfaces\n"); - return -ENODEV; - } + } + + if (!control_interface || !data_interface) { + dev_dbg(&intf->dev, "no interfaces\n"); + return -ENODEV; } if (data_interface_num != call_interface_num) @@ -1176,6 +1208,7 @@ made_compressed_probe: acm->bInterval = epread->bInterval; tty_port_init(&acm->port); acm->port.ops = &acm_port_ops; + init_usb_anchor(&acm->delayed); buf = usb_alloc_coherent(usb_dev, ctrlsize, GFP_KERNEL, &acm->ctrl_dma); if (!buf) { @@ -1325,6 +1358,7 @@ alloc_fail8: &dev_attr_wCountryCodes); device_remove_file(&acm->control->dev, &dev_attr_iCountryCodeRelDate); + kfree(acm->country_codes); } device_remove_file(&acm->control->dev, &dev_attr_bmCapabilities); alloc_fail7: @@ -1420,18 
+1454,15 @@ static int acm_suspend(struct usb_interface *intf, pm_message_t message) struct acm *acm = usb_get_intfdata(intf); int cnt; + spin_lock_irq(&acm->read_lock); + spin_lock(&acm->write_lock); if (PMSG_IS_AUTO(message)) { - int b; - - spin_lock_irq(&acm->write_lock); - b = acm->transmitting; - spin_unlock_irq(&acm->write_lock); - if (b) + if (acm->transmitting) { + spin_unlock(&acm->write_lock); + spin_unlock_irq(&acm->read_lock); return -EBUSY; + } } - - spin_lock_irq(&acm->read_lock); - spin_lock(&acm->write_lock); cnt = acm->susp_count++; spin_unlock(&acm->write_lock); spin_unlock_irq(&acm->read_lock); @@ -1439,8 +1470,7 @@ static int acm_suspend(struct usb_interface *intf, pm_message_t message) if (cnt) return 0; - if (test_bit(ASYNCB_INITIALIZED, &acm->port.flags)) - stop_data_traffic(acm); + stop_data_traffic(acm); return 0; } @@ -1448,29 +1478,24 @@ static int acm_suspend(struct usb_interface *intf, pm_message_t message) static int acm_resume(struct usb_interface *intf) { struct acm *acm = usb_get_intfdata(intf); - struct acm_wb *wb; + struct urb *urb; int rv = 0; - int cnt; spin_lock_irq(&acm->read_lock); - acm->susp_count -= 1; - cnt = acm->susp_count; - spin_unlock_irq(&acm->read_lock); + spin_lock(&acm->write_lock); - if (cnt) - return 0; + if (--acm->susp_count) + goto out; if (test_bit(ASYNCB_INITIALIZED, &acm->port.flags)) { - rv = usb_submit_urb(acm->ctrlurb, GFP_NOIO); - - spin_lock_irq(&acm->write_lock); - if (acm->delayed_wb) { - wb = acm->delayed_wb; - acm->delayed_wb = NULL; - spin_unlock_irq(&acm->write_lock); - acm_start_wb(acm, wb); - } else { - spin_unlock_irq(&acm->write_lock); + rv = usb_submit_urb(acm->ctrlurb, GFP_ATOMIC); + + for (;;) { + urb = usb_get_from_anchor(&acm->delayed); + if (!urb) + break; + + acm_start_wb(acm, urb->context); } /* @@ -1478,12 +1503,14 @@ static int acm_resume(struct usb_interface *intf) * do the write path at all cost */ if (rv < 0) - goto err_out; + goto out; - rv = acm_submit_read_urbs(acm, 
GFP_NOIO); + rv = acm_submit_read_urbs(acm, GFP_ATOMIC); } +out: + spin_unlock(&acm->write_lock); + spin_unlock_irq(&acm->read_lock); -err_out: return rv; } @@ -1556,17 +1583,32 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x0572, 0x1328), /* Shiro / Aztech USB MODEM UM-3100 */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, + { USB_DEVICE(0x2184, 0x001c) }, /* GW Instek AFG-2225 */ { USB_DEVICE(0x22b8, 0x6425), /* Motorola MOTOMAGX phones */ }, /* Motorola H24 HSPA module: */ { USB_DEVICE(0x22b8, 0x2d91) }, /* modem */ - { USB_DEVICE(0x22b8, 0x2d92) }, /* modem + diagnostics */ - { USB_DEVICE(0x22b8, 0x2d93) }, /* modem + AT port */ - { USB_DEVICE(0x22b8, 0x2d95) }, /* modem + AT port + diagnostics */ - { USB_DEVICE(0x22b8, 0x2d96) }, /* modem + NMEA */ - { USB_DEVICE(0x22b8, 0x2d97) }, /* modem + diagnostics + NMEA */ - { USB_DEVICE(0x22b8, 0x2d99) }, /* modem + AT port + NMEA */ - { USB_DEVICE(0x22b8, 0x2d9a) }, /* modem + AT port + diagnostics + NMEA */ + { USB_DEVICE(0x22b8, 0x2d92), /* modem + diagnostics */ + .driver_info = NO_UNION_NORMAL, /* handle only modem interface */ + }, + { USB_DEVICE(0x22b8, 0x2d93), /* modem + AT port */ + .driver_info = NO_UNION_NORMAL, /* handle only modem interface */ + }, + { USB_DEVICE(0x22b8, 0x2d95), /* modem + AT port + diagnostics */ + .driver_info = NO_UNION_NORMAL, /* handle only modem interface */ + }, + { USB_DEVICE(0x22b8, 0x2d96), /* modem + NMEA */ + .driver_info = NO_UNION_NORMAL, /* handle only modem interface */ + }, + { USB_DEVICE(0x22b8, 0x2d97), /* modem + diagnostics + NMEA */ + .driver_info = NO_UNION_NORMAL, /* handle only modem interface */ + }, + { USB_DEVICE(0x22b8, 0x2d99), /* modem + AT port + NMEA */ + .driver_info = NO_UNION_NORMAL, /* handle only modem interface */ + }, + { USB_DEVICE(0x22b8, 0x2d9a), /* modem + AT port + diagnostics + NMEA */ + .driver_info = NO_UNION_NORMAL, /* handle only modem interface */ + }, { USB_DEVICE(0x0572, 0x1329), /* Hummingbird 
huc56s (Conexant) */ .driver_info = NO_UNION_NORMAL, /* union descriptor misplaced on diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h index 0f76e4a..1683ac1 100644 --- a/drivers/usb/class/cdc-acm.h +++ b/drivers/usb/class/cdc-acm.h @@ -117,7 +117,7 @@ struct acm { unsigned int throttled:1; /* actually throttled */ unsigned int throttle_req:1; /* throttle requested */ u8 bInterval; - struct acm_wb *delayed_wb; /* write queued for a device about to be woken */ + struct usb_anchor delayed; /* writes queued for a device about to be woken */ }; #define CDC_DATA_INTERFACE_TYPE 0x0a diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 6524383..98cb096 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -201,6 +201,17 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, if (n == 0) n = 9; /* 32 ms = 2^(9-1) uframes */ j = 16; + + /* + * Adjust bInterval for quirked devices. + * This quirk fixes bIntervals reported in + * linear microframes. + */ + if (to_usb_device(ddev)->quirks & + USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL) { + n = clamp(fls(d->bInterval), i, j); + i = j = n; + } break; default: /* USB_SPEED_FULL or _LOW */ /* For low-speed, 10 ms is the official minimum. diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 71dc5d7..31ffd84 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1413,7 +1413,7 @@ static int proc_do_submiturb(struct dev_state *ps, struct usbdevfs_urb *uurb, u = (is_in ? 
URB_DIR_IN : URB_DIR_OUT); if (uurb->flags & USBDEVFS_URB_ISO_ASAP) u |= URB_ISO_ASAP; - if (uurb->flags & USBDEVFS_URB_SHORT_NOT_OK) + if (uurb->flags & USBDEVFS_URB_SHORT_NOT_OK && is_in) u |= URB_SHORT_NOT_OK; if (uurb->flags & USBDEVFS_URB_NO_FSBR) u |= URB_NO_FSBR; diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index 689433c..bfddeb3 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -959,8 +959,7 @@ EXPORT_SYMBOL_GPL(usb_deregister); * it doesn't support pre_reset/post_reset/reset_resume or * because it doesn't support suspend/resume. * - * The caller must hold @intf's device's lock, but not its pm_mutex - * and not @intf->dev.sem. + * The caller must hold @intf's device's lock, but not @intf's lock. */ void usb_forced_unbind_intf(struct usb_interface *intf) { @@ -973,16 +972,37 @@ void usb_forced_unbind_intf(struct usb_interface *intf) intf->needs_binding = 1; } +/* + * Unbind drivers for @udev's marked interfaces. These interfaces have + * the needs_binding flag set, for example by usb_resume_interface(). + * + * The caller must hold @udev's device lock. + */ +static void unbind_marked_interfaces(struct usb_device *udev) +{ + struct usb_host_config *config; + int i; + struct usb_interface *intf; + + config = udev->actconfig; + if (config) { + for (i = 0; i < config->desc.bNumInterfaces; ++i) { + intf = config->interface[i]; + if (intf->dev.driver && intf->needs_binding) + usb_forced_unbind_intf(intf); + } + } +} + /* Delayed forced unbinding of a USB interface driver and scan * for rebinding. * - * The caller must hold @intf's device's lock, but not its pm_mutex - * and not @intf->dev.sem. + * The caller must hold @intf's device's lock, but not @intf's lock. * * Note: Rebinds will be skipped if a system sleep transition is in * progress and the PM "complete" callback hasn't occurred yet. 
*/ -void usb_rebind_intf(struct usb_interface *intf) +static void usb_rebind_intf(struct usb_interface *intf) { int rc; @@ -999,68 +1019,66 @@ void usb_rebind_intf(struct usb_interface *intf) } } -#ifdef CONFIG_PM - -/* Unbind drivers for @udev's interfaces that don't support suspend/resume - * There is no check for reset_resume here because it can be determined - * only during resume whether reset_resume is needed. +/* + * Rebind drivers to @udev's marked interfaces. These interfaces have + * the needs_binding flag set. * * The caller must hold @udev's device lock. */ -static void unbind_no_pm_drivers_interfaces(struct usb_device *udev) +static void rebind_marked_interfaces(struct usb_device *udev) { struct usb_host_config *config; int i; struct usb_interface *intf; - struct usb_driver *drv; config = udev->actconfig; if (config) { for (i = 0; i < config->desc.bNumInterfaces; ++i) { intf = config->interface[i]; - - if (intf->dev.driver) { - drv = to_usb_driver(intf->dev.driver); - if (!drv->suspend || !drv->resume) - usb_forced_unbind_intf(intf); - } + if (intf->needs_binding) + usb_rebind_intf(intf); } } } -/* Unbind drivers for @udev's interfaces that failed to support reset-resume. - * These interfaces have the needs_binding flag set by usb_resume_interface(). +/* + * Unbind all of @udev's marked interfaces and then rebind all of them. + * This ordering is necessary because some drivers claim several interfaces + * when they are first probed. * * The caller must hold @udev's device lock. 
*/ -static void unbind_no_reset_resume_drivers_interfaces(struct usb_device *udev) +void usb_unbind_and_rebind_marked_interfaces(struct usb_device *udev) { - struct usb_host_config *config; - int i; - struct usb_interface *intf; - - config = udev->actconfig; - if (config) { - for (i = 0; i < config->desc.bNumInterfaces; ++i) { - intf = config->interface[i]; - if (intf->dev.driver && intf->needs_binding) - usb_forced_unbind_intf(intf); - } - } + unbind_marked_interfaces(udev); + rebind_marked_interfaces(udev); } -static void do_rebind_interfaces(struct usb_device *udev) +#ifdef CONFIG_PM + +/* Unbind drivers for @udev's interfaces that don't support suspend/resume + * There is no check for reset_resume here because it can be determined + * only during resume whether reset_resume is needed. + * + * The caller must hold @udev's device lock. + */ +static void unbind_no_pm_drivers_interfaces(struct usb_device *udev) { struct usb_host_config *config; int i; struct usb_interface *intf; + struct usb_driver *drv; config = udev->actconfig; if (config) { for (i = 0; i < config->desc.bNumInterfaces; ++i) { intf = config->interface[i]; - if (intf->needs_binding) - usb_rebind_intf(intf); + + if (intf->dev.driver) { + drv = to_usb_driver(intf->dev.driver); + if (!drv->suspend || !drv->resume) + usb_forced_unbind_intf(intf); + } } } } @@ -1389,7 +1407,7 @@ int usb_resume_complete(struct device *dev) * whose needs_binding flag is set */ if (udev->state != USB_STATE_NOTATTACHED) - do_rebind_interfaces(udev); + rebind_marked_interfaces(udev); return 0; } @@ -1411,7 +1429,7 @@ int usb_resume(struct device *dev, pm_message_t msg) pm_runtime_disable(dev); pm_runtime_set_active(dev); pm_runtime_enable(dev); - unbind_no_reset_resume_drivers_interfaces(udev); + unbind_marked_interfaces(udev); } /* Avoid PM error messages for devices disconnected while suspended @@ -1752,10 +1770,13 @@ int usb_runtime_suspend(struct device *dev) if (status == -EAGAIN || status == -EBUSY) 
usb_mark_last_busy(udev); - /* The PM core reacts badly unless the return code is 0, - * -EAGAIN, or -EBUSY, so always return -EBUSY on an error. + /* + * The PM core reacts badly unless the return code is 0, + * -EAGAIN, or -EBUSY, so always return -EBUSY on an error + * (except for root hubs, because they don't suspend through + * an upstream port like other USB devices). */ - if (status != 0) + if (status != 0 && udev->parent) return -EBUSY; return status; } diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c index b9d3c43..5e1a179 100644 --- a/drivers/usb/core/hcd-pci.c +++ b/drivers/usb/core/hcd-pci.c @@ -75,7 +75,7 @@ static void for_each_companion(struct pci_dev *pdev, struct usb_hcd *hcd, PCI_SLOT(companion->devfn) != slot) continue; companion_hcd = pci_get_drvdata(companion); - if (!companion_hcd) + if (!companion_hcd || !companion_hcd->self.root_hub) continue; fn(pdev, hcd, companion, companion_hcd); } @@ -377,6 +377,8 @@ void usb_hcd_pci_shutdown(struct pci_dev *dev) if (test_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags) && hcd->driver->shutdown) { hcd->driver->shutdown(hcd); + if (usb_hcd_is_primary_hcd(hcd) && hcd->irq > 0) + free_irq(hcd->irq, hcd); pci_disable_device(dev); } } diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index bf21a21..465605b 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -2053,6 +2053,8 @@ int usb_alloc_streams(struct usb_interface *interface, return -EINVAL; if (dev->speed != USB_SPEED_SUPER) return -EINVAL; + if (dev->state < USB_STATE_CONFIGURED) + return -ENODEV; /* Streams only apply to bulk endpoints. 
*/ for (i = 0; i < num_eps; i++) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 19f45e1..f37e92f 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -891,6 +891,25 @@ static int hub_usb3_port_disable(struct usb_hub *hub, int port1) if (!hub_is_superspeed(hub->hdev)) return -EINVAL; + ret = hub_port_status(hub, port1, &portstatus, &portchange); + if (ret < 0) + return ret; + + /* + * USB controller Advanced Micro Devices, Inc. [AMD] FCH USB XHCI + * Controller [1022:7814] will have spurious result making the following + * usb 3.0 device hotplugging route to the 2.0 root hub and recognized + * as high-speed device if we set the usb 3.0 port link state to + * Disabled. Since it's already in USB_SS_PORT_LS_RX_DETECT state, we + * check the state here to avoid the bug. + */ + if ((portstatus & USB_PORT_STAT_LINK_STATE) == + USB_SS_PORT_LS_RX_DETECT) { + dev_dbg(&hub->ports[port1 - 1]->dev, + "Not disabling port; link state is RxDetect\n"); + return ret; + } + ret = hub_set_port_link_state(hub, port1, USB_SS_PORT_LS_SS_DISABLED); if (ret) return ret; @@ -1152,7 +1171,8 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) /* Tell khubd to disconnect the device or * check for a new connection */ - if (udev || (portstatus & USB_PORT_STAT_CONNECTION)) + if (udev || (portstatus & USB_PORT_STAT_CONNECTION) || + (portstatus & USB_PORT_STAT_OVERCURRENT)) set_bit(port1, hub->change_bits); } else if (portstatus & USB_PORT_STAT_ENABLE) { @@ -1685,11 +1705,28 @@ static int hub_probe(struct usb_interface *intf, const struct usb_device_id *id) * - Change autosuspend delay of hub can avoid unnecessary auto * suspend timer for hub, also may decrease power consumption * of USB bus. + * + * - If user has indicated to prevent autosuspend by passing + * usbcore.autosuspend = -1 then keep autosuspend disabled. 
+ */ +#ifdef CONFIG_PM_RUNTIME + if (hdev->dev.power.autosuspend_delay >= 0) + pm_runtime_set_autosuspend_delay(&hdev->dev, 0); +#endif + + /* + * Hubs have proper suspend/resume support, except for root hubs + * where the controller driver doesn't have bus_suspend and + * bus_resume methods. */ - pm_runtime_set_autosuspend_delay(&hdev->dev, 0); + if (hdev->parent) { /* normal device */ + usb_enable_autosuspend(hdev); + } else { /* root hub */ + const struct hc_driver *drv = bus_to_hcd(hdev->bus)->driver; - /* Hubs have proper suspend/resume support. */ - usb_enable_autosuspend(hdev); + if (drv->bus_suspend && drv->bus_resume) + usb_enable_autosuspend(hdev); + } if (hdev->level == MAX_TOPO_LEVEL) { dev_err(&intf->dev, @@ -1921,8 +1958,10 @@ void usb_set_device_state(struct usb_device *udev, || new_state == USB_STATE_SUSPENDED) ; /* No change to wakeup settings */ else if (new_state == USB_STATE_CONFIGURED) - wakeup = udev->actconfig->desc.bmAttributes - & USB_CONFIG_ATT_WAKEUP; + wakeup = (udev->quirks & + USB_QUIRK_IGNORE_REMOTE_WAKEUP) ? 0 : + udev->actconfig->desc.bmAttributes & + USB_CONFIG_ATT_WAKEUP; else wakeup = 0; } @@ -3138,6 +3177,43 @@ static int finish_port_resume(struct usb_device *udev) } /* + * There are some SS USB devices which take longer time for link training. + * XHCI specs 4.19.4 says that when Link training is successful, port + * sets CSC bit to 1. So if SW reads port status before successful link + * training, then it will not find device to be present. + * USB Analyzer log with such buggy devices show that in some cases + * device switch on the RX termination after long delay of host enabling + * the VBUS. In few other cases it has been seen that device fails to + * negotiate link training in first attempt. It has been + * reported till now that few devices take as long as 2000 ms to train + * the link after host enabling its VBUS and termination. Following + * routine implements a 2000 ms timeout for link training. 
If in a case + * link trains before timeout, loop will exit earlier. + * + * FIXME: If a device was connected before suspend, but was removed + * while system was asleep, then the loop in the following routine will + * only exit at timeout. + * + * This routine should only be called when persist is enabled for a SS + * device. + */ +static int wait_for_ss_port_enable(struct usb_device *udev, + struct usb_hub *hub, int *port1, + u16 *portchange, u16 *portstatus) +{ + int status = 0, delay_ms = 0; + + while (delay_ms < 2000) { + if (status || *portstatus & USB_PORT_STAT_CONNECTION) + break; + msleep(20); + delay_ms += 20; + status = hub_port_status(hub, *port1, portstatus, portchange); + } + return status; +} + +/* * usb_port_resume - re-activate a suspended usb device's upstream port * @udev: device to re-activate, not a root hub * Context: must be able to sleep; device not locked; pm locks held @@ -3239,6 +3315,10 @@ int usb_port_resume(struct usb_device *udev, pm_message_t msg) clear_bit(port1, hub->busy_bits); + if (udev->persist_enabled && hub_is_superspeed(hub->hdev)) + status = wait_for_ss_port_enable(udev, hub, &port1, &portchange, + &portstatus); + status = check_port_resume_type(udev, hub, port1, status, portchange, portstatus); if (status == 0) @@ -4269,6 +4349,9 @@ check_highspeed (struct usb_hub *hub, struct usb_device *udev, int port1) struct usb_qualifier_descriptor *qual; int status; + if (udev->quirks & USB_QUIRK_DEVICE_QUALIFIER) + return; + qual = kmalloc (sizeof *qual, GFP_KERNEL); if (qual == NULL) return; @@ -4657,9 +4740,10 @@ static void hub_events(void) hub = list_entry(tmp, struct usb_hub, event_list); kref_get(&hub->kref); + hdev = hub->hdev; + usb_get_dev(hdev); spin_unlock_irq(&hub_event_lock); - hdev = hub->hdev; hub_dev = hub->intfdev; intf = to_usb_interface(hub_dev); dev_dbg(hub_dev, "state %d ports %d chg %04x evt %04x\n", @@ -4872,6 +4956,7 @@ static void hub_events(void) usb_autopm_put_interface(intf); loop_disconnected: 
usb_unlock_device(hdev); + usb_put_dev(hdev); kref_put(&hub->kref, hub_release); } /* end while (1) */ @@ -5310,10 +5395,11 @@ int usb_reset_device(struct usb_device *udev) else if (cintf->condition == USB_INTERFACE_BOUND) rebind = 1; + if (rebind) + cintf->needs_binding = 1; } - if (ret == 0 && rebind) - usb_rebind_intf(cintf); } + usb_unbind_and_rebind_marked_interfaces(udev); } usb_autosuspend_device(udev); diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 1053eb6..aa77595 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -43,6 +43,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* Creative SB Audigy 2 NX */ { USB_DEVICE(0x041e, 0x3020), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Microsoft Wireless Laser Mouse 6000 Receiver */ + { USB_DEVICE(0x045e, 0x00e1), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Microsoft LifeCam-VX700 v2.0 */ { USB_DEVICE(0x045e, 0x0770), .driver_info = USB_QUIRK_RESET_RESUME }, @@ -92,6 +95,16 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x04e8, 0x6601), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, + /* Elan Touchscreen */ + { USB_DEVICE(0x04f3, 0x0089), .driver_info = + USB_QUIRK_DEVICE_QUALIFIER }, + + { USB_DEVICE(0x04f3, 0x009b), .driver_info = + USB_QUIRK_DEVICE_QUALIFIER }, + + { USB_DEVICE(0x04f3, 0x016f), .driver_info = + USB_QUIRK_DEVICE_QUALIFIER }, + /* Roland SC-8820 */ { USB_DEVICE(0x0582, 0x0007), .driver_info = USB_QUIRK_RESET_RESUME }, @@ -147,6 +160,10 @@ static const struct usb_device_id usb_quirk_list[] = { /* SKYMEDI USB_DRIVE */ { USB_DEVICE(0x1516, 0x8628), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Razer - Razer Blade Keyboard */ + { USB_DEVICE(0x1532, 0x0116), .driver_info = + USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, + /* BUILDWIN Photo Frame */ { USB_DEVICE(0x1908, 0x1315), .driver_info = USB_QUIRK_HONOR_BNUMINTERFACES }, @@ -154,6 +171,13 @@ static const struct usb_device_id usb_quirk_list[] = { /* INTEL VALUE SSD */ 
{ USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME }, + /* USB3503 */ + { USB_DEVICE(0x0424, 0x3503), .driver_info = USB_QUIRK_RESET_RESUME }, + + /* ASUS Base Station(T100) */ + { USB_DEVICE(0x0b05, 0x17e0), .driver_info = + USB_QUIRK_IGNORE_REMOTE_WAKEUP }, + { } /* terminating entry must be last */ }; @@ -162,6 +186,10 @@ static const struct usb_device_id usb_interface_quirk_list[] = { { USB_VENDOR_AND_INTERFACE_INFO(0x046d, USB_CLASS_VIDEO, 1, 0), .driver_info = USB_QUIRK_RESET_RESUME }, + /* ASUS Base Station(T100) */ + { USB_DEVICE(0x0b05, 0x17e0), .driver_info = + USB_QUIRK_IGNORE_REMOTE_WAKEUP }, + { } /* terminating entry must be last */ }; diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h index 8238577..0923add7 100644 --- a/drivers/usb/core/usb.h +++ b/drivers/usb/core/usb.h @@ -55,7 +55,7 @@ extern int usb_match_one_id_intf(struct usb_device *dev, extern int usb_match_device(struct usb_device *dev, const struct usb_device_id *id); extern void usb_forced_unbind_intf(struct usb_interface *intf); -extern void usb_rebind_intf(struct usb_interface *intf); +extern void usb_unbind_and_rebind_marked_interfaces(struct usb_device *udev); extern int usb_hub_claim_port(struct usb_device *hdev, unsigned port, struct dev_state *owner); diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 4ccc437..bd1a8b6 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -575,12 +575,6 @@ static int dwc3_remove(struct platform_device *pdev) { struct dwc3 *dwc = platform_get_drvdata(pdev); - usb_phy_set_suspend(dwc->usb2_phy, 1); - usb_phy_set_suspend(dwc->usb3_phy, 1); - - pm_runtime_put(&pdev->dev); - pm_runtime_disable(&pdev->dev); - dwc3_debugfs_exit(dwc); switch (dwc->dr_mode) { @@ -601,8 +595,15 @@ static int dwc3_remove(struct platform_device *pdev) dwc3_event_buffers_cleanup(dwc); dwc3_free_event_buffers(dwc); + + usb_phy_set_suspend(dwc->usb2_phy, 1); + usb_phy_set_suspend(dwc->usb3_phy, 1); + dwc3_core_exit(dwc); 
+ pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); + return 0; } diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index f8af8d4..546e67a 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -815,15 +815,15 @@ struct dwc3_event_depevt { * 12 - VndrDevTstRcved * @reserved15_12: Reserved, not used * @event_info: Information about this event - * @reserved31_24: Reserved, not used + * @reserved31_25: Reserved, not used */ struct dwc3_event_devt { u32 one_bit:1; u32 device_event:7; u32 type:4; u32 reserved15_12:4; - u32 event_info:8; - u32 reserved31_24:8; + u32 event_info:9; + u32 reserved31_25:7; } __packed; /** @@ -856,6 +856,19 @@ union dwc3_event { struct dwc3_event_gevt gevt; }; +/** + * struct dwc3_gadget_ep_cmd_params - representation of endpoint command + * parameters + * @param2: third parameter + * @param1: second parameter + * @param0: first parameter + */ +struct dwc3_gadget_ep_cmd_params { + u32 param2; + u32 param1; + u32 param0; +}; + /* * DWC3 Features to be used as Driver Data */ @@ -881,11 +894,31 @@ static inline void dwc3_host_exit(struct dwc3 *dwc) #if IS_ENABLED(CONFIG_USB_DWC3_GADGET) || IS_ENABLED(CONFIG_USB_DWC3_DUAL_ROLE) int dwc3_gadget_init(struct dwc3 *dwc); void dwc3_gadget_exit(struct dwc3 *dwc); +int dwc3_gadget_set_test_mode(struct dwc3 *dwc, int mode); +int dwc3_gadget_get_link_state(struct dwc3 *dwc); +int dwc3_gadget_set_link_state(struct dwc3 *dwc, enum dwc3_link_state state); +int dwc3_send_gadget_ep_cmd(struct dwc3 *dwc, unsigned ep, + unsigned cmd, struct dwc3_gadget_ep_cmd_params *params); +int dwc3_send_gadget_generic_command(struct dwc3 *dwc, int cmd, u32 param); #else static inline int dwc3_gadget_init(struct dwc3 *dwc) { return 0; } static inline void dwc3_gadget_exit(struct dwc3 *dwc) { } +static inline int dwc3_gadget_set_test_mode(struct dwc3 *dwc, int mode) +{ return 0; } +static inline int dwc3_gadget_get_link_state(struct dwc3 *dwc) +{ return 0; } +static inline int 
dwc3_gadget_set_link_state(struct dwc3 *dwc, + enum dwc3_link_state state) +{ return 0; } + +static inline int dwc3_send_gadget_ep_cmd(struct dwc3 *dwc, unsigned ep, + unsigned cmd, struct dwc3_gadget_ep_cmd_params *params) +{ return 0; } +static inline int dwc3_send_gadget_generic_command(struct dwc3 *dwc, + int cmd, u32 param) +{ return 0; } #endif /* power management interface */ diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c index 7f7ea62..2a0422b 100644 --- a/drivers/usb/dwc3/dwc3-omap.c +++ b/drivers/usb/dwc3/dwc3-omap.c @@ -592,9 +592,9 @@ static int dwc3_omap_remove(struct platform_device *pdev) if (omap->extcon_id_dev.edev) extcon_unregister_interest(&omap->extcon_id_dev); dwc3_omap_disable_irqs(omap); + device_for_each_child(&pdev->dev, NULL, dwc3_omap_remove_core); pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); - device_for_each_child(&pdev->dev, NULL, dwc3_omap_remove_core); return 0; } diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 056da97..4a1922c 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -251,7 +251,7 @@ static void dwc3_ep0_stall_and_restart(struct dwc3 *dwc) /* stall is always issued on EP0 */ dep = dwc->eps[0]; - __dwc3_gadget_ep_set_halt(dep, 1); + __dwc3_gadget_ep_set_halt(dep, 1, false); dep->flags = DWC3_EP_ENABLED; dwc->delayed_status = false; @@ -461,7 +461,7 @@ static int dwc3_ep0_handle_feature(struct dwc3 *dwc, return -EINVAL; if (set == 0 && (dep->flags & DWC3_EP_WEDGE)) break; - ret = __dwc3_gadget_ep_set_halt(dep, set); + ret = __dwc3_gadget_ep_set_halt(dep, set, true); if (ret) return -EINVAL; break; diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 02e44fc..d19564d 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -532,12 +532,11 @@ static int __dwc3_gadget_ep_enable(struct dwc3_ep *dep, if (!usb_endpoint_xfer_isoc(desc)) return 0; - memset(&trb_link, 0, sizeof(trb_link)); - /* Link TRB for ISOC. 
The HWO bit is never reset */ trb_st_hw = &dep->trb_pool[0]; trb_link = &dep->trb_pool[DWC3_TRB_NUM - 1]; + memset(trb_link, 0, sizeof(*trb_link)); trb_link->bpl = lower_32_bits(dwc3_trb_dma_offset(dep, trb_st_hw)); trb_link->bph = upper_32_bits(dwc3_trb_dma_offset(dep, trb_st_hw)); @@ -586,6 +585,10 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep) dwc3_remove_requests(dwc, dep); + /* make sure HW endpoint isn't stalled */ + if (dep->flags & DWC3_EP_STALL) + __dwc3_gadget_ep_set_halt(dep, 0, false); + reg = dwc3_readl(dwc->regs, DWC3_DALEPENA); reg &= ~DWC3_DALEPENA_EP(dep->number); dwc3_writel(dwc->regs, DWC3_DALEPENA, reg); @@ -884,8 +887,7 @@ static void dwc3_prepare_trbs(struct dwc3_ep *dep, bool starting) if (i == (request->num_mapped_sgs - 1) || sg_is_last(s)) { - if (list_is_last(&req->list, - &dep->request_list)) + if (list_empty(&dep->request_list)) last_one = true; chain = false; } @@ -903,6 +905,9 @@ static void dwc3_prepare_trbs(struct dwc3_ep *dep, bool starting) if (last_one) break; } + + if (last_one) + break; } else { dma = req->request.dma; length = req->request.length; @@ -1182,7 +1187,7 @@ out0: return ret; } -int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value) +int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol) { struct dwc3_gadget_ep_cmd_params params; struct dwc3 *dwc = dep->dwc; @@ -1191,6 +1196,14 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value) memset(¶ms, 0x00, sizeof(params)); if (value) { + if (!protocol && ((dep->direction && dep->flags & DWC3_EP_BUSY) || + (!list_empty(&dep->req_queued) || + !list_empty(&dep->request_list)))) { + dev_dbg(dwc->dev, "%s: pending request, cannot halt\n", + dep->name); + return -EAGAIN; + } + ret = dwc3_send_gadget_ep_cmd(dwc, dep->number, DWC3_DEPCMD_SETSTALL, ¶ms); if (ret) @@ -1230,7 +1243,7 @@ static int dwc3_gadget_ep_set_halt(struct usb_ep *ep, int value) goto out; } - ret = __dwc3_gadget_ep_set_halt(dep, value); + ret = 
__dwc3_gadget_ep_set_halt(dep, value, false); out: spin_unlock_irqrestore(&dwc->lock, flags); @@ -1250,7 +1263,7 @@ static int dwc3_gadget_ep_set_wedge(struct usb_ep *ep) if (dep->number == 0 || dep->number == 1) return dwc3_gadget_ep0_set_halt(ep, 1); else - return dwc3_gadget_ep_set_halt(ep, 1); + return __dwc3_gadget_ep_set_halt(dep, 1, false); } /* -------------------------------------------------------------------------- */ diff --git a/drivers/usb/dwc3/gadget.h b/drivers/usb/dwc3/gadget.h index febe1aa..ac625582 100644 --- a/drivers/usb/dwc3/gadget.h +++ b/drivers/usb/dwc3/gadget.h @@ -56,12 +56,6 @@ struct dwc3; /* DEPXFERCFG parameter 0 */ #define DWC3_DEPXFERCFG_NUM_XFER_RES(n) ((n) & 0xffff) -struct dwc3_gadget_ep_cmd_params { - u32 param2; - u32 param1; - u32 param0; -}; - /* -------------------------------------------------------------------------- */ #define to_dwc3_request(r) (container_of(r, struct dwc3_request, request)) @@ -85,19 +79,13 @@ static inline void dwc3_gadget_move_request_queued(struct dwc3_request *req) void dwc3_gadget_giveback(struct dwc3_ep *dep, struct dwc3_request *req, int status); -int dwc3_gadget_set_test_mode(struct dwc3 *dwc, int mode); -int dwc3_gadget_set_link_state(struct dwc3 *dwc, enum dwc3_link_state state); - void dwc3_ep0_interrupt(struct dwc3 *dwc, const struct dwc3_event_depevt *event); void dwc3_ep0_out_start(struct dwc3 *dwc); int dwc3_gadget_ep0_set_halt(struct usb_ep *ep, int value); int dwc3_gadget_ep0_queue(struct usb_ep *ep, struct usb_request *request, gfp_t gfp_flags); -int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value); -int dwc3_send_gadget_ep_cmd(struct dwc3 *dwc, unsigned ep, - unsigned cmd, struct dwc3_gadget_ep_cmd_params *params); -int dwc3_send_gadget_generic_command(struct dwc3 *dwc, int cmd, u32 param); +int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol); /** * dwc3_gadget_ep_get_transfer_index - Gets transfer index from HW diff --git a/drivers/usb/gadget/Kconfig 
b/drivers/usb/gadget/Kconfig index 48cddf3..53c95e9 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -424,7 +424,7 @@ config USB_GOKU gadget drivers to also be dynamically linked. config USB_EG20T - tristate "Intel EG20T PCH/LAPIS Semiconductor IOH(ML7213/ML7831) UDC" + tristate "Intel QUARK X1000/EG20T PCH/LAPIS Semiconductor IOH(ML7213/ML7831) UDC" depends on PCI help This is a USB device driver for EG20T PCH. @@ -445,6 +445,7 @@ config USB_EG20T ML7213/ML7831 is companion chip for Intel Atom E6xx series. ML7213/ML7831 is completely compatible for Intel EG20T PCH. + This driver can be used with Intel's Quark X1000 SOC platform # # LAST -- dummy/emulated controller # diff --git a/drivers/usb/gadget/at91_udc.c b/drivers/usb/gadget/at91_udc.c index 4cc4fd6..e3101ce 100644 --- a/drivers/usb/gadget/at91_udc.c +++ b/drivers/usb/gadget/at91_udc.c @@ -871,12 +871,10 @@ static void clk_on(struct at91_udc *udc) return; udc->clocked = 1; - if (IS_ENABLED(CONFIG_COMMON_CLK)) { - clk_set_rate(udc->uclk, 48000000); - clk_prepare_enable(udc->uclk); - } - clk_prepare_enable(udc->iclk); - clk_prepare_enable(udc->fclk); + if (IS_ENABLED(CONFIG_COMMON_CLK)) + clk_enable(udc->uclk); + clk_enable(udc->iclk); + clk_enable(udc->fclk); } static void clk_off(struct at91_udc *udc) @@ -885,10 +883,10 @@ static void clk_off(struct at91_udc *udc) return; udc->clocked = 0; udc->gadget.speed = USB_SPEED_UNKNOWN; - clk_disable_unprepare(udc->fclk); - clk_disable_unprepare(udc->iclk); + clk_disable(udc->fclk); + clk_disable(udc->iclk); if (IS_ENABLED(CONFIG_COMMON_CLK)) - clk_disable_unprepare(udc->uclk); + clk_disable(udc->uclk); } /* @@ -1710,16 +1708,6 @@ static int at91udc_probe(struct platform_device *pdev) return -ENODEV; } - if (pdev->num_resources != 2) { - DBG("invalid num_resources\n"); - return -ENODEV; - } - if ((pdev->resource[0].flags != IORESOURCE_MEM) - || (pdev->resource[1].flags != IORESOURCE_IRQ)) { - DBG("invalid resource type\n"); - return -ENODEV; 
- } - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) return -ENXIO; @@ -1791,14 +1779,24 @@ static int at91udc_probe(struct platform_device *pdev) } /* don't do anything until we have both gadget driver and VBUS */ + if (IS_ENABLED(CONFIG_COMMON_CLK)) { + clk_set_rate(udc->uclk, 48000000); + retval = clk_prepare(udc->uclk); + if (retval) + goto fail1; + } + retval = clk_prepare(udc->fclk); + if (retval) + goto fail1a; + retval = clk_prepare_enable(udc->iclk); if (retval) - goto fail1; + goto fail1b; at91_udp_write(udc, AT91_UDP_TXVC, AT91_UDP_TXVC_TXVDIS); at91_udp_write(udc, AT91_UDP_IDR, 0xffffffff); /* Clear all pending interrupts - UDP may be used by bootloader. */ at91_udp_write(udc, AT91_UDP_ICR, 0xffffffff); - clk_disable_unprepare(udc->iclk); + clk_disable(udc->iclk); /* request UDC and maybe VBUS irqs */ udc->udp_irq = platform_get_irq(pdev, 0); @@ -1806,7 +1804,7 @@ static int at91udc_probe(struct platform_device *pdev) 0, driver_name, udc); if (retval < 0) { DBG("request irq %d failed\n", udc->udp_irq); - goto fail1; + goto fail1c; } if (gpio_is_valid(udc->board.vbus_pin)) { retval = gpio_request(udc->board.vbus_pin, "udc_vbus"); @@ -1859,6 +1857,13 @@ fail3: gpio_free(udc->board.vbus_pin); fail2: free_irq(udc->udp_irq, udc); +fail1c: + clk_unprepare(udc->iclk); +fail1b: + clk_unprepare(udc->fclk); +fail1a: + if (IS_ENABLED(CONFIG_COMMON_CLK)) + clk_unprepare(udc->uclk); fail1: if (IS_ENABLED(CONFIG_COMMON_CLK) && !IS_ERR(udc->uclk)) clk_put(udc->uclk); @@ -1907,6 +1912,11 @@ static int __exit at91udc_remove(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); release_mem_region(res->start, resource_size(res)); + if (IS_ENABLED(CONFIG_COMMON_CLK)) + clk_unprepare(udc->uclk); + clk_unprepare(udc->fclk); + clk_unprepare(udc->iclk); + clk_put(udc->iclk); clk_put(udc->fclk); if (IS_ENABLED(CONFIG_COMMON_CLK)) diff --git a/drivers/usb/gadget/atmel_usba_udc.c b/drivers/usb/gadget/atmel_usba_udc.c index 
2cb52e0..1e53092 100644 --- a/drivers/usb/gadget/atmel_usba_udc.c +++ b/drivers/usb/gadget/atmel_usba_udc.c @@ -1827,12 +1827,12 @@ static int atmel_usba_stop(struct usb_gadget *gadget, toggle_bias(0); usba_writel(udc, CTRL, USBA_DISABLE_MASK); - udc->driver = NULL; - clk_disable_unprepare(udc->hclk); clk_disable_unprepare(udc->pclk); - DBG(DBG_GADGET, "unregistered driver `%s'\n", driver->driver.name); + DBG(DBG_GADGET, "unregistered driver `%s'\n", udc->driver->driver.name); + + udc->driver = NULL; return 0; } diff --git a/drivers/usb/gadget/f_acm.c b/drivers/usb/gadget/f_acm.c index ab1065a..3384486 100644 --- a/drivers/usb/gadget/f_acm.c +++ b/drivers/usb/gadget/f_acm.c @@ -430,11 +430,12 @@ static int acm_set_alt(struct usb_function *f, unsigned intf, unsigned alt) if (acm->notify->driver_data) { VDBG(cdev, "reset acm control interface %d\n", intf); usb_ep_disable(acm->notify); - } else { - VDBG(cdev, "init acm ctrl interface %d\n", intf); + } + + if (!acm->notify->desc) if (config_ep_by_speed(cdev->gadget, f, acm->notify)) return -EINVAL; - } + usb_ep_enable(acm->notify); acm->notify->driver_data = acm; diff --git a/drivers/usb/gadget/f_fs.c b/drivers/usb/gadget/f_fs.c index fd2fe19..32d9183 100644 --- a/drivers/usb/gadget/f_fs.c +++ b/drivers/usb/gadget/f_fs.c @@ -1389,11 +1389,13 @@ static int functionfs_bind(struct ffs_data *ffs, struct usb_composite_dev *cdev) ffs->ep0req->context = ffs; lang = ffs->stringtabs; - for (lang = ffs->stringtabs; *lang; ++lang) { - struct usb_string *str = (*lang)->strings; - int id = first_id; - for (; str->s; ++id, ++str) - str->id = id; + if (lang) { + for (; *lang; ++lang) { + struct usb_string *str = (*lang)->strings; + int id = first_id; + for (; str->s; ++id, ++str) + str->id = id; + } } ffs->gadget = cdev->gadget; diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index 1033ecc..cd83b70 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -1504,7 +1504,7 @@ gadgetfs_setup 
(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) } break; -#ifndef CONFIG_USB_GADGET_PXA25X +#ifndef CONFIG_USB_PXA25X /* PXA automagically handles this request too */ case USB_REQ_GET_CONFIGURATION: if (ctrl->bRequestType != 0x80) diff --git a/drivers/usb/gadget/pch_udc.c b/drivers/usb/gadget/pch_udc.c index 24174e1..ea00b27 100644 --- a/drivers/usb/gadget/pch_udc.c +++ b/drivers/usb/gadget/pch_udc.c @@ -343,6 +343,7 @@ struct pch_vbus_gpio_data { * @setup_data: Received setup data * @phys_addr: of device memory * @base_addr: for mapped device memory + * @bar: Indicates which PCI BAR for USB regs * @irq: IRQ line for the device * @cfg_data: current cfg, intf, and alt in use * @vbus_gpio: GPIO informaton for detecting VBUS @@ -370,14 +371,17 @@ struct pch_udc_dev { struct usb_ctrlrequest setup_data; unsigned long phys_addr; void __iomem *base_addr; + unsigned bar; unsigned irq; struct pch_udc_cfg_data cfg_data; struct pch_vbus_gpio_data vbus_gpio; }; #define to_pch_udc(g) (container_of((g), struct pch_udc_dev, gadget)) +#define PCH_UDC_PCI_BAR_QUARK_X1000 0 #define PCH_UDC_PCI_BAR 1 #define PCI_DEVICE_ID_INTEL_EG20T_UDC 0x8808 +#define PCI_DEVICE_ID_INTEL_QUARK_X1000_UDC 0x0939 #define PCI_VENDOR_ID_ROHM 0x10DB #define PCI_DEVICE_ID_ML7213_IOH_UDC 0x801D #define PCI_DEVICE_ID_ML7831_IOH_UDC 0x8808 @@ -3076,7 +3080,7 @@ static void pch_udc_remove(struct pci_dev *pdev) iounmap(dev->base_addr); if (dev->mem_region) release_mem_region(dev->phys_addr, - pci_resource_len(pdev, PCH_UDC_PCI_BAR)); + pci_resource_len(pdev, dev->bar)); if (dev->active) pci_disable_device(pdev); kfree(dev); @@ -3145,9 +3149,15 @@ static int pch_udc_probe(struct pci_dev *pdev, dev->active = 1; pci_set_drvdata(pdev, dev); + /* Determine BAR based on PCI ID */ + if (id->device == PCI_DEVICE_ID_INTEL_QUARK_X1000_UDC) + dev->bar = PCH_UDC_PCI_BAR_QUARK_X1000; + else + dev->bar = PCH_UDC_PCI_BAR; + /* PCI resource allocation */ - resource = pci_resource_start(pdev, 1); - len = 
pci_resource_len(pdev, 1); + resource = pci_resource_start(pdev, dev->bar); + len = pci_resource_len(pdev, dev->bar); if (!request_mem_region(resource, len, KBUILD_MODNAME)) { dev_err(&pdev->dev, "%s: pci device used already\n", __func__); @@ -3213,6 +3223,12 @@ finished: static DEFINE_PCI_DEVICE_TABLE(pch_udc_pcidev_id) = { { + PCI_DEVICE(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_QUARK_X1000_UDC), + .class = (PCI_CLASS_SERIAL_USB << 8) | 0xfe, + .class_mask = 0xffffffff, + }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_EG20T_UDC), .class = (PCI_CLASS_SERIAL_USB << 8) | 0xfe, .class_mask = 0xffffffff, diff --git a/drivers/usb/gadget/tcm_usb_gadget.c b/drivers/usb/gadget/tcm_usb_gadget.c index 0ff3339..d9e15e8 100644 --- a/drivers/usb/gadget/tcm_usb_gadget.c +++ b/drivers/usb/gadget/tcm_usb_gadget.c @@ -1613,7 +1613,7 @@ static struct se_wwn *usbg_make_tport( return ERR_PTR(-ENOMEM); } tport->tport_wwpn = wwpn; - snprintf(tport->tport_name, sizeof(tport->tport_name), wnn_name); + snprintf(tport->tport_name, sizeof(tport->tport_name), "%s", wnn_name); return &tport->tport_wwn; } diff --git a/drivers/usb/gadget/udc-core.c b/drivers/usb/gadget/udc-core.c index 59891b1..a4aa923 100644 --- a/drivers/usb/gadget/udc-core.c +++ b/drivers/usb/gadget/udc-core.c @@ -455,6 +455,11 @@ static ssize_t usb_udc_softconn_store(struct device *dev, { struct usb_udc *udc = container_of(dev, struct usb_udc, dev); + if (!udc->driver) { + dev_err(dev, "soft-connect without a gadget driver\n"); + return -EOPNOTSUPP; + } + if (sysfs_streq(buf, "connect")) { usb_gadget_udc_start(udc->gadget, udc->driver); usb_gadget_connect(udc->gadget); diff --git a/drivers/usb/gadget/zero.c b/drivers/usb/gadget/zero.c index 0deb9d6..d31814c 100644 --- a/drivers/usb/gadget/zero.c +++ b/drivers/usb/gadget/zero.c @@ -280,7 +280,7 @@ static int __init zero_bind(struct usb_composite_dev *cdev) ss_opts->isoc_interval = gzero_options.isoc_interval; ss_opts->isoc_maxpacket = 
gzero_options.isoc_maxpacket; ss_opts->isoc_mult = gzero_options.isoc_mult; - ss_opts->isoc_maxburst = gzero_options.isoc_maxpacket; + ss_opts->isoc_maxburst = gzero_options.isoc_maxburst; ss_opts->bulk_buflen = gzero_options.bulk_buflen; func_ss = usb_get_function(func_inst_ss); diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index 784f624..51b1f4e 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -968,8 +968,6 @@ rescan: } qh->exception = 1; - if (ehci->rh_state < EHCI_RH_RUNNING) - qh->qh_state = QH_STATE_IDLE; switch (qh->qh_state) { case QH_STATE_LINKED: WARN_ON(!list_empty(&qh->qtd_list)); diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c index d87444c..561864d 100644 --- a/drivers/usb/host/ehci-hub.c +++ b/drivers/usb/host/ehci-hub.c @@ -1250,7 +1250,7 @@ static int ehci_hub_control ( if (selector == EHSET_TEST_SINGLE_STEP_SET_FEATURE) { spin_unlock_irqrestore(&ehci->lock, flags); retval = ehset_single_step_set_feature(hcd, - wIndex); + wIndex + 1); spin_lock_irqsave(&ehci->lock, flags); break; } diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c index 3e86bf4..ca7b964 100644 --- a/drivers/usb/host/ehci-pci.c +++ b/drivers/usb/host/ehci-pci.c @@ -35,6 +35,21 @@ static const char hcd_name[] = "ehci-pci"; #define PCI_DEVICE_ID_INTEL_CE4100_USB 0x2e70 /*-------------------------------------------------------------------------*/ +#define PCI_DEVICE_ID_INTEL_QUARK_X1000_SOC 0x0939 +static inline bool is_intel_quark_x1000(struct pci_dev *pdev) +{ + return pdev->vendor == PCI_VENDOR_ID_INTEL && + pdev->device == PCI_DEVICE_ID_INTEL_QUARK_X1000_SOC; +} + +/* + * 0x84 is the offset of in/out threshold register, + * and it is the same offset as the register of 'hostpc'. 
+ */ +#define intel_quark_x1000_insnreg01 hostpc + +/* Maximum usable threshold value is 0x7f dwords for both IN and OUT */ +#define INTEL_QUARK_X1000_EHCI_MAX_THRESHOLD 0x007f007f /* called after powerup, by probe or system-pm "wakeup" */ static int ehci_pci_reinit(struct ehci_hcd *ehci, struct pci_dev *pdev) @@ -50,6 +65,16 @@ static int ehci_pci_reinit(struct ehci_hcd *ehci, struct pci_dev *pdev) if (!retval) ehci_dbg(ehci, "MWI active\n"); + /* Reset the threshold limit */ + if (is_intel_quark_x1000(pdev)) { + /* + * For the Intel QUARK X1000, raise the I/O threshold to the + * maximum usable value in order to improve performance. + */ + ehci_writel(ehci, INTEL_QUARK_X1000_EHCI_MAX_THRESHOLD, + ehci->regs->intel_quark_x1000_insnreg01); + } + return 0; } diff --git a/drivers/usb/host/ehci-sched.c b/drivers/usb/host/ehci-sched.c index 85dd24e..8ecf164 100644 --- a/drivers/usb/host/ehci-sched.c +++ b/drivers/usb/host/ehci-sched.c @@ -1384,6 +1384,10 @@ iso_stream_schedule ( now = ehci_read_frame_index(ehci) & (mod - 1); + /* If needed, initialize last_iso_frame so that this URB will be seen */ + if (ehci->isoc_count == 0) + ehci->last_iso_frame = now >> 3; + /* Typical case: reuse current schedule, stream is still active. * Hopefully there are no gaps from the host falling behind * (irq delays etc). 
If there are, the behavior depends on @@ -1493,10 +1497,6 @@ iso_stream_schedule ( urb->start_frame = stream->next_uframe; if (!stream->highspeed) urb->start_frame >>= 3; - - /* Make sure scan_isoc() sees these */ - if (ehci->isoc_count == 0) - ehci->last_iso_frame = now >> 3; return 0; fail: diff --git a/drivers/usb/host/ohci-dbg.c b/drivers/usb/host/ohci-dbg.c index 31b81f9..17e698f 100644 --- a/drivers/usb/host/ohci-dbg.c +++ b/drivers/usb/host/ohci-dbg.c @@ -289,7 +289,7 @@ ohci_dump_roothub ( } } -static void ohci_dump (struct ohci_hcd *controller, int verbose) +static void ohci_dump(struct ohci_hcd *controller) { ohci_dbg (controller, "OHCI controller state\n"); @@ -408,7 +408,7 @@ ohci_dump_ed (const struct ohci_hcd *ohci, const char *label, } #else -static inline void ohci_dump (struct ohci_hcd *controller, int verbose) {} +static inline void ohci_dump (struct ohci_hcd *controller) {} #undef OHCI_VERBOSE_DEBUG @@ -531,15 +531,16 @@ show_list (struct ohci_hcd *ohci, char *buf, size_t count, struct ed *ed) static ssize_t fill_async_buffer(struct debug_buffer *buf) { struct ohci_hcd *ohci; - size_t temp; + size_t temp, size; unsigned long flags; ohci = buf->ohci; + size = PAGE_SIZE; /* display control and bulk lists together, for simplicity */ spin_lock_irqsave (&ohci->lock, flags); - temp = show_list(ohci, buf->page, buf->count, ohci->ed_controltail); - temp += show_list(ohci, buf->page + temp, buf->count - temp, + temp = show_list(ohci, buf->page, size, ohci->ed_controltail); + temp += show_list(ohci, buf->page + temp, size - temp, ohci->ed_bulktail); spin_unlock_irqrestore (&ohci->lock, flags); diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c index b691278..61acf4c 100644 --- a/drivers/usb/host/ohci-hcd.c +++ b/drivers/usb/host/ohci-hcd.c @@ -78,8 +78,8 @@ static const char hcd_name [] = "ohci_hcd"; #include "ohci.h" #include "pci-quirks.h" -static void ohci_dump (struct ohci_hcd *ohci, int verbose); -static void ohci_stop (struct 
usb_hcd *hcd); +static void ohci_dump(struct ohci_hcd *ohci); +static void ohci_stop(struct usb_hcd *hcd); #include "ohci-hub.c" #include "ohci-dbg.c" @@ -754,7 +754,7 @@ retry: ohci->ed_to_check = NULL; } - ohci_dump (ohci, 1); + ohci_dump(ohci); return 0; } @@ -835,7 +835,7 @@ static irqreturn_t ohci_irq (struct usb_hcd *hcd) usb_hc_died(hcd); } - ohci_dump (ohci, 1); + ohci_dump(ohci); ohci_usb_reset (ohci); } @@ -939,7 +939,7 @@ static void ohci_stop (struct usb_hcd *hcd) { struct ohci_hcd *ohci = hcd_to_ohci (hcd); - ohci_dump (ohci, 1); + ohci_dump(ohci); if (quirk_nec(ohci)) flush_work(&ohci->nec_work); diff --git a/drivers/usb/host/ohci-hub.c b/drivers/usb/host/ohci-hub.c index 2347ab8..dcf5708 100644 --- a/drivers/usb/host/ohci-hub.c +++ b/drivers/usb/host/ohci-hub.c @@ -90,6 +90,24 @@ __acquires(ohci->lock) dl_done_list (ohci); finish_unlinks (ohci, ohci_frame_no(ohci)); + /* + * Some controllers don't handle "global" suspend properly if + * there are unsuspended ports. For these controllers, put all + * the enabled ports into suspend before suspending the root hub. 
+ */ + if (ohci->flags & OHCI_QUIRK_GLOBAL_SUSPEND) { + __hc32 __iomem *portstat = ohci->regs->roothub.portstatus; + int i; + unsigned temp; + + for (i = 0; i < ohci->num_ports; (++i, ++portstat)) { + temp = ohci_readl(ohci, portstat); + if ((temp & (RH_PS_PES | RH_PS_PSS)) == + RH_PS_PES) + ohci_writel(ohci, RH_PS_PSS, portstat); + } + } + /* maybe resume can wake root hub */ if (ohci_to_hcd(ohci)->self.root_hub->do_remote_wakeup || autostop) { ohci->hc_control |= OHCI_CTRL_RWE; diff --git a/drivers/usb/host/ohci-pci.c b/drivers/usb/host/ohci-pci.c index 659cde1..fd9f77b 100644 --- a/drivers/usb/host/ohci-pci.c +++ b/drivers/usb/host/ohci-pci.c @@ -160,6 +160,7 @@ static int ohci_quirk_amd700(struct usb_hcd *hcd) ohci_dbg(ohci, "enabled AMD prefetch quirk\n"); } + ohci->flags |= OHCI_QUIRK_GLOBAL_SUSPEND; return 0; } diff --git a/drivers/usb/host/ohci-q.c b/drivers/usb/host/ohci-q.c index e7f577e..4e9f6a4 100644 --- a/drivers/usb/host/ohci-q.c +++ b/drivers/usb/host/ohci-q.c @@ -315,8 +315,7 @@ static void periodic_unlink (struct ohci_hcd *ohci, struct ed *ed) * - ED_OPER: when there's any request queued, the ED gets rescheduled * immediately. HC should be working on them. * - * - ED_IDLE: when there's no TD queue. there's no reason for the HC - * to care about this ED; safe to disable the endpoint. + * - ED_IDLE: when there's no TD queue or the HC isn't running. * * When finish_unlinks() runs later, after SOF interrupt, it will often * complete one or more URB unlinks before making that state change. @@ -930,6 +929,10 @@ rescan_all: int completed, modified; __hc32 *prev; + /* Is this ED already invisible to the hardware? 
*/ + if (ed->state == ED_IDLE) + goto ed_idle; + /* only take off EDs that the HC isn't using, accounting for * frame counter wraps and EDs with partially retired TDs */ @@ -959,12 +962,20 @@ skip_ed: } } + /* ED's now officially unlinked, hc doesn't see */ + ed->state = ED_IDLE; + if (quirk_zfmicro(ohci) && ed->type == PIPE_INTERRUPT) + ohci->eds_scheduled--; + ed->hwHeadP &= ~cpu_to_hc32(ohci, ED_H); + ed->hwNextED = 0; + wmb(); + ed->hwINFO &= ~cpu_to_hc32(ohci, ED_SKIP | ED_DEQUEUE); +ed_idle: + /* reentrancy: if we drop the schedule lock, someone might * have modified this list. normally it's just prepending * entries (which we'd ignore), but paranoia won't hurt. */ - *last = ed->ed_next; - ed->ed_next = NULL; modified = 0; /* unlink urbs as requested, but rescan the list after @@ -1022,19 +1033,20 @@ rescan_this: if (completed && !list_empty (&ed->td_list)) goto rescan_this; - /* ED's now officially unlinked, hc doesn't see */ - ed->state = ED_IDLE; - if (quirk_zfmicro(ohci) && ed->type == PIPE_INTERRUPT) - ohci->eds_scheduled--; - ed->hwHeadP &= ~cpu_to_hc32(ohci, ED_H); - ed->hwNextED = 0; - wmb (); - ed->hwINFO &= ~cpu_to_hc32 (ohci, ED_SKIP | ED_DEQUEUE); - - /* but if there's work queued, reschedule */ - if (!list_empty (&ed->td_list)) { - if (ohci->rh_state == OHCI_RH_RUNNING) - ed_schedule (ohci, ed); + /* + * If no TDs are queued, take ED off the ed_rm_list. + * Otherwise, if the HC is running, reschedule. + * If not, leave it on the list for further dequeues. 
+ */ + if (list_empty(&ed->td_list)) { + *last = ed->ed_next; + ed->ed_next = NULL; + } else if (ohci->rh_state == OHCI_RH_RUNNING) { + *last = ed->ed_next; + ed->ed_next = NULL; + ed_schedule(ohci, ed); + } else { + last = &ed->ed_next; } if (modified) diff --git a/drivers/usb/host/ohci-spear.c b/drivers/usb/host/ohci-spear.c index cc9dd9e..45f8773 100644 --- a/drivers/usb/host/ohci-spear.c +++ b/drivers/usb/host/ohci-spear.c @@ -53,7 +53,7 @@ static int ohci_spear_start(struct usb_hcd *hcd) create_debug_files(ohci); #ifdef DEBUG - ohci_dump(ohci, 1); + ohci_dump(ohci); #endif return 0; } diff --git a/drivers/usb/host/ohci.h b/drivers/usb/host/ohci.h index e2e5faa..0b2e58c 100644 --- a/drivers/usb/host/ohci.h +++ b/drivers/usb/host/ohci.h @@ -405,6 +405,8 @@ struct ohci_hcd { #define OHCI_QUIRK_HUB_POWER 0x100 /* distrust firmware power/oc setup */ #define OHCI_QUIRK_AMD_PLL 0x200 /* AMD PLL quirk*/ #define OHCI_QUIRK_AMD_PREFETCH 0x400 /* pre-fetch for ISO transfer */ +#define OHCI_QUIRK_GLOBAL_SUSPEND 0x800 /* must suspend ports */ + // there are also chip quirks/bugs in init logic struct work_struct nec_work; /* Worker for NEC quirk */ diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index 463156d..5a45437 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -560,7 +560,8 @@ static void quirk_usb_handoff_ohci(struct pci_dev *pdev) { void __iomem *base; u32 control; - u32 fminterval; + u32 fminterval = 0; + bool no_fminterval = false; int cnt; if (!mmio_resource_enabled(pdev, 0)) @@ -570,6 +571,13 @@ static void quirk_usb_handoff_ohci(struct pci_dev *pdev) if (base == NULL) return; + /* + * ULi M5237 OHCI controller locks the whole system when accessing + * the OHCI_FMINTERVAL offset. + */ + if (pdev->vendor == PCI_VENDOR_ID_AL && pdev->device == 0x5237) + no_fminterval = true; + control = readl(base + OHCI_CONTROL); /* On PA-RISC, PDC can leave IR set incorrectly; ignore it there. 
*/ @@ -608,7 +616,9 @@ static void quirk_usb_handoff_ohci(struct pci_dev *pdev) } /* software reset of the controller, preserving HcFmInterval */ - fminterval = readl(base + OHCI_FMINTERVAL); + if (!no_fminterval) + fminterval = readl(base + OHCI_FMINTERVAL); + writel(OHCI_HCR, base + OHCI_CMDSTATUS); /* reset requires max 10 us delay */ @@ -617,7 +627,9 @@ static void quirk_usb_handoff_ohci(struct pci_dev *pdev) break; udelay(1); } - writel(fminterval, base + OHCI_FMINTERVAL); + + if (!no_fminterval) + writel(fminterval, base + OHCI_FMINTERVAL); /* Now the controller is safely in SUSPEND and nothing can wake it up */ iounmap(base); @@ -645,6 +657,14 @@ static const struct dmi_system_id ehci_dmi_nohandoff_table[] = { DMI_MATCH(DMI_BIOS_VERSION, "Lucid-"), }, }, + { + /* HASEE E200 */ + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "HASEE"), + DMI_MATCH(DMI_BOARD_NAME, "E210"), + DMI_MATCH(DMI_BIOS_VERSION, "6.00"), + }, + }, { } }; @@ -654,9 +674,14 @@ static void ehci_bios_handoff(struct pci_dev *pdev, { int try_handoff = 1, tried_handoff = 0; - /* The Pegatron Lucid tablet sporadically waits for 98 seconds trying - * the handoff on its unused controller. Skip it. */ - if (pdev->vendor == 0x8086 && pdev->device == 0x283a) { + /* + * The Pegatron Lucid tablet sporadically waits for 98 seconds trying + * the handoff on its unused controller. Skip it. + * + * The HASEE E200 hangs when the semaphore is set (bugzilla #77021). 
+ */ + if (pdev->vendor == 0x8086 && (pdev->device == 0x283a || + pdev->device == 0x27cc)) { if (dmi_check_system(ehci_dmi_nohandoff_table)) try_handoff = 0; } @@ -836,6 +861,13 @@ void usb_enable_intel_xhci_ports(struct pci_dev *xhci_pdev) bool ehci_found = false; struct pci_dev *companion = NULL; + /* Sony VAIO t-series with subsystem device ID 90a8 is not capable of + * switching ports from EHCI to xHCI + */ + if (xhci_pdev->subsystem_vendor == PCI_VENDOR_ID_SONY && + xhci_pdev->subsystem_device == 0x90a8) + return; + /* make sure an intel EHCI controller exists */ for_each_pci_dev(companion) { if (companion->class == PCI_CLASS_SERIAL_USB_EHCI && diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index e8b4c56..cd47840 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -470,7 +470,8 @@ static void xhci_hub_report_usb2_link_state(u32 *status, u32 status_reg) } /* Updates Link Status for super Speed port */ -static void xhci_hub_report_usb3_link_state(u32 *status, u32 status_reg) +static void xhci_hub_report_usb3_link_state(struct xhci_hcd *xhci, + u32 *status, u32 status_reg) { u32 pls = status_reg & PORT_PLS_MASK; @@ -509,7 +510,8 @@ static void xhci_hub_report_usb3_link_state(u32 *status, u32 status_reg) * in which sometimes the port enters compliance mode * caused by a delay on the host-device negotiation. */ - if (pls == USB_SS_PORT_LS_COMP_MOD) + if ((xhci->quirks & XHCI_COMP_MODE_QUIRK) && + (pls == USB_SS_PORT_LS_COMP_MOD)) pls |= USB_PORT_STAT_CONNECTION; } @@ -668,7 +670,7 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd, } /* Update Port Link State */ if (hcd->speed == HCD_USB3) { - xhci_hub_report_usb3_link_state(&status, raw_port_status); + xhci_hub_report_usb3_link_state(xhci, &status, raw_port_status); /* * Verify if all USB3 Ports Have entered U0 already. * Delete Compliance Mode Timer if so. 
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 49b8bd0..837c333 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1722,6 +1722,16 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) kfree(cur_cd); } + num_ports = HCS_MAX_PORTS(xhci->hcs_params1); + for (i = 0; i < num_ports && xhci->rh_bw; i++) { + struct xhci_interval_bw_table *bwt = &xhci->rh_bw[i].bw_table; + for (j = 0; j < XHCI_MAX_INTERVAL; j++) { + struct list_head *ep = &bwt->interval_bw[j].endpoints; + while (!list_empty(ep)) + list_del_init(ep->next); + } + } + for (i = 1; i < MAX_HC_SLOTS; ++i) xhci_free_virt_device(xhci, i); @@ -1757,16 +1767,6 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) if (!xhci->rh_bw) goto no_bw; - num_ports = HCS_MAX_PORTS(xhci->hcs_params1); - for (i = 0; i < num_ports; i++) { - struct xhci_interval_bw_table *bwt = &xhci->rh_bw[i].bw_table; - for (j = 0; j < XHCI_MAX_INTERVAL; j++) { - struct list_head *ep = &bwt->interval_bw[j].endpoints; - while (!list_empty(ep)) - list_del_init(ep->next); - } - } - for (i = 0; i < num_ports; i++) { struct xhci_tt_bw_info *tt, *n; list_for_each_entry_safe(tt, n, &xhci->rh_bw[i].tts, tt_list) { diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 1af67a2..7dad9e5 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -101,6 +101,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) /* AMD PLL quirk */ if (pdev->vendor == PCI_VENDOR_ID_AMD && usb_amd_find_chipset_info()) xhci->quirks |= XHCI_AMD_PLL_FIX; + + if (pdev->vendor == PCI_VENDOR_ID_AMD) + xhci->quirks |= XHCI_TRUST_TX_LENGTH; + if (pdev->vendor == PCI_VENDOR_ID_INTEL) { xhci->quirks |= XHCI_LPM_SUPPORT; xhci->quirks |= XHCI_INTEL_HOST; @@ -135,6 +139,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) if (pdev->subsystem_vendor == PCI_VENDOR_ID_HP) xhci->quirks |= XHCI_SPURIOUS_WAKEUP; } + if (pdev->vendor == PCI_VENDOR_ID_INTEL && + 
pdev->device == PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_XHCI) { + xhci->quirks |= XHCI_SPURIOUS_REBOOT; + } if (pdev->vendor == PCI_VENDOR_ID_ETRON && pdev->device == PCI_DEVICE_ID_ASROCK_P67) { xhci->quirks |= XHCI_RESET_ON_RESUME; @@ -143,9 +151,7 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->quirks |= XHCI_TRUST_TX_LENGTH; } if (pdev->vendor == PCI_VENDOR_ID_RENESAS && - pdev->device == 0x0015 && - pdev->subsystem_vendor == PCI_VENDOR_ID_SAMSUNG && - pdev->subsystem_device == 0xc0cd) + pdev->device == 0x0015) xhci->quirks |= XHCI_RESET_ON_RESUME; if (pdev->vendor == PCI_VENDOR_ID_VIA) xhci->quirks |= XHCI_RESET_ON_RESUME; @@ -190,6 +196,10 @@ static int xhci_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) struct usb_hcd *hcd; driver = (struct hc_driver *)id->driver_data; + + /* Prevent runtime suspending between USB-2 and USB-3 initialization */ + pm_runtime_get_noresume(&dev->dev); + /* Register the USB 2.0 roothub. * FIXME: USB core must know to register the USB 2.0 roothub first. * This is sort of silly, because we could just set the HCD driver flags @@ -199,7 +209,7 @@ static int xhci_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) retval = usb_hcd_pci_probe(dev, id); if (retval) - return retval; + goto put_runtime_pm; /* USB 2.0 roothub is stored in the PCI device now. 
*/ hcd = dev_get_drvdata(&dev->dev); @@ -228,12 +238,17 @@ static int xhci_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) if (xhci->quirks & XHCI_LPM_SUPPORT) hcd_to_bus(xhci->shared_hcd)->root_hub->lpm_capable = 1; + /* USB-2 and USB-3 roothubs initialized, allow runtime pm suspend */ + pm_runtime_put_noidle(&dev->dev); + return 0; put_usb3_hcd: usb_put_hcd(xhci->shared_hcd); dealloc_usb2_hcd: usb_hcd_pci_remove(dev); +put_runtime_pm: + pm_runtime_put_noidle(&dev->dev); return retval; } @@ -268,7 +283,7 @@ static int xhci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup) if (xhci_compliance_mode_recovery_timer_quirk_check()) pdev->no_d3cold = true; - return xhci_suspend(xhci); + return xhci_suspend(xhci, do_wakeup); } static int xhci_pci_resume(struct usb_hcd *hcd, bool hibernated) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index d9c169f..bb50d30 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -203,7 +203,15 @@ static int xhci_plat_suspend(struct device *dev) struct usb_hcd *hcd = dev_get_drvdata(dev); struct xhci_hcd *xhci = hcd_to_xhci(hcd); - return xhci_suspend(xhci); + /* + * xhci_suspend() needs `do_wakeup` to know whether host is allowed + * to do wakeup during suspend. Since xhci_plat_suspend is currently + * only designed for system suspend, device_may_wakeup() is enough + * to dertermine whether host is allowed to do wakeup. Need to + * reconsider this when xhci_plat_suspend enlarges its scope, e.g., + * also applies to runtime suspend. 
+ */ + return xhci_suspend(xhci, device_may_wakeup(dev)); } static int xhci_plat_resume(struct device *dev) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 6bfbd80..6f052da 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -552,10 +552,11 @@ void xhci_find_new_dequeue_state(struct xhci_hcd *xhci, struct xhci_dequeue_state *state) { struct xhci_virt_device *dev = xhci->devs[slot_id]; + struct xhci_virt_ep *ep = &dev->eps[ep_index]; struct xhci_ring *ep_ring; struct xhci_generic_trb *trb; - struct xhci_ep_ctx *ep_ctx; dma_addr_t addr; + u64 hw_dequeue; ep_ring = xhci_triad_to_transfer_ring(xhci, slot_id, ep_index, stream_id); @@ -565,56 +566,65 @@ void xhci_find_new_dequeue_state(struct xhci_hcd *xhci, stream_id); return; } - state->new_cycle_state = 0; - xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, - "Finding segment containing stopped TRB."); - state->new_deq_seg = find_trb_seg(cur_td->start_seg, - dev->eps[ep_index].stopped_trb, - &state->new_cycle_state); - if (!state->new_deq_seg) { - WARN_ON(1); - return; - } /* Dig out the cycle state saved by the xHC during the stop ep cmd */ xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, "Finding endpoint context"); - ep_ctx = xhci_get_ep_ctx(xhci, dev->out_ctx, ep_index); - state->new_cycle_state = 0x1 & le64_to_cpu(ep_ctx->deq); + /* 4.6.9 the css flag is written to the stream context for streams */ + if (ep->ep_state & EP_HAS_STREAMS) { + struct xhci_stream_ctx *ctx = + &ep->stream_info->stream_ctx_array[stream_id]; + hw_dequeue = le64_to_cpu(ctx->stream_ring); + } else { + struct xhci_ep_ctx *ep_ctx + = xhci_get_ep_ctx(xhci, dev->out_ctx, ep_index); + hw_dequeue = le64_to_cpu(ep_ctx->deq); + } + + /* Find virtual address and segment of hardware dequeue pointer */ + state->new_deq_seg = ep_ring->deq_seg; + state->new_deq_ptr = ep_ring->dequeue; + while (xhci_trb_virt_to_dma(state->new_deq_seg, state->new_deq_ptr) + != (dma_addr_t)(hw_dequeue & ~0xf)) { + 
next_trb(xhci, ep_ring, &state->new_deq_seg, + &state->new_deq_ptr); + if (state->new_deq_ptr == ep_ring->dequeue) { + WARN_ON(1); + return; + } + } + /* + * Find cycle state for last_trb, starting at old cycle state of + * hw_dequeue. If there is only one segment ring, find_trb_seg() will + * return immediately and cannot toggle the cycle state if this search + * wraps around, so add one more toggle manually in that case. + */ + state->new_cycle_state = hw_dequeue & 0x1; + if (ep_ring->first_seg == ep_ring->first_seg->next && + cur_td->last_trb < state->new_deq_ptr) + state->new_cycle_state ^= 0x1; state->new_deq_ptr = cur_td->last_trb; xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, "Finding segment containing last TRB in TD."); state->new_deq_seg = find_trb_seg(state->new_deq_seg, - state->new_deq_ptr, - &state->new_cycle_state); + state->new_deq_ptr, &state->new_cycle_state); if (!state->new_deq_seg) { WARN_ON(1); return; } + /* Increment to find next TRB after last_trb. Cycle if appropriate. */ trb = &state->new_deq_ptr->generic; if (TRB_TYPE_LINK_LE32(trb->field[3]) && (trb->field[3] & cpu_to_le32(LINK_TOGGLE))) state->new_cycle_state ^= 0x1; next_trb(xhci, ep_ring, &state->new_deq_seg, &state->new_deq_ptr); - /* - * If there is only one segment in a ring, find_trb_seg()'s while loop - * will not run, and it will return before it has a chance to see if it - * needs to toggle the cycle bit. It can't tell if the stalled transfer - * ended just before the link TRB on a one-segment ring, or if the TD - * wrapped around the top of the ring, because it doesn't have the TD in - * question. Look for the one-segment case where stalled TRB's address - * is greater than the new dequeue pointer address. - */ - if (ep_ring->first_seg == ep_ring->first_seg->next && - state->new_deq_ptr < dev->eps[ep_index].stopped_trb) - state->new_cycle_state ^= 0x1; + /* Don't update the ring cycle state for the producer (us). 
*/ xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, "Cycle state = 0x%x", state->new_cycle_state); - /* Don't update the ring cycle state for the producer (us). */ xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, "New dequeue segment = %p (virtual)", state->new_deq_seg); @@ -802,7 +812,6 @@ static void handle_stopped_endpoint(struct xhci_hcd *xhci, if (list_empty(&ep->cancelled_td_list)) { xhci_stop_watchdog_timer_in_irq(xhci, ep); ep->stopped_td = NULL; - ep->stopped_trb = NULL; ring_doorbell_for_active_rings(xhci, slot_id, ep_index); return; } @@ -870,11 +879,9 @@ remove_finished_td: ring_doorbell_for_active_rings(xhci, slot_id, ep_index); } - /* Clear stopped_td and stopped_trb if endpoint is not halted */ - if (!(ep->ep_state & EP_HALTED)) { + /* Clear stopped_td if endpoint is not halted */ + if (!(ep->ep_state & EP_HALTED)) ep->stopped_td = NULL; - ep->stopped_trb = NULL; - } /* * Drop the lock and complete the URBs in the cancelled TD list. @@ -1199,9 +1206,8 @@ static void handle_reset_ep_completion(struct xhci_hcd *xhci, false); xhci_ring_cmd_db(xhci); } else { - /* Clear our internal halted state and restart the ring(s) */ + /* Clear our internal halted state */ xhci->devs[slot_id]->eps[ep_index].ep_state &= ~EP_HALTED; - ring_doorbell_for_active_rings(xhci, slot_id, ep_index); } } @@ -1870,14 +1876,12 @@ static void xhci_cleanup_halted_endpoint(struct xhci_hcd *xhci, struct xhci_virt_ep *ep = &xhci->devs[slot_id]->eps[ep_index]; ep->ep_state |= EP_HALTED; ep->stopped_td = td; - ep->stopped_trb = event_trb; ep->stopped_stream = stream_id; xhci_queue_reset_ep(xhci, slot_id, ep_index); xhci_cleanup_stalled_ring(xhci, td->urb->dev, ep_index); ep->stopped_td = NULL; - ep->stopped_trb = NULL; ep->stopped_stream = 0; xhci_ring_cmd_db(xhci); @@ -1959,26 +1963,15 @@ static int finish_td(struct xhci_hcd *xhci, struct xhci_td *td, * the ring dequeue pointer or take this TD off any lists yet. 
*/ ep->stopped_td = td; - ep->stopped_trb = event_trb; return 0; } else { - if (trb_comp_code == COMP_STALL) { - /* The transfer is completed from the driver's - * perspective, but we need to issue a set dequeue - * command for this stalled endpoint to move the dequeue - * pointer past the TD. We can't do that here because - * the halt condition must be cleared first. Let the - * USB class driver clear the stall later. - */ - ep->stopped_td = td; - ep->stopped_trb = event_trb; - ep->stopped_stream = ep_ring->stream_id; - } else if (xhci_requires_manual_halt_cleanup(xhci, - ep_ctx, trb_comp_code)) { - /* Other types of errors halt the endpoint, but the - * class driver doesn't call usb_reset_endpoint() unless - * the error is -EPIPE. Clear the halted status in the - * xHCI hardware manually. + if (trb_comp_code == COMP_STALL || + xhci_requires_manual_halt_cleanup(xhci, ep_ctx, + trb_comp_code)) { + /* Issue a reset endpoint command to clear the host side + * halt, followed by a set dequeue command to move the + * dequeue pointer past the TD. + * The class driver clears the device side halt later. */ xhci_cleanup_halted_endpoint(xhci, slot_id, ep_index, ep_ring->stream_id, @@ -2098,9 +2091,7 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td, else td->urb->actual_length = 0; - xhci_cleanup_halted_endpoint(xhci, - slot_id, ep_index, 0, td, event_trb); - return finish_td(xhci, td, event_trb, event, ep, status, true); + return finish_td(xhci, td, event_trb, event, ep, status, false); } /* * Did we transfer any data, despite the errors that might have @@ -2576,7 +2567,8 @@ static int handle_tx_event(struct xhci_hcd *xhci, * last TRB of the previous TD. The command completion handle * will take care the rest. 
*/ - if (!event_seg && trb_comp_code == COMP_STOP_INVAL) { + if (!event_seg && (trb_comp_code == COMP_STOP || + trb_comp_code == COMP_STOP_INVAL)) { ret = 0; goto cleanup; } @@ -2588,7 +2580,7 @@ static int handle_tx_event(struct xhci_hcd *xhci, * successful event after a short transfer. * Ignore it. */ - if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) && + if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) && ep_ring->last_td_was_short) { ep_ring->last_td_was_short = false; ret = 0; @@ -2653,17 +2645,8 @@ cleanup: if (ret) { urb = td->urb; urb_priv = urb->hcpriv; - /* Leave the TD around for the reset endpoint function - * to use(but only if it's not a control endpoint, - * since we already queued the Set TR dequeue pointer - * command for stalled control endpoints). - */ - if (usb_endpoint_xfer_control(&urb->ep->desc) || - (trb_comp_code != COMP_STALL && - trb_comp_code != COMP_BABBLE)) - xhci_urb_free_priv(xhci, urb_priv); - else - kfree(urb_priv); + + xhci_urb_free_priv(xhci, urb_priv); usb_hcd_unlink_urb_from_ep(bus_to_hcd(urb->dev->bus), urb); if ((urb->actual_length != urb->transfer_buffer_length && @@ -3630,7 +3613,7 @@ static unsigned int xhci_get_burst_count(struct xhci_hcd *xhci, return 0; max_burst = urb->ep->ss_ep_comp.bMaxBurst; - return roundup(total_packet_count, max_burst + 1) - 1; + return DIV_ROUND_UP(total_packet_count, max_burst + 1) - 1; } /* diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index f178f76..e0ccc95 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -35,6 +35,8 @@ #define DRIVER_AUTHOR "Sarah Sharp" #define DRIVER_DESC "'eXtensible' Host Controller (xHC) Driver" +#define PORT_WAKE_BITS (PORT_WKOC_E | PORT_WKDISC_E | PORT_WKCONN_E) + /* Some 0.95 hardware can't handle the chain bit on a Link TRB being cleared */ static int link_quirk; module_param(link_quirk, int, S_IRUGO | S_IWUSR); @@ -400,16 +402,16 @@ static int xhci_try_enable_msi(struct usb_hcd *hcd) #else -static int xhci_try_enable_msi(struct usb_hcd 
*hcd) +static inline int xhci_try_enable_msi(struct usb_hcd *hcd) { return 0; } -static void xhci_cleanup_msix(struct xhci_hcd *xhci) +static inline void xhci_cleanup_msix(struct xhci_hcd *xhci) { } -static void xhci_msix_sync_irqs(struct xhci_hcd *xhci) +static inline void xhci_msix_sync_irqs(struct xhci_hcd *xhci) { } @@ -840,13 +842,47 @@ static void xhci_clear_command_ring(struct xhci_hcd *xhci) xhci_set_cmd_ring_deq(xhci); } +static void xhci_disable_port_wake_on_bits(struct xhci_hcd *xhci) +{ + int port_index; + __le32 __iomem **port_array; + unsigned long flags; + u32 t1, t2; + + spin_lock_irqsave(&xhci->lock, flags); + + /* disble usb3 ports Wake bits*/ + port_index = xhci->num_usb3_ports; + port_array = xhci->usb3_ports; + while (port_index--) { + t1 = readl(port_array[port_index]); + t1 = xhci_port_state_to_neutral(t1); + t2 = t1 & ~PORT_WAKE_BITS; + if (t1 != t2) + writel(t2, port_array[port_index]); + } + + /* disble usb2 ports Wake bits*/ + port_index = xhci->num_usb2_ports; + port_array = xhci->usb2_ports; + while (port_index--) { + t1 = readl(port_array[port_index]); + t1 = xhci_port_state_to_neutral(t1); + t2 = t1 & ~PORT_WAKE_BITS; + if (t1 != t2) + writel(t2, port_array[port_index]); + } + + spin_unlock_irqrestore(&xhci->lock, flags); +} + /* * Stop HC (not bus-specific) * * This is called when the machine transition into S3/S4 mode. * */ -int xhci_suspend(struct xhci_hcd *xhci) +int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup) { int rc = 0; unsigned int delay = XHCI_MAX_HALT_USEC; @@ -857,6 +893,10 @@ int xhci_suspend(struct xhci_hcd *xhci) xhci->shared_hcd->state != HC_STATE_SUSPENDED) return -EINVAL; + /* Clear root port wake on bits if wakeup not allowed. */ + if (!do_wakeup) + xhci_disable_port_wake_on_bits(xhci); + /* Don't poll the roothubs on bus suspend. 
*/ xhci_dbg(xhci, "%s: stopping port polling.\n", __func__); clear_bit(HCD_FLAG_POLL_RH, &hcd->flags); @@ -926,7 +966,7 @@ int xhci_suspend(struct xhci_hcd *xhci) */ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) { - u32 command, temp = 0; + u32 command, temp = 0, status; struct usb_hcd *hcd = xhci_to_hcd(xhci); struct usb_hcd *secondary_hcd; int retval = 0; @@ -1045,8 +1085,12 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) done: if (retval == 0) { - usb_hcd_resume_root_hub(hcd); - usb_hcd_resume_root_hub(xhci->shared_hcd); + /* Resume root hubs only when have pending events. */ + status = readl(&xhci->op_regs->status); + if (status & STS_EINT) { + usb_hcd_resume_root_hub(hcd); + usb_hcd_resume_root_hub(xhci->shared_hcd); + } } /* @@ -2880,64 +2924,33 @@ void xhci_cleanup_stalled_ring(struct xhci_hcd *xhci, } } -/* Deal with stalled endpoints. The core should have sent the control message - * to clear the halt condition. However, we need to make the xHCI hardware - * reset its sequence number, since a device will expect a sequence number of - * zero after the halt condition is cleared. +/* Called when clearing halted device. The core should have sent the control + * message to clear the device halt condition. The host side of the halt should + * already be cleared with a reset endpoint command issued when the STALL tx + * event was received. 
+ * * Context: in_interrupt */ + void xhci_endpoint_reset(struct usb_hcd *hcd, struct usb_host_endpoint *ep) { struct xhci_hcd *xhci; - struct usb_device *udev; - unsigned int ep_index; - unsigned long flags; - int ret; - struct xhci_virt_ep *virt_ep; xhci = hcd_to_xhci(hcd); - udev = (struct usb_device *) ep->hcpriv; - /* Called with a root hub endpoint (or an endpoint that wasn't added - * with xhci_add_endpoint() - */ - if (!ep->hcpriv) - return; - ep_index = xhci_get_endpoint_index(&ep->desc); - virt_ep = &xhci->devs[udev->slot_id]->eps[ep_index]; - if (!virt_ep->stopped_td) { - xhci_dbg_trace(xhci, trace_xhci_dbg_reset_ep, - "Endpoint 0x%x not halted, refusing to reset.", - ep->desc.bEndpointAddress); - return; - } - if (usb_endpoint_xfer_control(&ep->desc)) { - xhci_dbg_trace(xhci, trace_xhci_dbg_reset_ep, - "Control endpoint stall already handled."); - return; - } - xhci_dbg_trace(xhci, trace_xhci_dbg_reset_ep, - "Queueing reset endpoint command"); - spin_lock_irqsave(&xhci->lock, flags); - ret = xhci_queue_reset_ep(xhci, udev->slot_id, ep_index); /* - * Can't change the ring dequeue pointer until it's transitioned to the - * stopped state, which is only upon a successful reset endpoint - * command. Better hope that last command worked! + * We might need to implement the config ep cmd in xhci 4.8.1 note: + * The Reset Endpoint Command may only be issued to endpoints in the + * Halted state. If software wishes reset the Data Toggle or Sequence + * Number of an endpoint that isn't in the Halted state, then software + * may issue a Configure Endpoint Command with the Drop and Add bits set + * for the target endpoint. that is in the Stopped state. 
*/ - if (!ret) { - xhci_cleanup_stalled_ring(xhci, udev, ep_index); - kfree(virt_ep->stopped_td); - xhci_ring_cmd_db(xhci); - } - virt_ep->stopped_td = NULL; - virt_ep->stopped_trb = NULL; - virt_ep->stopped_stream = 0; - spin_unlock_irqrestore(&xhci->lock, flags); - if (ret) - xhci_warn(xhci, "FIXME allocate a new ring segment\n"); + /* For now just print debug to follow the situation */ + xhci_dbg(xhci, "Endpoint 0x%x ep reset callback called\n", + ep->desc.bEndpointAddress); } static int xhci_check_streams_endpoint(struct xhci_hcd *xhci, @@ -3922,13 +3935,21 @@ static int __maybe_unused xhci_change_max_exit_latency(struct xhci_hcd *xhci, int ret; spin_lock_irqsave(&xhci->lock, flags); - if (max_exit_latency == xhci->devs[udev->slot_id]->current_mel) { + + virt_dev = xhci->devs[udev->slot_id]; + + /* + * virt_dev might not exists yet if xHC resumed from hibernate (S4) and + * xHC was re-initialized. Exit latency will be set later after + * hub_port_finish_reset() is done and xhci->devs[] are re-allocated + */ + + if (!virt_dev || max_exit_latency == virt_dev->current_mel) { spin_unlock_irqrestore(&xhci->lock, flags); return 0; } /* Attempt to issue an Evaluate Context command to change the MEL. 
*/ - virt_dev = xhci->devs[udev->slot_id]; command = xhci->lpm_command; ctrl_ctx = xhci_get_input_control_ctx(xhci, command->in_ctx); if (!ctrl_ctx) { diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index ed3a425..1703de9 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -864,8 +864,6 @@ struct xhci_virt_ep { #define EP_GETTING_NO_STREAMS (1 << 5) /* ---- Related to URB cancellation ---- */ struct list_head cancelled_td_list; - /* The TRB that was last reported in a stopped endpoint ring */ - union xhci_trb *stopped_trb; struct xhci_td *stopped_td; unsigned int stopped_stream; /* Watchdog timer for stop endpoint command to cancel URBs */ @@ -1769,7 +1767,7 @@ void xhci_shutdown(struct usb_hcd *hcd); int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks); #ifdef CONFIG_PM -int xhci_suspend(struct xhci_hcd *xhci); +int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup); int xhci_resume(struct xhci_hcd *xhci, bool hibernated); #else #define xhci_suspend NULL diff --git a/drivers/usb/misc/adutux.c b/drivers/usb/misc/adutux.c index 3eaa83f..e2373f1 100644 --- a/drivers/usb/misc/adutux.c +++ b/drivers/usb/misc/adutux.c @@ -815,15 +815,10 @@ static void adu_disconnect(struct usb_interface *interface) usb_set_intfdata(interface, NULL); /* if the device is not opened, then we clean up right now */ - dev_dbg(&dev->udev->dev, "%s : open count %d\n", - __func__, dev->open_count); if (!dev->open_count) adu_delete(dev); mutex_unlock(&adutux_mutex); - - dev_info(&interface->dev, "ADU device adutux%d now disconnected\n", - (minor - ADU_MINOR_BASE)); } /* usb specific object needed to register this driver with the usb subsystem */ diff --git a/drivers/usb/misc/sisusbvga/sisusb.c b/drivers/usb/misc/sisusbvga/sisusb.c index de98906..0aef801 100644 --- a/drivers/usb/misc/sisusbvga/sisusb.c +++ b/drivers/usb/misc/sisusbvga/sisusb.c @@ -3248,6 +3248,7 @@ static const struct usb_device_id sisusb_table[] = { { USB_DEVICE(0x0711, 0x0918) 
}, { USB_DEVICE(0x0711, 0x0920) }, { USB_DEVICE(0x0711, 0x0950) }, + { USB_DEVICE(0x0711, 0x5200) }, { USB_DEVICE(0x182d, 0x021c) }, { USB_DEVICE(0x182d, 0x0269) }, { } diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c index aa28ac8..9a1297e 100644 --- a/drivers/usb/misc/usbtest.c +++ b/drivers/usb/misc/usbtest.c @@ -7,9 +7,10 @@ #include <linux/moduleparam.h> #include <linux/scatterlist.h> #include <linux/mutex.h> - +#include <linux/timer.h> #include <linux/usb.h> +#define SIMPLE_IO_TIMEOUT 10000 /* in milliseconds */ /*-------------------------------------------------------------------------*/ @@ -366,6 +367,7 @@ static int simple_io( int max = urb->transfer_buffer_length; struct completion completion; int retval = 0; + unsigned long expire; urb->context = &completion; while (retval == 0 && iterations-- > 0) { @@ -378,9 +380,15 @@ static int simple_io( if (retval != 0) break; - /* NOTE: no timeouts; can't be broken out of by interrupt */ - wait_for_completion(&completion); - retval = urb->status; + expire = msecs_to_jiffies(SIMPLE_IO_TIMEOUT); + if (!wait_for_completion_timeout(&completion, expire)) { + usb_kill_urb(urb); + retval = (urb->status == -ENOENT ? 
+ -ETIMEDOUT : urb->status); + } else { + retval = urb->status; + } + urb->dev = udev; if (retval == 0 && usb_pipein(urb->pipe)) retval = simple_check_buf(tdev, urb); @@ -476,6 +484,14 @@ alloc_sglist(int nents, int max, int vary) return sg; } +static void sg_timeout(unsigned long _req) +{ + struct usb_sg_request *req = (struct usb_sg_request *) _req; + + req->status = -ETIMEDOUT; + usb_sg_cancel(req); +} + static int perform_sglist( struct usbtest_dev *tdev, unsigned iterations, @@ -487,6 +503,9 @@ static int perform_sglist( { struct usb_device *udev = testdev_to_usbdev(tdev); int retval = 0; + struct timer_list sg_timer; + + setup_timer_on_stack(&sg_timer, sg_timeout, (unsigned long) req); while (retval == 0 && iterations-- > 0) { retval = usb_sg_init(req, udev, pipe, @@ -497,7 +516,10 @@ static int perform_sglist( if (retval) break; + mod_timer(&sg_timer, jiffies + + msecs_to_jiffies(SIMPLE_IO_TIMEOUT)); usb_sg_wait(req); + del_timer_sync(&sg_timer); retval = req->status; /* FIXME check resulting data pattern */ @@ -1149,6 +1171,11 @@ static int unlink1(struct usbtest_dev *dev, int pipe, int size, int async) urb->context = &completion; urb->complete = unlink1_callback; + if (usb_pipeout(urb->pipe)) { + simple_fill_buf(urb); + urb->transfer_flags |= URB_ZERO_PACKET; + } + /* keep the endpoint busy. there are lots of hc/hcd-internal * states, and testing should get to all of them over time. * @@ -1279,6 +1306,11 @@ static int unlink_queued(struct usbtest_dev *dev, int pipe, unsigned num, unlink_queued_callback, &ctx); ctx.urbs[i]->transfer_dma = buf_dma; ctx.urbs[i]->transfer_flags = URB_NO_TRANSFER_DMA_MAP; + + if (usb_pipeout(ctx.urbs[i]->pipe)) { + simple_fill_buf(ctx.urbs[i]); + ctx.urbs[i]->transfer_flags |= URB_ZERO_PACKET; + } } /* Submit all the URBs and then unlink URBs num - 4 and num - 2. 
*/ diff --git a/drivers/usb/musb/musb_am335x.c b/drivers/usb/musb/musb_am335x.c index 41ac5b5..83b97dc 100644 --- a/drivers/usb/musb/musb_am335x.c +++ b/drivers/usb/musb/musb_am335x.c @@ -20,21 +20,6 @@ err: return ret; } -static int of_remove_populated_child(struct device *dev, void *d) -{ - struct platform_device *pdev = to_platform_device(dev); - - of_device_unregister(pdev); - return 0; -} - -static int am335x_child_remove(struct platform_device *pdev) -{ - device_for_each_child(&pdev->dev, NULL, of_remove_populated_child); - pm_runtime_disable(&pdev->dev); - return 0; -} - static const struct of_device_id am335x_child_of_match[] = { { .compatible = "ti,am33xx-usb" }, { }, @@ -43,13 +28,17 @@ MODULE_DEVICE_TABLE(of, am335x_child_of_match); static struct platform_driver am335x_child_driver = { .probe = am335x_child_probe, - .remove = am335x_child_remove, .driver = { .name = "am335x-usb-childs", .of_match_table = of_match_ptr(am335x_child_of_match), }, }; -module_platform_driver(am335x_child_driver); +static int __init am335x_child_init(void) +{ + return platform_driver_register(&am335x_child_driver); +} +module_init(am335x_child_init); + MODULE_DESCRIPTION("AM33xx child devices"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index f7dca0b..2cca870 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -439,7 +439,6 @@ void musb_hnp_stop(struct musb *musb) static irqreturn_t musb_stage0_irq(struct musb *musb, u8 int_usb, u8 devctl) { - struct usb_otg *otg = musb->xceiv->otg; irqreturn_t handled = IRQ_NONE; dev_dbg(musb->controller, "<== DevCtl=%02x, int_usb=0x%x\n", devctl, @@ -654,7 +653,7 @@ static irqreturn_t musb_stage0_irq(struct musb *musb, u8 int_usb, break; case OTG_STATE_B_PERIPHERAL: musb_g_suspend(musb); - musb->is_active = otg->gadget->b_hnp_enable; + musb->is_active = musb->g.b_hnp_enable; if (musb->is_active) { musb->xceiv->state = OTG_STATE_B_WAIT_ACON; 
dev_dbg(musb->controller, "HNP: Setting timer for b_ase0_brst\n"); @@ -670,7 +669,7 @@ static irqreturn_t musb_stage0_irq(struct musb *musb, u8 int_usb, break; case OTG_STATE_A_HOST: musb->xceiv->state = OTG_STATE_A_SUSPEND; - musb->is_active = otg->host->b_hnp_enable; + musb->is_active = musb->hcd->self.b_hnp_enable; break; case OTG_STATE_B_HOST: /* Transition to B_PERIPHERAL, see 6.8.2.6 p 44 */ diff --git a/drivers/usb/musb/musb_cppi41.c b/drivers/usb/musb/musb_cppi41.c index 0c593af..77b475a 100644 --- a/drivers/usb/musb/musb_cppi41.c +++ b/drivers/usb/musb/musb_cppi41.c @@ -190,10 +190,11 @@ static enum hrtimer_restart cppi41_recheck_tx_req(struct hrtimer *timer) } } - if (!list_empty(&controller->early_tx_list)) { + if (!list_empty(&controller->early_tx_list) && + !hrtimer_is_queued(&controller->early_tx)) { ret = HRTIMER_RESTART; hrtimer_forward_now(&controller->early_tx, - ktime_set(0, 150 * NSEC_PER_USEC)); + ktime_set(0, 50 * NSEC_PER_USEC)); } spin_unlock_irqrestore(&musb->lock, flags); @@ -266,9 +267,11 @@ static void cppi41_dma_callback(void *private_data) } list_add_tail(&cppi41_channel->tx_check, &controller->early_tx_list); - if (!hrtimer_active(&controller->early_tx)) { + if (!hrtimer_is_queued(&controller->early_tx)) { + unsigned long usecs = cppi41_channel->total_len / 10; + hrtimer_start_range_ns(&controller->early_tx, - ktime_set(0, 140 * NSEC_PER_USEC), + ktime_set(0, usecs * NSEC_PER_USEC), 40 * NSEC_PER_USEC, HRTIMER_MODE_REL); } @@ -583,9 +586,9 @@ static int cppi41_dma_controller_start(struct cppi41_dma_controller *controller) ret = of_property_read_string_index(np, "dma-names", i, &str); if (ret) goto err; - if (!strncmp(str, "tx", 2)) + if (strstarts(str, "tx")) is_tx = 1; - else if (!strncmp(str, "rx", 2)) + else if (strstarts(str, "rx")) is_tx = 0; else { dev_err(dev, "Wrong dmatype %s\n", str); diff --git a/drivers/usb/musb/musb_debugfs.c b/drivers/usb/musb/musb_debugfs.c index 4c21679..05d1b20 100644 --- 
a/drivers/usb/musb/musb_debugfs.c +++ b/drivers/usb/musb/musb_debugfs.c @@ -194,30 +194,30 @@ static ssize_t musb_test_mode_write(struct file *file, if (copy_from_user(&buf, ubuf, min_t(size_t, sizeof(buf) - 1, count))) return -EFAULT; - if (!strncmp(buf, "force host", 9)) + if (strstarts(buf, "force host")) test = MUSB_TEST_FORCE_HOST; - if (!strncmp(buf, "fifo access", 11)) + if (strstarts(buf, "fifo access")) test = MUSB_TEST_FIFO_ACCESS; - if (!strncmp(buf, "force full-speed", 15)) + if (strstarts(buf, "force full-speed")) test = MUSB_TEST_FORCE_FS; - if (!strncmp(buf, "force high-speed", 15)) + if (strstarts(buf, "force high-speed")) test = MUSB_TEST_FORCE_HS; - if (!strncmp(buf, "test packet", 10)) { + if (strstarts(buf, "test packet")) { test = MUSB_TEST_PACKET; musb_load_testpacket(musb); } - if (!strncmp(buf, "test K", 6)) + if (strstarts(buf, "test K")) test = MUSB_TEST_K; - if (!strncmp(buf, "test J", 6)) + if (strstarts(buf, "test J")) test = MUSB_TEST_J; - if (!strncmp(buf, "test SE0 NAK", 12)) + if (strstarts(buf, "test SE0 NAK")) test = MUSB_TEST_SE0_NAK; musb_writeb(musb->mregs, MUSB_TESTMODE, test); diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index 9a2b8c8..d73cda3 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -2631,7 +2631,6 @@ void musb_host_cleanup(struct musb *musb) if (musb->port_mode == MUSB_PORT_MODE_GADGET) return; usb_remove_hcd(musb->hcd); - musb->hcd = NULL; } void musb_host_free(struct musb *musb) diff --git a/drivers/usb/musb/ux500.c b/drivers/usb/musb/ux500.c index 59256b1..8264256 100644 --- a/drivers/usb/musb/ux500.c +++ b/drivers/usb/musb/ux500.c @@ -275,7 +275,6 @@ static int ux500_probe(struct platform_device *pdev) musb->dev.parent = &pdev->dev; musb->dev.dma_mask = &pdev->dev.coherent_dma_mask; musb->dev.coherent_dma_mask = pdev->dev.coherent_dma_mask; - musb->dev.of_node = pdev->dev.of_node; glue->dev = &pdev->dev; glue->musb = musb; diff --git 
a/drivers/usb/phy/phy-isp1301-omap.c b/drivers/usb/phy/phy-isp1301-omap.c index d3a5160..4a2ced0 100644 --- a/drivers/usb/phy/phy-isp1301-omap.c +++ b/drivers/usb/phy/phy-isp1301-omap.c @@ -1295,7 +1295,7 @@ isp1301_set_host(struct usb_otg *otg, struct usb_bus *host) return isp1301_otg_enable(isp); return 0; -#elif !defined(CONFIG_USB_GADGET_OMAP) +#elif !IS_ENABLED(CONFIG_USB_OMAP) // FIXME update its refcount otg->host = host; diff --git a/drivers/usb/phy/phy-tegra-usb.c b/drivers/usb/phy/phy-tegra-usb.c index e9cb1cb..d85a782 100644 --- a/drivers/usb/phy/phy-tegra-usb.c +++ b/drivers/usb/phy/phy-tegra-usb.c @@ -881,8 +881,8 @@ static int utmi_phy_probe(struct tegra_usb_phy *tegra_phy, return -ENOMEM; } - tegra_phy->config = devm_kzalloc(&pdev->dev, - sizeof(*tegra_phy->config), GFP_KERNEL); + tegra_phy->config = devm_kzalloc(&pdev->dev, sizeof(*config), + GFP_KERNEL); if (!tegra_phy->config) { dev_err(&pdev->dev, "unable to allocate memory for USB UTMIP config\n"); diff --git a/drivers/usb/phy/phy-ulpi.c b/drivers/usb/phy/phy-ulpi.c index 217339d..17ea3f2 100644 --- a/drivers/usb/phy/phy-ulpi.c +++ b/drivers/usb/phy/phy-ulpi.c @@ -47,6 +47,8 @@ struct ulpi_info { static struct ulpi_info ulpi_ids[] = { ULPI_INFO(ULPI_ID(0x04cc, 0x1504), "NXP ISP1504"), ULPI_INFO(ULPI_ID(0x0424, 0x0006), "SMSC USB331x"), + ULPI_INFO(ULPI_ID(0x0424, 0x0007), "SMSC USB3320"), + ULPI_INFO(ULPI_ID(0x0451, 0x1507), "TI TUSB1210"), }; static int ulpi_set_otg_flags(struct usb_phy *phy) diff --git a/drivers/usb/phy/phy.c b/drivers/usb/phy/phy.c index a9984c7..5d7966b 100644 --- a/drivers/usb/phy/phy.c +++ b/drivers/usb/phy/phy.c @@ -229,6 +229,9 @@ struct usb_phy *usb_get_phy_dev(struct device *dev, u8 index) phy = __usb_find_phy_dev(dev, &phy_bind_list, index); if (IS_ERR(phy) || !try_module_get(phy->dev->driver->owner)) { pr_err("unable to find transceiver\n"); + if (!IS_ERR(phy)) + phy = ERR_PTR(-ENODEV); + goto err0; } diff --git a/drivers/usb/serial/bus.c b/drivers/usb/serial/bus.c 
index 6335490..74fc63b 100644 --- a/drivers/usb/serial/bus.c +++ b/drivers/usb/serial/bus.c @@ -97,13 +97,19 @@ static int usb_serial_device_remove(struct device *dev) struct usb_serial_port *port; int retval = 0; int minor; + int autopm_err; port = to_usb_serial_port(dev); if (!port) return -ENODEV; - /* make sure suspend/resume doesn't race against port_remove */ - usb_autopm_get_interface(port->serial->interface); + /* + * Make sure suspend/resume doesn't race against port_remove. + * + * Note that no further runtime PM callbacks will be made if + * autopm_get fails. + */ + autopm_err = usb_autopm_get_interface(port->serial->interface); minor = port->minor; tty_unregister_device(usb_serial_tty_driver, minor); @@ -117,7 +123,9 @@ static int usb_serial_device_remove(struct device *dev) dev_info(dev, "%s converter now disconnected from ttyUSB%d\n", driver->description, minor); - usb_autopm_put_interface(port->serial->interface); + if (!autopm_err) + usb_autopm_put_interface(port->serial->interface); + return retval; } diff --git a/drivers/usb/serial/console.c b/drivers/usb/serial/console.c index c69bb50..e4ce48c 100644 --- a/drivers/usb/serial/console.c +++ b/drivers/usb/serial/console.c @@ -47,6 +47,8 @@ static struct console usbcons; * ------------------------------------------------------------ */ +static const struct tty_operations usb_console_fake_tty_ops = { +}; /* * The parsing of the command line works exactly like the @@ -139,14 +141,18 @@ static int usb_console_setup(struct console *co, char *options) goto reset_open_count; } kref_init(&tty->kref); - tty_port_tty_set(&port->port, tty); tty->driver = usb_serial_tty_driver; tty->index = co->index; + init_ldsem(&tty->ldisc_sem); + INIT_LIST_HEAD(&tty->tty_files); + kref_get(&tty->driver->kref); + tty->ops = &usb_console_fake_tty_ops; if (tty_init_termios(tty)) { retval = -ENOMEM; dev_err(&port->dev, "no more memory\n"); - goto free_tty; + goto put_tty; } + tty_port_tty_set(&port->port, tty); } /* only call 
the device specific open if this @@ -164,7 +170,7 @@ static int usb_console_setup(struct console *co, char *options) serial->type->set_termios(tty, port, &dummy); tty_port_tty_set(&port->port, NULL); - kfree(tty); + tty_kref_put(tty); } set_bit(ASYNCB_INITIALIZED, &port->port.flags); } @@ -180,8 +186,8 @@ static int usb_console_setup(struct console *co, char *options) fail: tty_port_tty_set(&port->port, NULL); - free_tty: - kfree(tty); + put_tty: + tty_kref_put(tty); reset_open_count: port->port.count = 0; usb_autopm_put_interface(serial->interface); diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 6987b53..b5fa609 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -104,6 +104,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x8218) }, /* Lipowsky Industrie Elektronik GmbH, HARP-1 */ { USB_DEVICE(0x10C4, 0x822B) }, /* Modem EDGE(GSM) Comander 2 */ { USB_DEVICE(0x10C4, 0x826B) }, /* Cygnal Integrated Products, Inc., Fasttrax GPS demonstration module */ + { USB_DEVICE(0x10C4, 0x8281) }, /* Nanotec Plug & Drive */ { USB_DEVICE(0x10C4, 0x8293) }, /* Telegesis ETRX2USB */ { USB_DEVICE(0x10C4, 0x82F9) }, /* Procyon AVS */ { USB_DEVICE(0x10C4, 0x8341) }, /* Siemens MC35PU GPRS Modem */ @@ -119,8 +120,12 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */ { USB_DEVICE(0x10C4, 0x8664) }, /* AC-Services CAN-IF */ { USB_DEVICE(0x10C4, 0x8665) }, /* AC-Services OBD-IF */ + { USB_DEVICE(0x10C4, 0x8856) }, /* CEL EM357 ZigBee USB Stick - LR */ + { USB_DEVICE(0x10C4, 0x8857) }, /* CEL EM357 ZigBee USB Stick */ { USB_DEVICE(0x10C4, 0x88A4) }, /* MMB Networks ZigBee USB Device */ { USB_DEVICE(0x10C4, 0x88A5) }, /* Planet Innovation Ingeni ZigBee USB Device */ + { USB_DEVICE(0x10C4, 0x8946) }, /* Ketra N1 Wireless Interface */ + { USB_DEVICE(0x10C4, 0x8977) }, /* CEL MeshWorks DevKit Device */ { USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory 
default */ { USB_DEVICE(0x10C4, 0xEA61) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA70) }, /* Silicon Labs factory default */ @@ -152,7 +157,10 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */ { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */ { USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */ + { USB_DEVICE(0x1B1C, 0x1C00) }, /* Corsair USB Dongle */ + { USB_DEVICE(0x1BA4, 0x0002) }, /* Silicon Labs 358x factory default */ { USB_DEVICE(0x1BE3, 0x07A6) }, /* WAGO 750-923 USB Service Cable */ + { USB_DEVICE(0x1D6F, 0x0010) }, /* Seluxit ApS RF Dongle */ { USB_DEVICE(0x1E29, 0x0102) }, /* Festo CPX-USB */ { USB_DEVICE(0x1E29, 0x0501) }, /* Festo CMSP */ { USB_DEVICE(0x1FB9, 0x0100) }, /* Lake Shore Model 121 Current Source */ @@ -858,9 +866,6 @@ static int cp210x_startup(struct usb_serial *serial) struct usb_host_interface *cur_altsetting; struct cp210x_serial_private *spriv; - /* cp210x buffers behave strangely unless device is reset */ - usb_reset_device(serial->dev); - spriv = kzalloc(sizeof(*spriv), GFP_KERNEL); if (!spriv) return -ENOMEM; diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index b7f715f..00710ff 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -146,12 +146,14 @@ static struct ftdi_sio_quirk ftdi_8u2232c_quirk = { * /sys/bus/usb-serial/drivers/ftdi_sio/new_id and send a patch or report. 
*/ static struct usb_device_id id_table_combined [] = { + { USB_DEVICE(FTDI_VID, FTDI_BRICK_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ZEITCONTROL_TAGTRACE_MIFARE_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CTI_MINI_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CTI_NANO_PID) }, { USB_DEVICE(FTDI_VID, FTDI_AMC232_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CANUSB_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CANDAPTER_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_BM_ATOM_NANO_PID) }, { USB_DEVICE(FTDI_VID, FTDI_NXTCAM_PID) }, { USB_DEVICE(FTDI_VID, FTDI_EV3CON_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_0_PID) }, @@ -482,6 +484,39 @@ static struct usb_device_id id_table_combined [] = { { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01FD_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01FE_PID) }, { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_01FF_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_4701_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9300_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9301_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9302_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9303_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9304_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9305_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9306_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9307_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9308_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9309_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_930A_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_930B_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_930C_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_930D_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_930E_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_930F_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9310_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9311_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9312_PID) }, + { 
USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9313_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9314_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9315_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9316_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9317_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9318_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_9319_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_931A_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_931B_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_931C_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_931D_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_931E_PID) }, + { USB_DEVICE(MTXORB_VID, MTXORB_FTDI_RANGE_931F_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PERLE_ULTRAPORT_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PIEGROUP_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TNC_X_PID) }, @@ -581,6 +616,8 @@ static struct usb_device_id id_table_combined [] = { { USB_DEVICE(FTDI_VID, FTDI_TAVIR_STK500_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TIAO_UMPA_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, + { USB_DEVICE(FTDI_VID, FTDI_NT_ORIONLXM_PID), + .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, /* * ELV devices: */ @@ -672,6 +709,10 @@ static struct usb_device_id id_table_combined [] = { { USB_DEVICE(FTDI_VID, XSENS_CONVERTER_5_PID) }, { USB_DEVICE(FTDI_VID, XSENS_CONVERTER_6_PID) }, { USB_DEVICE(FTDI_VID, XSENS_CONVERTER_7_PID) }, + { USB_DEVICE(XSENS_VID, XSENS_AWINDA_DONGLE_PID) }, + { USB_DEVICE(XSENS_VID, XSENS_AWINDA_STATION_PID) }, + { USB_DEVICE(XSENS_VID, XSENS_CONVERTER_PID) }, + { USB_DEVICE(XSENS_VID, XSENS_MTW_PID) }, { USB_DEVICE(FTDI_VID, FTDI_OMNI1509) }, { USB_DEVICE(MOBILITY_VID, MOBILITY_USB_SERIAL_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ACTIVE_ROBOTS_PID) }, @@ -719,7 +760,8 @@ static struct usb_device_id id_table_combined [] = { { USB_DEVICE(FTDI_VID, FTDI_ACG_HFDUAL_PID) }, { USB_DEVICE(FTDI_VID, FTDI_YEI_SERVOCENTER31_PID) }, { USB_DEVICE(FTDI_VID, 
FTDI_THORLABS_PID) }, - { USB_DEVICE(TESTO_VID, TESTO_USB_INTERFACE_PID) }, + { USB_DEVICE(TESTO_VID, TESTO_1_PID) }, + { USB_DEVICE(TESTO_VID, TESTO_3_PID) }, { USB_DEVICE(FTDI_VID, FTDI_GAMMA_SCOUT_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TACTRIX_OPENPORT_13M_PID) }, { USB_DEVICE(FTDI_VID, FTDI_TACTRIX_OPENPORT_13S_PID) }, @@ -736,6 +778,7 @@ static struct usb_device_id id_table_combined [] = { { USB_DEVICE(FTDI_VID, FTDI_NDI_AURORA_SCU_PID), .driver_info = (kernel_ulong_t)&ftdi_NDI_device_quirk }, { USB_DEVICE(TELLDUS_VID, TELLDUS_TELLSTICK_PID) }, + { USB_DEVICE(NOVITUS_VID, NOVITUS_BONO_E_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_S03_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_59_PID) }, { USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_57A_PID) }, @@ -910,6 +953,45 @@ static struct usb_device_id id_table_combined [] = { { USB_DEVICE(FTDI_VID, FTDI_Z3X_PID) }, /* Cressi Devices */ { USB_DEVICE(FTDI_VID, FTDI_CRESSI_PID) }, + /* Brainboxes Devices */ + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_VX_001_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_VX_012_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_VX_023_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_VX_034_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_101_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_1_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_2_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_3_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_4_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_5_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_6_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_7_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_8_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_257_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_1_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_2_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_3_PID) }, + { USB_DEVICE(BRAINBOXES_VID, 
BRAINBOXES_US_279_4_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_313_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_324_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_346_1_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_346_2_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_357_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_606_1_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_606_2_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_606_3_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_701_1_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_701_2_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_842_1_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_842_2_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_842_3_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_842_4_PID) }, + /* ekey Devices */ + { USB_DEVICE(FTDI_VID, FTDI_EKEY_CONV_USB_PID) }, + /* Infineon Devices */ + { USB_DEVICE_INTERFACE_NUMBER(INFINEON_VID, INFINEON_TRIBOARD_PID, 1) }, + /* GE Healthcare devices */ + { USB_DEVICE(GE_HEALTHCARE_VID, GE_HEALTHCARE_NEMO_TRACKER_PID) }, { } /* Terminating entry */ }; @@ -1532,14 +1614,17 @@ static void ftdi_set_max_packet_size(struct usb_serial_port *port) struct usb_device *udev = serial->dev; struct usb_interface *interface = serial->interface; - struct usb_endpoint_descriptor *ep_desc = &interface->cur_altsetting->endpoint[1].desc; + struct usb_endpoint_descriptor *ep_desc; unsigned num_endpoints; - int i; + unsigned i; num_endpoints = interface->cur_altsetting->desc.bNumEndpoints; dev_info(&udev->dev, "Number of endpoints %d\n", num_endpoints); + if (!num_endpoints) + return; + /* NOTE: some customers have programmed FT232R/FT245R devices * with an endpoint size of 0 - not good. 
In this case, we * want to override the endpoint descriptor setting and use a diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index e599fbf..e52409c 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -30,6 +30,12 @@ /*** third-party PIDs (using FTDI_VID) ***/ +/* + * Certain versions of the official Windows FTDI driver reprogrammed + * counterfeit FTDI devices to PID 0. Support these devices anyway. + */ +#define FTDI_BRICK_PID 0x0000 + #define FTDI_LUMEL_PD12_PID 0x6002 /* @@ -42,6 +48,8 @@ /* www.candapter.com Ewert Energy Systems CANdapter device */ #define FTDI_CANDAPTER_PID 0x9F80 /* Product Id */ +#define FTDI_BM_ATOM_NANO_PID 0xa559 /* Basic Micro ATOM Nano USB2Serial */ + /* * Texas Instruments XDS100v2 JTAG / BeagleBone A3 * http://processors.wiki.ti.com/index.php/XDS100 @@ -140,12 +148,19 @@ /* * Xsens Technologies BV products (http://www.xsens.com). */ -#define XSENS_CONVERTER_0_PID 0xD388 -#define XSENS_CONVERTER_1_PID 0xD389 +#define XSENS_VID 0x2639 +#define XSENS_AWINDA_STATION_PID 0x0101 +#define XSENS_AWINDA_DONGLE_PID 0x0102 +#define XSENS_MTW_PID 0x0200 /* Xsens MTw */ +#define XSENS_CONVERTER_PID 0xD00D /* Xsens USB-serial converter */ + +/* Xsens devices using FTDI VID */ +#define XSENS_CONVERTER_0_PID 0xD388 /* Xsens USB converter */ +#define XSENS_CONVERTER_1_PID 0xD389 /* Xsens Wireless Receiver */ #define XSENS_CONVERTER_2_PID 0xD38A -#define XSENS_CONVERTER_3_PID 0xD38B -#define XSENS_CONVERTER_4_PID 0xD38C -#define XSENS_CONVERTER_5_PID 0xD38D +#define XSENS_CONVERTER_3_PID 0xD38B /* Xsens USB-serial converter */ +#define XSENS_CONVERTER_4_PID 0xD38C /* Xsens Wireless Receiver */ +#define XSENS_CONVERTER_5_PID 0xD38D /* Xsens Awinda Station */ #define XSENS_CONVERTER_6_PID 0xD38E #define XSENS_CONVERTER_7_PID 0xD38F @@ -538,6 +553,11 @@ */ #define FTDI_TIAO_UMPA_PID 0x8a98 /* TIAO/DIYGADGET USB Multi-Protocol Adapter */ +/* + * NovaTech product ids (FTDI_VID) + */ 
+#define FTDI_NT_ORIONLXM_PID 0x7c90 /* OrionLXm Substation Automation Platform */ + /********************************/ /** third-party VID/PID combos **/ @@ -579,6 +599,12 @@ #define RATOC_PRODUCT_ID_USB60F 0xb020 /* + * Infineon Technologies + */ +#define INFINEON_VID 0x058b +#define INFINEON_TRIBOARD_PID 0x0028 /* DAS JTAG TriBoard TC1798 V1.0 */ + +/* * Acton Research Corp. */ #define ACTON_VID 0x0647 /* Vendor ID */ @@ -793,7 +819,8 @@ * Submitted by Colin Leroy */ #define TESTO_VID 0x128D -#define TESTO_USB_INTERFACE_PID 0x0001 +#define TESTO_1_PID 0x0001 +#define TESTO_3_PID 0x0003 /* * Mobility Electronics products. @@ -820,6 +847,12 @@ #define TELLDUS_TELLSTICK_PID 0x0C30 /* RF control dongle 433 MHz using FT232RL */ /* + * NOVITUS printers + */ +#define NOVITUS_VID 0x1a28 +#define NOVITUS_BONO_E_PID 0x6010 + +/* * RT Systems programming cables for various ham radios */ #define RTSYSTEMS_VID 0x2100 /* Vendor ID */ @@ -893,8 +926,8 @@ #define BAYER_CONTOUR_CABLE_PID 0x6001 /* - * The following are the values for the Matrix Orbital FTDI Range - * Anything in this range will use an FT232RL. + * Matrix Orbital Intelligent USB displays. 
+ * http://www.matrixorbital.com */ #define MTXORB_VID 0x1B3D #define MTXORB_FTDI_RANGE_0100_PID 0x0100 @@ -1153,8 +1186,39 @@ #define MTXORB_FTDI_RANGE_01FD_PID 0x01FD #define MTXORB_FTDI_RANGE_01FE_PID 0x01FE #define MTXORB_FTDI_RANGE_01FF_PID 0x01FF - - +#define MTXORB_FTDI_RANGE_4701_PID 0x4701 +#define MTXORB_FTDI_RANGE_9300_PID 0x9300 +#define MTXORB_FTDI_RANGE_9301_PID 0x9301 +#define MTXORB_FTDI_RANGE_9302_PID 0x9302 +#define MTXORB_FTDI_RANGE_9303_PID 0x9303 +#define MTXORB_FTDI_RANGE_9304_PID 0x9304 +#define MTXORB_FTDI_RANGE_9305_PID 0x9305 +#define MTXORB_FTDI_RANGE_9306_PID 0x9306 +#define MTXORB_FTDI_RANGE_9307_PID 0x9307 +#define MTXORB_FTDI_RANGE_9308_PID 0x9308 +#define MTXORB_FTDI_RANGE_9309_PID 0x9309 +#define MTXORB_FTDI_RANGE_930A_PID 0x930A +#define MTXORB_FTDI_RANGE_930B_PID 0x930B +#define MTXORB_FTDI_RANGE_930C_PID 0x930C +#define MTXORB_FTDI_RANGE_930D_PID 0x930D +#define MTXORB_FTDI_RANGE_930E_PID 0x930E +#define MTXORB_FTDI_RANGE_930F_PID 0x930F +#define MTXORB_FTDI_RANGE_9310_PID 0x9310 +#define MTXORB_FTDI_RANGE_9311_PID 0x9311 +#define MTXORB_FTDI_RANGE_9312_PID 0x9312 +#define MTXORB_FTDI_RANGE_9313_PID 0x9313 +#define MTXORB_FTDI_RANGE_9314_PID 0x9314 +#define MTXORB_FTDI_RANGE_9315_PID 0x9315 +#define MTXORB_FTDI_RANGE_9316_PID 0x9316 +#define MTXORB_FTDI_RANGE_9317_PID 0x9317 +#define MTXORB_FTDI_RANGE_9318_PID 0x9318 +#define MTXORB_FTDI_RANGE_9319_PID 0x9319 +#define MTXORB_FTDI_RANGE_931A_PID 0x931A +#define MTXORB_FTDI_RANGE_931B_PID 0x931B +#define MTXORB_FTDI_RANGE_931C_PID 0x931C +#define MTXORB_FTDI_RANGE_931D_PID 0x931D +#define MTXORB_FTDI_RANGE_931E_PID 0x931E +#define MTXORB_FTDI_RANGE_931F_PID 0x931F /* * The Mobility Lab (TML) @@ -1326,3 +1390,51 @@ * Manufacturer: Cressi */ #define FTDI_CRESSI_PID 0x87d0 + +/* + * Brainboxes devices + */ +#define BRAINBOXES_VID 0x05d1 +#define BRAINBOXES_VX_001_PID 0x1001 /* VX-001 ExpressCard 1 Port RS232 */ +#define BRAINBOXES_VX_012_PID 0x1002 /* VX-012 ExpressCard 2 Port RS232 
*/ +#define BRAINBOXES_VX_023_PID 0x1003 /* VX-023 ExpressCard 1 Port RS422/485 */ +#define BRAINBOXES_VX_034_PID 0x1004 /* VX-034 ExpressCard 2 Port RS422/485 */ +#define BRAINBOXES_US_101_PID 0x1011 /* US-101 1xRS232 */ +#define BRAINBOXES_US_324_PID 0x1013 /* US-324 1xRS422/485 1Mbaud */ +#define BRAINBOXES_US_606_1_PID 0x2001 /* US-606 6 Port RS232 Serial Port 1 and 2 */ +#define BRAINBOXES_US_606_2_PID 0x2002 /* US-606 6 Port RS232 Serial Port 3 and 4 */ +#define BRAINBOXES_US_606_3_PID 0x2003 /* US-606 6 Port RS232 Serial Port 4 and 6 */ +#define BRAINBOXES_US_701_1_PID 0x2011 /* US-701 4xRS232 1Mbaud Port 1 and 2 */ +#define BRAINBOXES_US_701_2_PID 0x2012 /* US-701 4xRS422 1Mbaud Port 3 and 4 */ +#define BRAINBOXES_US_279_1_PID 0x2021 /* US-279 8xRS422 1Mbaud Port 1 and 2 */ +#define BRAINBOXES_US_279_2_PID 0x2022 /* US-279 8xRS422 1Mbaud Port 3 and 4 */ +#define BRAINBOXES_US_279_3_PID 0x2023 /* US-279 8xRS422 1Mbaud Port 5 and 6 */ +#define BRAINBOXES_US_279_4_PID 0x2024 /* US-279 8xRS422 1Mbaud Port 7 and 8 */ +#define BRAINBOXES_US_346_1_PID 0x3011 /* US-346 4xRS422/485 1Mbaud Port 1 and 2 */ +#define BRAINBOXES_US_346_2_PID 0x3012 /* US-346 4xRS422/485 1Mbaud Port 3 and 4 */ +#define BRAINBOXES_US_257_PID 0x5001 /* US-257 2xRS232 1Mbaud */ +#define BRAINBOXES_US_313_PID 0x6001 /* US-313 2xRS422/485 1Mbaud */ +#define BRAINBOXES_US_357_PID 0x7001 /* US_357 1xRS232/422/485 */ +#define BRAINBOXES_US_842_1_PID 0x8001 /* US-842 8xRS422/485 1Mbaud Port 1 and 2 */ +#define BRAINBOXES_US_842_2_PID 0x8002 /* US-842 8xRS422/485 1Mbaud Port 3 and 4 */ +#define BRAINBOXES_US_842_3_PID 0x8003 /* US-842 8xRS422/485 1Mbaud Port 5 and 6 */ +#define BRAINBOXES_US_842_4_PID 0x8004 /* US-842 8xRS422/485 1Mbaud Port 7 and 8 */ +#define BRAINBOXES_US_160_1_PID 0x9001 /* US-160 16xRS232 1Mbaud Port 1 and 2 */ +#define BRAINBOXES_US_160_2_PID 0x9002 /* US-160 16xRS232 1Mbaud Port 3 and 4 */ +#define BRAINBOXES_US_160_3_PID 0x9003 /* US-160 16xRS232 1Mbaud Port 5 and 6 */ 
+#define BRAINBOXES_US_160_4_PID 0x9004 /* US-160 16xRS232 1Mbaud Port 7 and 8 */ +#define BRAINBOXES_US_160_5_PID 0x9005 /* US-160 16xRS232 1Mbaud Port 9 and 10 */ +#define BRAINBOXES_US_160_6_PID 0x9006 /* US-160 16xRS232 1Mbaud Port 11 and 12 */ +#define BRAINBOXES_US_160_7_PID 0x9007 /* US-160 16xRS232 1Mbaud Port 13 and 14 */ +#define BRAINBOXES_US_160_8_PID 0x9008 /* US-160 16xRS232 1Mbaud Port 15 and 16 */ + +/* + * ekey biometric systems GmbH (http://ekey.net/) + */ +#define FTDI_EKEY_CONV_USB_PID 0xCB08 /* Converter USB */ + +/* + * GE Healthcare devices + */ +#define GE_HEALTHCARE_VID 0x1901 +#define GE_HEALTHCARE_NEMO_TRACKER_PID 0x0015 diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index b7187bf..0385bc4 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -29,6 +29,7 @@ #include <linux/spinlock.h> #include <linux/mutex.h> #include <linux/serial.h> +#include <linux/swab.h> #include <linux/kfifo.h> #include <linux/ioctl.h> #include <linux/firmware.h> @@ -281,7 +282,7 @@ static int read_download_mem(struct usb_device *dev, int start_address, { int status = 0; __u8 read_length; - __be16 be_start_address; + u16 be_start_address; dev_dbg(&dev->dev, "%s - @ %x for %d\n", __func__, start_address, length); @@ -297,10 +298,14 @@ static int read_download_mem(struct usb_device *dev, int start_address, if (read_length > 1) { dev_dbg(&dev->dev, "%s - @ %x for %d\n", __func__, start_address, read_length); } - be_start_address = cpu_to_be16(start_address); + /* + * NOTE: Must use swab as wIndex is sent in little-endian + * byte order regardless of host byte order. 
+ */ + be_start_address = swab16((u16)start_address); status = ti_vread_sync(dev, UMPC_MEMORY_READ, (__u16)address_type, - (__force __u16)be_start_address, + be_start_address, buffer, read_length); if (status) { @@ -397,7 +402,7 @@ static int write_i2c_mem(struct edgeport_serial *serial, struct device *dev = &serial->serial->dev->dev; int status = 0; int write_length; - __be16 be_start_address; + u16 be_start_address; /* We can only send a maximum of 1 aligned byte page at a time */ @@ -412,11 +417,16 @@ static int write_i2c_mem(struct edgeport_serial *serial, __func__, start_address, write_length); usb_serial_debug_data(dev, __func__, write_length, buffer); - /* Write first page */ - be_start_address = cpu_to_be16(start_address); + /* + * Write first page. + * + * NOTE: Must use swab as wIndex is sent in little-endian byte order + * regardless of host byte order. + */ + be_start_address = swab16((u16)start_address); status = ti_vsend_sync(serial->serial->dev, UMPC_MEMORY_WRITE, (__u16)address_type, - (__force __u16)be_start_address, + be_start_address, buffer, write_length); if (status) { dev_dbg(dev, "%s - ERROR %d\n", __func__, status); @@ -439,11 +449,16 @@ static int write_i2c_mem(struct edgeport_serial *serial, __func__, start_address, write_length); usb_serial_debug_data(dev, __func__, write_length, buffer); - /* Write next page */ - be_start_address = cpu_to_be16(start_address); + /* + * Write next page. + * + * NOTE: Must use swab as wIndex is sent in little-endian byte + * order regardless of host byte order. 
+ */ + be_start_address = swab16((u16)start_address); status = ti_vsend_sync(serial->serial->dev, UMPC_MEMORY_WRITE, (__u16)address_type, - (__force __u16)be_start_address, + be_start_address, buffer, write_length); if (status) { dev_err(dev, "%s - ERROR %d\n", __func__, status); @@ -590,8 +605,8 @@ static int get_descriptor_addr(struct edgeport_serial *serial, if (rom_desc->Type == desc_type) return start_address; - start_address = start_address + sizeof(struct ti_i2c_desc) - + rom_desc->Size; + start_address = start_address + sizeof(struct ti_i2c_desc) + + le16_to_cpu(rom_desc->Size); } while ((start_address < TI_MAX_I2C_SIZE) && rom_desc->Type); @@ -604,7 +619,7 @@ static int valid_csum(struct ti_i2c_desc *rom_desc, __u8 *buffer) __u16 i; __u8 cs = 0; - for (i = 0; i < rom_desc->Size; i++) + for (i = 0; i < le16_to_cpu(rom_desc->Size); i++) cs = (__u8)(cs + buffer[i]); if (cs != rom_desc->CheckSum) { @@ -658,7 +673,7 @@ static int check_i2c_image(struct edgeport_serial *serial) break; if ((start_address + sizeof(struct ti_i2c_desc) + - rom_desc->Size) > TI_MAX_I2C_SIZE) { + le16_to_cpu(rom_desc->Size)) > TI_MAX_I2C_SIZE) { status = -ENODEV; dev_dbg(dev, "%s - structure too big, erroring out.\n", __func__); break; @@ -673,7 +688,8 @@ static int check_i2c_image(struct edgeport_serial *serial) /* Read the descriptor data */ status = read_rom(serial, start_address + sizeof(struct ti_i2c_desc), - rom_desc->Size, buffer); + le16_to_cpu(rom_desc->Size), + buffer); if (status) break; @@ -682,7 +698,7 @@ static int check_i2c_image(struct edgeport_serial *serial) break; } start_address = start_address + sizeof(struct ti_i2c_desc) + - rom_desc->Size; + le16_to_cpu(rom_desc->Size); } while ((rom_desc->Type != I2C_DESC_TYPE_ION) && (start_address < TI_MAX_I2C_SIZE)); @@ -721,7 +737,7 @@ static int get_manuf_info(struct edgeport_serial *serial, __u8 *buffer) /* Read the descriptor data */ status = read_rom(serial, start_address+sizeof(struct ti_i2c_desc), - rom_desc->Size, 
buffer); + le16_to_cpu(rom_desc->Size), buffer); if (status) goto exit; @@ -816,7 +832,7 @@ static int build_i2c_fw_hdr(__u8 *header, struct device *dev) firmware_rec = (struct ti_i2c_firmware_rec*)i2c_header->Data; i2c_header->Type = I2C_DESC_TYPE_FIRMWARE_BLANK; - i2c_header->Size = (__u16)buffer_size; + i2c_header->Size = cpu_to_le16(buffer_size); i2c_header->CheckSum = cs; firmware_rec->Ver_Major = OperationalMajorVersion; firmware_rec->Ver_Minor = OperationalMinorVersion; diff --git a/drivers/usb/serial/io_usbvend.h b/drivers/usb/serial/io_usbvend.h index 51f83fb..6f6a856 100644 --- a/drivers/usb/serial/io_usbvend.h +++ b/drivers/usb/serial/io_usbvend.h @@ -594,7 +594,7 @@ struct edge_boot_descriptor { struct ti_i2c_desc { __u8 Type; // Type of descriptor - __u16 Size; // Size of data only not including header + __le16 Size; // Size of data only not including header __u8 CheckSum; // Checksum (8 bit sum of data only) __u8 Data[0]; // Data starts here } __attribute__((packed)); diff --git a/drivers/usb/serial/keyspan.c b/drivers/usb/serial/keyspan.c index d6960ae..e58e21b 100644 --- a/drivers/usb/serial/keyspan.c +++ b/drivers/usb/serial/keyspan.c @@ -312,24 +312,30 @@ static void usa26_indat_callback(struct urb *urb) if ((data[0] & 0x80) == 0) { /* no errors on individual bytes, only possible overrun err */ - if (data[0] & RXERROR_OVERRUN) - err = TTY_OVERRUN; - else - err = 0; + if (data[0] & RXERROR_OVERRUN) { + tty_insert_flip_char(&port->port, 0, + TTY_OVERRUN); + } for (i = 1; i < urb->actual_length ; ++i) - tty_insert_flip_char(&port->port, data[i], err); + tty_insert_flip_char(&port->port, data[i], + TTY_NORMAL); } else { /* some bytes had errors, every byte has status */ dev_dbg(&port->dev, "%s - RX error!!!!\n", __func__); for (i = 0; i + 1 < urb->actual_length; i += 2) { - int stat = data[i], flag = 0; - if (stat & RXERROR_OVERRUN) - flag |= TTY_OVERRUN; - if (stat & RXERROR_FRAMING) - flag |= TTY_FRAME; - if (stat & RXERROR_PARITY) - flag |= 
TTY_PARITY; + int stat = data[i]; + int flag = TTY_NORMAL; + + if (stat & RXERROR_OVERRUN) { + tty_insert_flip_char(&port->port, 0, + TTY_OVERRUN); + } /* XXX should handle break (0x10) */ + if (stat & RXERROR_PARITY) + flag = TTY_PARITY; + else if (stat & RXERROR_FRAMING) + flag = TTY_FRAME; + tty_insert_flip_char(&port->port, data[i+1], flag); } @@ -416,6 +422,8 @@ static void usa26_instat_callback(struct urb *urb) } port = serial->port[msg->port]; p_priv = usb_get_serial_port_data(port); + if (!p_priv) + goto resubmit; /* Update handshaking pin state information */ old_dcd_state = p_priv->dcd_state; @@ -426,7 +434,7 @@ static void usa26_instat_callback(struct urb *urb) if (old_dcd_state != p_priv->dcd_state) tty_port_tty_hangup(&port->port, true); - +resubmit: /* Resubmit urb so we continue receiving */ err = usb_submit_urb(urb, GFP_ATOMIC); if (err != 0) @@ -536,6 +544,8 @@ static void usa28_instat_callback(struct urb *urb) } port = serial->port[msg->port]; p_priv = usb_get_serial_port_data(port); + if (!p_priv) + goto resubmit; /* Update handshaking pin state information */ old_dcd_state = p_priv->dcd_state; @@ -546,7 +556,7 @@ static void usa28_instat_callback(struct urb *urb) if (old_dcd_state != p_priv->dcd_state && old_dcd_state) tty_port_tty_hangup(&port->port, true); - +resubmit: /* Resubmit urb so we continue receiving */ err = usb_submit_urb(urb, GFP_ATOMIC); if (err != 0) @@ -619,6 +629,8 @@ static void usa49_instat_callback(struct urb *urb) } port = serial->port[msg->portNumber]; p_priv = usb_get_serial_port_data(port); + if (!p_priv) + goto resubmit; /* Update handshaking pin state information */ old_dcd_state = p_priv->dcd_state; @@ -629,7 +641,7 @@ static void usa49_instat_callback(struct urb *urb) if (old_dcd_state != p_priv->dcd_state && old_dcd_state) tty_port_tty_hangup(&port->port, true); - +resubmit: /* Resubmit urb so we continue receiving */ err = usb_submit_urb(urb, GFP_ATOMIC); if (err != 0) @@ -667,14 +679,19 @@ static void 
usa49_indat_callback(struct urb *urb) } else { /* some bytes had errors, every byte has status */ for (i = 0; i + 1 < urb->actual_length; i += 2) { - int stat = data[i], flag = 0; - if (stat & RXERROR_OVERRUN) - flag |= TTY_OVERRUN; - if (stat & RXERROR_FRAMING) - flag |= TTY_FRAME; - if (stat & RXERROR_PARITY) - flag |= TTY_PARITY; + int stat = data[i]; + int flag = TTY_NORMAL; + + if (stat & RXERROR_OVERRUN) { + tty_insert_flip_char(&port->port, 0, + TTY_OVERRUN); + } /* XXX should handle break (0x10) */ + if (stat & RXERROR_PARITY) + flag = TTY_PARITY; + else if (stat & RXERROR_FRAMING) + flag = TTY_FRAME; + tty_insert_flip_char(&port->port, data[i+1], flag); } @@ -731,15 +748,19 @@ static void usa49wg_indat_callback(struct urb *urb) */ for (x = 0; x + 1 < len && i + 1 < urb->actual_length; x += 2) { - int stat = data[i], flag = 0; + int stat = data[i]; + int flag = TTY_NORMAL; - if (stat & RXERROR_OVERRUN) - flag |= TTY_OVERRUN; - if (stat & RXERROR_FRAMING) - flag |= TTY_FRAME; - if (stat & RXERROR_PARITY) - flag |= TTY_PARITY; + if (stat & RXERROR_OVERRUN) { + tty_insert_flip_char(&port->port, 0, + TTY_OVERRUN); + } /* XXX should handle break (0x10) */ + if (stat & RXERROR_PARITY) + flag = TTY_PARITY; + else if (stat & RXERROR_FRAMING) + flag = TTY_FRAME; + tty_insert_flip_char(&port->port, data[i+1], flag); i += 2; @@ -791,25 +812,31 @@ static void usa90_indat_callback(struct urb *urb) if ((data[0] & 0x80) == 0) { /* no errors on individual bytes, only possible overrun err*/ - if (data[0] & RXERROR_OVERRUN) - err = TTY_OVERRUN; - else - err = 0; + if (data[0] & RXERROR_OVERRUN) { + tty_insert_flip_char(&port->port, 0, + TTY_OVERRUN); + } for (i = 1; i < urb->actual_length ; ++i) tty_insert_flip_char(&port->port, - data[i], err); + data[i], TTY_NORMAL); } else { /* some bytes had errors, every byte has status */ dev_dbg(&port->dev, "%s - RX error!!!!\n", __func__); for (i = 0; i + 1 < urb->actual_length; i += 2) { - int stat = data[i], flag = 0; - if (stat & 
RXERROR_OVERRUN) - flag |= TTY_OVERRUN; - if (stat & RXERROR_FRAMING) - flag |= TTY_FRAME; - if (stat & RXERROR_PARITY) - flag |= TTY_PARITY; + int stat = data[i]; + int flag = TTY_NORMAL; + + if (stat & RXERROR_OVERRUN) { + tty_insert_flip_char( + &port->port, 0, + TTY_OVERRUN); + } /* XXX should handle break (0x10) */ + if (stat & RXERROR_PARITY) + flag = TTY_PARITY; + else if (stat & RXERROR_FRAMING) + flag = TTY_FRAME; + tty_insert_flip_char(&port->port, data[i+1], flag); } @@ -852,6 +879,8 @@ static void usa90_instat_callback(struct urb *urb) port = serial->port[0]; p_priv = usb_get_serial_port_data(port); + if (!p_priv) + goto resubmit; /* Update handshaking pin state information */ old_dcd_state = p_priv->dcd_state; @@ -862,7 +891,7 @@ static void usa90_instat_callback(struct urb *urb) if (old_dcd_state != p_priv->dcd_state && old_dcd_state) tty_port_tty_hangup(&port->port, true); - +resubmit: /* Resubmit urb so we continue receiving */ err = usb_submit_urb(urb, GFP_ATOMIC); if (err != 0) @@ -923,6 +952,8 @@ static void usa67_instat_callback(struct urb *urb) port = serial->port[msg->port]; p_priv = usb_get_serial_port_data(port); + if (!p_priv) + goto resubmit; /* Update handshaking pin state information */ old_dcd_state = p_priv->dcd_state; @@ -931,7 +962,7 @@ static void usa67_instat_callback(struct urb *urb) if (old_dcd_state != p_priv->dcd_state && old_dcd_state) tty_port_tty_hangup(&port->port, true); - +resubmit: /* Resubmit urb so we continue receiving */ err = usb_submit_urb(urb, GFP_ATOMIC); if (err != 0) diff --git a/drivers/usb/serial/kobil_sct.c b/drivers/usb/serial/kobil_sct.c index 78b48c3..efa75b4 100644 --- a/drivers/usb/serial/kobil_sct.c +++ b/drivers/usb/serial/kobil_sct.c @@ -336,7 +336,8 @@ static int kobil_write(struct tty_struct *tty, struct usb_serial_port *port, port->interrupt_out_urb->transfer_buffer_length = length; priv->cur_pos = priv->cur_pos + length; - result = usb_submit_urb(port->interrupt_out_urb, GFP_NOIO); + result = 
usb_submit_urb(port->interrupt_out_urb, + GFP_ATOMIC); dev_dbg(&port->dev, "%s - Send write URB returns: %i\n", __func__, result); todo = priv->filled - priv->cur_pos; @@ -351,7 +352,7 @@ static int kobil_write(struct tty_struct *tty, struct usb_serial_port *port, if (priv->device_type == KOBIL_ADAPTER_B_PRODUCT_ID || priv->device_type == KOBIL_ADAPTER_K_PRODUCT_ID) { result = usb_submit_urb(port->interrupt_in_urb, - GFP_NOIO); + GFP_ATOMIC); dev_dbg(&port->dev, "%s - Send read URB returns: %i\n", __func__, result); } } diff --git a/drivers/usb/serial/opticon.c b/drivers/usb/serial/opticon.c index cbe779f..df495ea 100644 --- a/drivers/usb/serial/opticon.c +++ b/drivers/usb/serial/opticon.c @@ -219,7 +219,7 @@ static int opticon_write(struct tty_struct *tty, struct usb_serial_port *port, /* The conncected devices do not have a bulk write endpoint, * to transmit data to de barcode device the control endpoint is used */ - dr = kmalloc(sizeof(struct usb_ctrlrequest), GFP_NOIO); + dr = kmalloc(sizeof(struct usb_ctrlrequest), GFP_ATOMIC); if (!dr) { dev_err(&port->dev, "out of memory\n"); count = -ENOMEM; diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 68fc9fe..8b34841 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -161,6 +161,7 @@ static void option_instat_callback(struct urb *urb); #define NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_FULLSPEED 0x9000 #define NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_HIGHSPEED 0x9001 #define NOVATELWIRELESS_PRODUCT_E362 0x9010 +#define NOVATELWIRELESS_PRODUCT_E371 0x9011 #define NOVATELWIRELESS_PRODUCT_G2 0xA010 #define NOVATELWIRELESS_PRODUCT_MC551 0xB001 @@ -234,8 +235,31 @@ static void option_instat_callback(struct urb *urb); #define QUALCOMM_VENDOR_ID 0x05C6 #define CMOTECH_VENDOR_ID 0x16d8 -#define CMOTECH_PRODUCT_6008 0x6008 -#define CMOTECH_PRODUCT_6280 0x6280 +#define CMOTECH_PRODUCT_6001 0x6001 +#define CMOTECH_PRODUCT_CMU_300 0x6002 +#define CMOTECH_PRODUCT_6003 0x6003 +#define 
CMOTECH_PRODUCT_6004 0x6004 +#define CMOTECH_PRODUCT_6005 0x6005 +#define CMOTECH_PRODUCT_CGU_628A 0x6006 +#define CMOTECH_PRODUCT_CHE_628S 0x6007 +#define CMOTECH_PRODUCT_CMU_301 0x6008 +#define CMOTECH_PRODUCT_CHU_628 0x6280 +#define CMOTECH_PRODUCT_CHU_628S 0x6281 +#define CMOTECH_PRODUCT_CDU_680 0x6803 +#define CMOTECH_PRODUCT_CDU_685A 0x6804 +#define CMOTECH_PRODUCT_CHU_720S 0x7001 +#define CMOTECH_PRODUCT_7002 0x7002 +#define CMOTECH_PRODUCT_CHU_629K 0x7003 +#define CMOTECH_PRODUCT_7004 0x7004 +#define CMOTECH_PRODUCT_7005 0x7005 +#define CMOTECH_PRODUCT_CGU_629 0x7006 +#define CMOTECH_PRODUCT_CHU_629S 0x700a +#define CMOTECH_PRODUCT_CHU_720I 0x7211 +#define CMOTECH_PRODUCT_7212 0x7212 +#define CMOTECH_PRODUCT_7213 0x7213 +#define CMOTECH_PRODUCT_7251 0x7251 +#define CMOTECH_PRODUCT_7252 0x7252 +#define CMOTECH_PRODUCT_7253 0x7253 #define TELIT_VENDOR_ID 0x1bc7 #define TELIT_PRODUCT_UC864E 0x1003 @@ -243,15 +267,21 @@ static void option_instat_callback(struct urb *urb); #define TELIT_PRODUCT_CC864_DUAL 0x1005 #define TELIT_PRODUCT_CC864_SINGLE 0x1006 #define TELIT_PRODUCT_DE910_DUAL 0x1010 +#define TELIT_PRODUCT_UE910_V2 0x1012 #define TELIT_PRODUCT_LE920 0x1200 +#define TELIT_PRODUCT_LE910 0x1201 /* ZTE PRODUCTS */ #define ZTE_VENDOR_ID 0x19d2 #define ZTE_PRODUCT_MF622 0x0001 #define ZTE_PRODUCT_MF628 0x0015 #define ZTE_PRODUCT_MF626 0x0031 -#define ZTE_PRODUCT_MC2718 0xffe8 #define ZTE_PRODUCT_AC2726 0xfff1 +#define ZTE_PRODUCT_CDMA_TECH 0xfffe +#define ZTE_PRODUCT_AC8710T 0xffff +#define ZTE_PRODUCT_MC2718 0xffe8 +#define ZTE_PRODUCT_AD3812 0xffeb +#define ZTE_PRODUCT_MC2716 0xffed #define BENQ_VENDOR_ID 0x04a5 #define BENQ_PRODUCT_H10 0x4068 @@ -286,6 +316,7 @@ static void option_instat_callback(struct urb *urb); #define ALCATEL_PRODUCT_X060S_X200 0x0000 #define ALCATEL_PRODUCT_X220_X500D 0x0017 #define ALCATEL_PRODUCT_L100V 0x011e +#define ALCATEL_PRODUCT_L800MA 0x0203 #define PIRELLI_VENDOR_ID 0x1266 #define PIRELLI_PRODUCT_C100_1 0x1002 @@ -326,8 
+357,12 @@ static void option_instat_callback(struct urb *urb); /* Zoom */ #define ZOOM_PRODUCT_4597 0x9607 +/* SpeedUp SU9800 usb 3g modem */ +#define SPEEDUP_PRODUCT_SU9800 0x9800 + /* Haier products */ #define HAIER_VENDOR_ID 0x201e +#define HAIER_PRODUCT_CE81B 0x10f8 #define HAIER_PRODUCT_CE100 0x2009 /* Cinterion (formerly Siemens) products */ @@ -346,8 +381,13 @@ static void option_instat_callback(struct urb *urb); /* Olivetti products */ #define OLIVETTI_VENDOR_ID 0x0b3c #define OLIVETTI_PRODUCT_OLICARD100 0xc000 +#define OLIVETTI_PRODUCT_OLICARD120 0xc001 +#define OLIVETTI_PRODUCT_OLICARD140 0xc002 #define OLIVETTI_PRODUCT_OLICARD145 0xc003 +#define OLIVETTI_PRODUCT_OLICARD155 0xc004 #define OLIVETTI_PRODUCT_OLICARD200 0xc005 +#define OLIVETTI_PRODUCT_OLICARD160 0xc00a +#define OLIVETTI_PRODUCT_OLICARD500 0xc00b /* Celot products */ #define CELOT_VENDOR_ID 0x211f @@ -460,6 +500,10 @@ static void option_instat_callback(struct urb *urb); #define INOVIA_VENDOR_ID 0x20a6 #define INOVIA_SEW858 0x1105 +/* VIA Telecom */ +#define VIATELECOM_VENDOR_ID 0x15eb +#define VIATELECOM_PRODUCT_CDS7 0x0001 + /* some devices interfaces need special handling due to a number of reasons */ enum option_blacklist_reason { OPTION_BLACKLIST_NONE = 0, @@ -493,14 +537,26 @@ static const struct option_blacklist_info zte_k3765_z_blacklist = { .reserved = BIT(4), }; +static const struct option_blacklist_info zte_ad3812_z_blacklist = { + .sendsetup = BIT(0) | BIT(1) | BIT(2), +}; + static const struct option_blacklist_info zte_mc2718_z_blacklist = { .sendsetup = BIT(1) | BIT(2) | BIT(3) | BIT(4), }; +static const struct option_blacklist_info zte_mc2716_z_blacklist = { + .sendsetup = BIT(1) | BIT(2) | BIT(3), +}; + static const struct option_blacklist_info huawei_cdc12_blacklist = { .reserved = BIT(1) | BIT(2), }; +static const struct option_blacklist_info net_intf0_blacklist = { + .reserved = BIT(0), +}; + static const struct option_blacklist_info net_intf1_blacklist = { .reserved = 
BIT(1), }; @@ -534,6 +590,11 @@ static const struct option_blacklist_info zte_1255_blacklist = { .reserved = BIT(3) | BIT(4), }; +static const struct option_blacklist_info telit_le910_blacklist = { + .sendsetup = BIT(0), + .reserved = BIT(1) | BIT(2), +}; + static const struct option_blacklist_info telit_le920_blacklist = { .sendsetup = BIT(0), .reserved = BIT(1) | BIT(5), @@ -982,6 +1043,7 @@ static const struct usb_device_id option_ids[] = { /* Novatel Ovation MC551 a.k.a. Verizon USB551L */ { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC551, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_E362, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_E371, 0xff, 0xff, 0xff) }, { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01) }, { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01A) }, @@ -1031,16 +1093,59 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(BANDRICH_VENDOR_ID, BANDRICH_PRODUCT_1012, 0xff) }, { USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC650) }, { USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC680) }, + { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6000)}, /* ZTE AC8700 */ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6613)}, /* Onda H600/ZTE MF330 */ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x0023)}, /* ONYX 3G device */ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000)}, /* SIMCom SIM5218 */ - { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6280) }, /* BP3-USB & BP3-EXT HSDPA */ - { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6008) }, + { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, + { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, + { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6003), + .driver_info = (kernel_ulong_t)&net_intf0_blacklist }, + { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6004) }, + { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6005) }, + { 
USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CGU_628A) }, + { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CHE_628S), + .driver_info = (kernel_ulong_t)&net_intf0_blacklist }, + { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_301), + .driver_info = (kernel_ulong_t)&net_intf0_blacklist }, + { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CHU_628), + .driver_info = (kernel_ulong_t)&net_intf0_blacklist }, + { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CHU_628S) }, + { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CDU_680) }, + { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CDU_685A) }, + { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CHU_720S), + .driver_info = (kernel_ulong_t)&net_intf0_blacklist }, + { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_7002), + .driver_info = (kernel_ulong_t)&net_intf0_blacklist }, + { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CHU_629K), + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_7004), + .driver_info = (kernel_ulong_t)&net_intf3_blacklist }, + { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_7005) }, + { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CGU_629), + .driver_info = (kernel_ulong_t)&net_intf5_blacklist }, + { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CHU_629S), + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CHU_720I), + .driver_info = (kernel_ulong_t)&net_intf0_blacklist }, + { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_7212), + .driver_info = (kernel_ulong_t)&net_intf0_blacklist }, + { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_7213), + .driver_info = (kernel_ulong_t)&net_intf0_blacklist }, + { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_7251), + .driver_info = (kernel_ulong_t)&net_intf1_blacklist }, + { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_7252), + .driver_info = (kernel_ulong_t)&net_intf1_blacklist }, + { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_7253), + 
.driver_info = (kernel_ulong_t)&net_intf1_blacklist }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_UC864E) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_UC864G) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_CC864_DUAL) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_CC864_SINGLE) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_DE910_DUAL) }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_UE910_V2) }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910), + .driver_info = (kernel_ulong_t)&telit_le910_blacklist }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920), .driver_info = (kernel_ulong_t)&telit_le920_blacklist }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */ @@ -1408,6 +1513,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = (kernel_ulong_t)&net_intf2_blacklist }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1426, 0xff, 0xff, 0xff), /* ZTE MF91 */ .driver_info = (kernel_ulong_t)&net_intf2_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1428, 0xff, 0xff, 0xff), /* Telewell TW-LTE 4G v2 */ + .driver_info = (kernel_ulong_t)&net_intf2_blacklist }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1533, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1534, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1535, 0xff, 0xff, 0xff) }, @@ -1463,13 +1570,18 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff93, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff94, 0xff, 0xff, 0xff) }, - /* NOTE: most ZTE CDMA devices should be driven by zte_ev, not option */ + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_CDMA_TECH, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_AC2726, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_AC8710T, 0xff, 0xff, 0xff) }, { 
USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MC2718, 0xff, 0xff, 0xff), .driver_info = (kernel_ulong_t)&zte_mc2718_z_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_AD3812, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&zte_ad3812_z_blacklist }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MC2716, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&zte_mc2716_z_blacklist }, { USB_VENDOR_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff, 0x02, 0x01) }, { USB_VENDOR_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff, 0x02, 0x05) }, { USB_VENDOR_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0xff, 0x86, 0x10) }, - { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_AC2726, 0xff, 0xff, 0xff) }, { USB_DEVICE(BENQ_VENDOR_ID, BENQ_PRODUCT_H10) }, { USB_DEVICE(DLINK_VENDOR_ID, DLINK_PRODUCT_DWM_652) }, @@ -1498,14 +1610,18 @@ static const struct usb_device_id option_ids[] = { .driver_info = (kernel_ulong_t)&net_intf5_blacklist }, { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_L100V), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_L800MA), + .driver_info = (kernel_ulong_t)&net_intf2_blacklist }, { USB_DEVICE(AIRPLUS_VENDOR_ID, AIRPLUS_PRODUCT_MCD650) }, { USB_DEVICE(TLAYTECH_VENDOR_ID, TLAYTECH_PRODUCT_TEU800) }, { USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W14), .driver_info = (kernel_ulong_t)&four_g_w14_blacklist }, + { USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, SPEEDUP_PRODUCT_SU9800, 0xff) }, { USB_DEVICE(LONGCHEER_VENDOR_ID, ZOOM_PRODUCT_4597) }, { USB_DEVICE(LONGCHEER_VENDOR_ID, IBALL_3_5G_CONNECT) }, { USB_DEVICE(HAIER_VENDOR_ID, HAIER_PRODUCT_CE100) }, + { USB_DEVICE_AND_INTERFACE_INFO(HAIER_VENDOR_ID, HAIER_PRODUCT_CE81B, 0xff, 0xff, 0xff) }, /* Pirelli */ { USB_DEVICE_INTERFACE_CLASS(PIRELLI_VENDOR_ID, PIRELLI_PRODUCT_C100_1, 0xff) }, { USB_DEVICE_INTERFACE_CLASS(PIRELLI_VENDOR_ID, PIRELLI_PRODUCT_C100_2, 0xff) }, @@ -1537,12 +1653,21 @@ static const struct 
usb_device_id option_ids[] = { { USB_DEVICE(SIEMENS_VENDOR_ID, CINTERION_PRODUCT_HC25_MDMNET) }, { USB_DEVICE(SIEMENS_VENDOR_ID, CINTERION_PRODUCT_HC28_MDM) }, /* HC28 enumerates with Siemens or Cinterion VID depending on FW revision */ { USB_DEVICE(SIEMENS_VENDOR_ID, CINTERION_PRODUCT_HC28_MDMNET) }, - - { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD100) }, + { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD100), + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD120), + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD140), + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD145) }, + { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD155), + .driver_info = (kernel_ulong_t)&net_intf6_blacklist }, { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD200), - .driver_info = (kernel_ulong_t)&net_intf6_blacklist - }, + .driver_info = (kernel_ulong_t)&net_intf6_blacklist }, + { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD160), + .driver_info = (kernel_ulong_t)&net_intf6_blacklist }, + { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD500), + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE(CELOT_VENDOR_ID, CELOT_PRODUCT_CT680M) }, /* CT-650 CDMA 450 1xEVDO modem */ { USB_DEVICE_AND_INTERFACE_INFO(SAMSUNG_VENDOR_ID, SAMSUNG_PRODUCT_GT_B3730, USB_CLASS_CDC_DATA, 0x00, 0x00) }, /* Samsung GT-B3730 LTE USB modem.*/ { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM600) }, @@ -1631,6 +1756,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */ { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */ { USB_DEVICE(INOVIA_VENDOR_ID, INOVIA_SEW858) }, + { USB_DEVICE(VIATELECOM_VENDOR_ID, 
VIATELECOM_PRODUCT_CDS7) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); @@ -1824,6 +1950,8 @@ static void option_instat_callback(struct urb *urb) dev_dbg(dev, "%s: type %x req %x\n", __func__, req_pkt->bRequestType, req_pkt->bRequest); } + } else if (status == -ENOENT || status == -ESHUTDOWN) { + dev_dbg(dev, "%s: urb stopped: %d\n", __func__, status); } else dev_err(dev, "%s: error %d\n", __func__, status); @@ -1848,6 +1976,7 @@ static int option_send_setup(struct usb_serial_port *port) struct option_private *priv = intfdata->private; struct usb_wwan_port_private *portdata; int val = 0; + int res; portdata = usb_get_serial_port_data(port); @@ -1856,9 +1985,17 @@ static int option_send_setup(struct usb_serial_port *port) if (portdata->rts_state) val |= 0x02; - return usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0), + res = usb_autopm_get_interface(serial->interface); + if (res) + return res; + + res = usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0), 0x22, 0x21, val, priv->bInterfaceNumber, NULL, 0, USB_CTRL_SET_TIMEOUT); + + usb_autopm_put_interface(serial->interface); + + return res; } MODULE_AUTHOR(DRIVER_AUTHOR); diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index beb8edc..81ab710 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -48,6 +48,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_GPRS) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_HCR331) }, { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_MOTOROLA) }, + { USB_DEVICE(PL2303_VENDOR_ID, PL2303_PRODUCT_ID_ZTEK) }, { USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID) }, { USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID_RSAQ5) }, { USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_ID) }, @@ -83,6 +84,9 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(YCCABLE_VENDOR_ID, YCCABLE_PRODUCT_ID) }, { USB_DEVICE(SUPERIAL_VENDOR_ID, SUPERIAL_PRODUCT_ID) 
}, { USB_DEVICE(HP_VENDOR_ID, HP_LD220_PRODUCT_ID) }, + { USB_DEVICE(HP_VENDOR_ID, HP_LD960_PRODUCT_ID) }, + { USB_DEVICE(HP_VENDOR_ID, HP_LCM220_PRODUCT_ID) }, + { USB_DEVICE(HP_VENDOR_ID, HP_LCM960_PRODUCT_ID) }, { USB_DEVICE(CRESSI_VENDOR_ID, CRESSI_EDY_PRODUCT_ID) }, { USB_DEVICE(ZEAGLE_VENDOR_ID, ZEAGLE_N2ITION3_PRODUCT_ID) }, { USB_DEVICE(SONY_VENDOR_ID, SONY_QN3USB_PRODUCT_ID) }, diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h index c38b8c0..71fd9da 100644 --- a/drivers/usb/serial/pl2303.h +++ b/drivers/usb/serial/pl2303.h @@ -22,6 +22,7 @@ #define PL2303_PRODUCT_ID_GPRS 0x0609 #define PL2303_PRODUCT_ID_HCR331 0x331a #define PL2303_PRODUCT_ID_MOTOROLA 0x0307 +#define PL2303_PRODUCT_ID_ZTEK 0xe1f1 #define ATEN_VENDOR_ID 0x0557 #define ATEN_VENDOR_ID2 0x0547 @@ -121,8 +122,11 @@ #define SUPERIAL_VENDOR_ID 0x5372 #define SUPERIAL_PRODUCT_ID 0x2303 -/* Hewlett-Packard LD220-HP POS Pole Display */ +/* Hewlett-Packard POS Pole Displays */ #define HP_VENDOR_ID 0x03f0 +#define HP_LD960_PRODUCT_ID 0x0b39 +#define HP_LCM220_PRODUCT_ID 0x3139 +#define HP_LCM960_PRODUCT_ID 0x3239 #define HP_LD220_PRODUCT_ID 0x3524 /* Cressi Edy (diving computer) PC interface */ diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 968a402..3e96d1a 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -22,8 +22,17 @@ #define DRIVER_AUTHOR "Qualcomm Inc" #define DRIVER_DESC "Qualcomm USB Serial driver" +/* standard device layouts supported by this driver */ +enum qcserial_layouts { + QCSERIAL_G2K = 0, /* Gobi 2000 */ + QCSERIAL_G1K = 1, /* Gobi 1000 */ + QCSERIAL_SWI = 2, /* Sierra Wireless */ +}; + #define DEVICE_G1K(v, p) \ - USB_DEVICE(v, p), .driver_info = 1 + USB_DEVICE(v, p), .driver_info = QCSERIAL_G1K +#define DEVICE_SWI(v, p) \ + USB_DEVICE(v, p), .driver_info = QCSERIAL_SWI static const struct usb_device_id id_table[] = { /* Gobi 1000 devices */ @@ -126,22 +135,27 @@ static const struct usb_device_id 
id_table[] = { {USB_DEVICE(0x12D1, 0x14F1)}, /* Sony Gobi 3000 Composite */ {USB_DEVICE(0x0AF0, 0x8120)}, /* Option GTM681W */ - /* non Gobi Qualcomm serial devices */ - {USB_DEVICE_INTERFACE_NUMBER(0x0f3d, 0x68a2, 0)}, /* Sierra Wireless MC7700 Device Management */ - {USB_DEVICE_INTERFACE_NUMBER(0x0f3d, 0x68a2, 2)}, /* Sierra Wireless MC7700 NMEA */ - {USB_DEVICE_INTERFACE_NUMBER(0x0f3d, 0x68a2, 3)}, /* Sierra Wireless MC7700 Modem */ - {USB_DEVICE_INTERFACE_NUMBER(0x114f, 0x68a2, 0)}, /* Sierra Wireless MC7750 Device Management */ - {USB_DEVICE_INTERFACE_NUMBER(0x114f, 0x68a2, 2)}, /* Sierra Wireless MC7750 NMEA */ - {USB_DEVICE_INTERFACE_NUMBER(0x114f, 0x68a2, 3)}, /* Sierra Wireless MC7750 Modem */ - {USB_DEVICE_INTERFACE_NUMBER(0x1199, 0x68a2, 0)}, /* Sierra Wireless MC7710 Device Management */ - {USB_DEVICE_INTERFACE_NUMBER(0x1199, 0x68a2, 2)}, /* Sierra Wireless MC7710 NMEA */ - {USB_DEVICE_INTERFACE_NUMBER(0x1199, 0x68a2, 3)}, /* Sierra Wireless MC7710 Modem */ - {USB_DEVICE_INTERFACE_NUMBER(0x1199, 0x901c, 0)}, /* Sierra Wireless EM7700 Device Management */ - {USB_DEVICE_INTERFACE_NUMBER(0x1199, 0x901c, 2)}, /* Sierra Wireless EM7700 NMEA */ - {USB_DEVICE_INTERFACE_NUMBER(0x1199, 0x901c, 3)}, /* Sierra Wireless EM7700 Modem */ - {USB_DEVICE_INTERFACE_NUMBER(0x1199, 0x9051, 0)}, /* Netgear AirCard 340U Device Management */ - {USB_DEVICE_INTERFACE_NUMBER(0x1199, 0x9051, 2)}, /* Netgear AirCard 340U NMEA */ - {USB_DEVICE_INTERFACE_NUMBER(0x1199, 0x9051, 3)}, /* Netgear AirCard 340U Modem */ + /* non-Gobi Sierra Wireless devices */ + {DEVICE_SWI(0x0f3d, 0x68a2)}, /* Sierra Wireless MC7700 */ + {DEVICE_SWI(0x114f, 0x68a2)}, /* Sierra Wireless MC7750 */ + {DEVICE_SWI(0x1199, 0x68a2)}, /* Sierra Wireless MC7710 */ + {DEVICE_SWI(0x1199, 0x68c0)}, /* Sierra Wireless MC73xx */ + {DEVICE_SWI(0x1199, 0x901c)}, /* Sierra Wireless EM7700 */ + {DEVICE_SWI(0x1199, 0x901f)}, /* Sierra Wireless EM7355 */ + {DEVICE_SWI(0x1199, 0x9040)}, /* Sierra Wireless Modem */ + 
{DEVICE_SWI(0x1199, 0x9041)}, /* Sierra Wireless MC7305/MC7355 */ + {DEVICE_SWI(0x1199, 0x9051)}, /* Netgear AirCard 340U */ + {DEVICE_SWI(0x1199, 0x9053)}, /* Sierra Wireless Modem */ + {DEVICE_SWI(0x1199, 0x9054)}, /* Sierra Wireless Modem */ + {DEVICE_SWI(0x1199, 0x9055)}, /* Netgear AirCard 341U */ + {DEVICE_SWI(0x1199, 0x9056)}, /* Sierra Wireless Modem */ + {DEVICE_SWI(0x1199, 0x9060)}, /* Sierra Wireless Modem */ + {DEVICE_SWI(0x1199, 0x9061)}, /* Sierra Wireless Modem */ + {DEVICE_SWI(0x413c, 0x81a2)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */ + {DEVICE_SWI(0x413c, 0x81a3)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */ + {DEVICE_SWI(0x413c, 0x81a4)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */ + {DEVICE_SWI(0x413c, 0x81a8)}, /* Dell Wireless 5808 Gobi(TM) 4G LTE Mobile Broadband Card */ + {DEVICE_SWI(0x413c, 0x81a9)}, /* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */ { } /* Terminating entry */ }; @@ -154,11 +168,8 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) int retval = -ENODEV; __u8 nintf; __u8 ifnum; - bool is_gobi1k = id->driver_info ? true : false; int altsetting = -1; - dev_dbg(dev, "Is Gobi 1000 = %d\n", is_gobi1k); - nintf = serial->dev->actconfig->desc.bNumInterfaces; dev_dbg(dev, "Num Interfaces = %d\n", nintf); ifnum = intf->desc.bInterfaceNumber; @@ -193,25 +204,29 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) goto done; } - if (nintf < 3 || nintf > 4) { - dev_err(dev, "unknown number of interfaces: %d\n", nintf); - goto done; - } - /* default to enabling interface */ altsetting = 0; - /* Composite mode; don't bind to the QMI/net interface as that + /* + * Composite mode; don't bind to the QMI/net interface as that * gets handled by other drivers. 
*/ - if (is_gobi1k) { - /* Gobi 1K USB layout: + switch (id->driver_info) { + case QCSERIAL_G1K: + /* + * Gobi 1K USB layout: * 0: DM/DIAG (use libqcdm from ModemManager for communication) * 1: serial port (doesn't respond) * 2: AT-capable modem port * 3: QMI/net */ + if (nintf < 3 || nintf > 4) { + dev_err(dev, "unknown number of interfaces: %d\n", nintf); + altsetting = -1; + goto done; + } + if (ifnum == 0) { dev_dbg(dev, "Gobi 1K DM/DIAG interface found\n"); altsetting = 1; @@ -219,13 +234,21 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) dev_dbg(dev, "Modem port found\n"); else altsetting = -1; - } else { - /* Gobi 2K+ USB layout: + break; + case QCSERIAL_G2K: + /* + * Gobi 2K+ USB layout: * 0: QMI/net * 1: DM/DIAG (use libqcdm from ModemManager for communication) * 2: AT-capable modem port * 3: NMEA */ + if (nintf < 3 || nintf > 4) { + dev_err(dev, "unknown number of interfaces: %d\n", nintf); + altsetting = -1; + goto done; + } + switch (ifnum) { case 0: /* Don't claim the QMI/net interface */ @@ -246,6 +269,35 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) dev_dbg(dev, "Gobi 2K+ NMEA GPS interface found\n"); break; } + break; + case QCSERIAL_SWI: + /* + * Sierra Wireless layout: + * 0: DM/DIAG (use libqcdm from ModemManager for communication) + * 2: NMEA + * 3: AT-capable modem port + * 8: QMI/net + */ + switch (ifnum) { + case 0: + dev_dbg(dev, "DM/DIAG interface found\n"); + break; + case 2: + dev_dbg(dev, "NMEA GPS interface found\n"); + break; + case 3: + dev_dbg(dev, "Modem port found\n"); + break; + default: + /* don't claim any unsupported interface */ + altsetting = -1; + break; + } + break; + default: + dev_err(dev, "unsupported device layout type: %lu\n", + id->driver_info); + break; } done: diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c index de958c5..d09a4e7 100644 --- a/drivers/usb/serial/sierra.c +++ b/drivers/usb/serial/sierra.c @@ -58,6 +58,7 @@ 
struct sierra_intf_private { spinlock_t susp_lock; unsigned int suspended:1; int in_flight; + unsigned int open_ports; }; static int sierra_set_power_state(struct usb_device *udev, __u16 swiState) @@ -281,17 +282,21 @@ static const struct usb_device_id id_table[] = { /* Sierra Wireless HSPA Non-Composite Device */ { USB_DEVICE_AND_INTERFACE_INFO(0x1199, 0x6892, 0xFF, 0xFF, 0xFF)}, { USB_DEVICE(0x1199, 0x6893) }, /* Sierra Wireless Device */ - { USB_DEVICE(0x1199, 0x68A3), /* Sierra Wireless Direct IP modems */ + /* Sierra Wireless Direct IP modems */ + { USB_DEVICE_AND_INTERFACE_INFO(0x1199, 0x68A3, 0xFF, 0xFF, 0xFF), + .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist + }, + { USB_DEVICE_AND_INTERFACE_INFO(0x1199, 0x68AA, 0xFF, 0xFF, 0xFF), .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist }, /* AT&T Direct IP LTE modems */ { USB_DEVICE_AND_INTERFACE_INFO(0x0F3D, 0x68AA, 0xFF, 0xFF, 0xFF), .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist }, - { USB_DEVICE(0x0f3d, 0x68A3), /* Airprime/Sierra Wireless Direct IP modems */ + /* Airprime/Sierra Wireless Direct IP modems */ + { USB_DEVICE_AND_INTERFACE_INFO(0x0F3D, 0x68A3, 0xFF, 0xFF, 0xFF), .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist }, - { USB_DEVICE(0x413C, 0x08133) }, /* Dell Computer Corp. 
Wireless 5720 VZW Mobile Broadband (EVDO Rev-A) Minicard GPS Port */ { } }; @@ -768,6 +773,7 @@ static void sierra_close(struct usb_serial_port *port) struct usb_serial *serial = port->serial; struct sierra_port_private *portdata; struct sierra_intf_private *intfdata = port->serial->private; + struct urb *urb; portdata = usb_get_serial_port_data(port); @@ -776,7 +782,6 @@ static void sierra_close(struct usb_serial_port *port) mutex_lock(&serial->disc_mutex); if (!serial->disconnected) { - serial->interface->needs_remote_wakeup = 0; /* odd error handling due to pm counters */ if (!usb_autopm_get_interface(serial->interface)) sierra_send_setup(port); @@ -787,8 +792,22 @@ static void sierra_close(struct usb_serial_port *port) mutex_unlock(&serial->disc_mutex); spin_lock_irq(&intfdata->susp_lock); portdata->opened = 0; + if (--intfdata->open_ports == 0) + serial->interface->needs_remote_wakeup = 0; spin_unlock_irq(&intfdata->susp_lock); + for (;;) { + urb = usb_get_from_anchor(&portdata->delayed); + if (!urb) + break; + kfree(urb->transfer_buffer); + usb_free_urb(urb); + usb_autopm_put_interface_async(serial->interface); + spin_lock(&portdata->lock); + portdata->outstanding_urbs--; + spin_unlock(&portdata->lock); + } + sierra_stop_rx_urbs(port); for (i = 0; i < portdata->num_in_urbs; i++) { sierra_release_urb(portdata->in_urbs[i]); @@ -825,23 +844,29 @@ static int sierra_open(struct tty_struct *tty, struct usb_serial_port *port) usb_sndbulkpipe(serial->dev, endpoint) | USB_DIR_IN); err = sierra_submit_rx_urbs(port, GFP_KERNEL); - if (err) { - /* get rid of everything as in close */ - sierra_close(port); - /* restore balance for autopm */ - if (!serial->disconnected) - usb_autopm_put_interface(serial->interface); - return err; - } + if (err) + goto err_submit; + sierra_send_setup(port); - serial->interface->needs_remote_wakeup = 1; spin_lock_irq(&intfdata->susp_lock); portdata->opened = 1; + if (++intfdata->open_ports == 1) + serial->interface->needs_remote_wakeup = 1; 
spin_unlock_irq(&intfdata->susp_lock); usb_autopm_put_interface(serial->interface); return 0; + +err_submit: + sierra_stop_rx_urbs(port); + + for (i = 0; i < portdata->num_in_urbs; i++) { + sierra_release_urb(portdata->in_urbs[i]); + portdata->in_urbs[i] = NULL; + } + + return err; } @@ -937,6 +962,7 @@ static int sierra_port_remove(struct usb_serial_port *port) struct sierra_port_private *portdata; portdata = usb_get_serial_port_data(port); + usb_set_serial_port_data(port, NULL); kfree(portdata); return 0; @@ -953,6 +979,8 @@ static void stop_read_write_urbs(struct usb_serial *serial) for (i = 0; i < serial->num_ports; ++i) { port = serial->port[i]; portdata = usb_get_serial_port_data(port); + if (!portdata) + continue; sierra_stop_rx_urbs(port); usb_kill_anchored_urbs(&portdata->active); } @@ -995,6 +1023,9 @@ static int sierra_resume(struct usb_serial *serial) port = serial->port[i]; portdata = usb_get_serial_port_data(port); + if (!portdata) + continue; + while ((urb = usb_get_from_anchor(&portdata->delayed))) { usb_anchor_urb(urb, &portdata->active); intfdata->in_flight++; @@ -1002,8 +1033,12 @@ static int sierra_resume(struct usb_serial *serial) if (err < 0) { intfdata->in_flight--; usb_unanchor_urb(urb); - usb_scuttle_anchored_urbs(&portdata->delayed); - break; + kfree(urb->transfer_buffer); + usb_free_urb(urb); + spin_lock(&portdata->lock); + portdata->outstanding_urbs--; + spin_unlock(&portdata->lock); + continue; } } diff --git a/drivers/usb/serial/ssu100.c b/drivers/usb/serial/ssu100.c index e5750be..d667ff9 100644 --- a/drivers/usb/serial/ssu100.c +++ b/drivers/usb/serial/ssu100.c @@ -495,10 +495,9 @@ static void ssu100_update_lsr(struct usb_serial_port *port, u8 lsr, if (*tty_flag == TTY_NORMAL) *tty_flag = TTY_FRAME; } - if (lsr & UART_LSR_OE){ + if (lsr & UART_LSR_OE) { port->icount.overrun++; - if (*tty_flag == TTY_NORMAL) - *tty_flag = TTY_OVERRUN; + tty_insert_flip_char(&port->port, 0, TTY_OVERRUN); } } @@ -516,12 +515,8 @@ static void 
ssu100_process_read_urb(struct urb *urb) if ((len >= 4) && (packet[0] == 0x1b) && (packet[1] == 0x1b) && ((packet[2] == 0x00) || (packet[2] == 0x01))) { - if (packet[2] == 0x00) { + if (packet[2] == 0x00) ssu100_update_lsr(port, packet[3], &flag); - if (flag == TTY_OVERRUN) - tty_insert_flip_char(&port->port, 0, - TTY_OVERRUN); - } if (packet[2] == 0x01) ssu100_update_msr(port, packet[3]); diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index 6091bd5..cb6eff2 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -764,29 +764,39 @@ static int usb_serial_probe(struct usb_interface *interface, if (usb_endpoint_is_bulk_in(endpoint)) { /* we found a bulk in endpoint */ dev_dbg(ddev, "found bulk in on endpoint %d\n", i); - bulk_in_endpoint[num_bulk_in] = endpoint; - ++num_bulk_in; + if (num_bulk_in < MAX_NUM_PORTS) { + bulk_in_endpoint[num_bulk_in] = endpoint; + ++num_bulk_in; + } } if (usb_endpoint_is_bulk_out(endpoint)) { /* we found a bulk out endpoint */ dev_dbg(ddev, "found bulk out on endpoint %d\n", i); - bulk_out_endpoint[num_bulk_out] = endpoint; - ++num_bulk_out; + if (num_bulk_out < MAX_NUM_PORTS) { + bulk_out_endpoint[num_bulk_out] = endpoint; + ++num_bulk_out; + } } if (usb_endpoint_is_int_in(endpoint)) { /* we found a interrupt in endpoint */ dev_dbg(ddev, "found interrupt in on endpoint %d\n", i); - interrupt_in_endpoint[num_interrupt_in] = endpoint; - ++num_interrupt_in; + if (num_interrupt_in < MAX_NUM_PORTS) { + interrupt_in_endpoint[num_interrupt_in] = + endpoint; + ++num_interrupt_in; + } } if (usb_endpoint_is_int_out(endpoint)) { /* we found an interrupt out endpoint */ dev_dbg(ddev, "found interrupt out on endpoint %d\n", i); - interrupt_out_endpoint[num_interrupt_out] = endpoint; - ++num_interrupt_out; + if (num_interrupt_out < MAX_NUM_PORTS) { + interrupt_out_endpoint[num_interrupt_out] = + endpoint; + ++num_interrupt_out; + } } } @@ -809,8 +819,10 @@ static int usb_serial_probe(struct 
usb_interface *interface, if (usb_endpoint_is_int_in(endpoint)) { /* we found a interrupt in endpoint */ dev_dbg(ddev, "found interrupt in for Prolific device on separate interface\n"); - interrupt_in_endpoint[num_interrupt_in] = endpoint; - ++num_interrupt_in; + if (num_interrupt_in < MAX_NUM_PORTS) { + interrupt_in_endpoint[num_interrupt_in] = endpoint; + ++num_interrupt_in; + } } } } @@ -850,6 +862,11 @@ static int usb_serial_probe(struct usb_interface *interface, num_ports = type->num_ports; } + if (num_ports > MAX_NUM_PORTS) { + dev_warn(ddev, "too many ports requested: %d\n", num_ports); + num_ports = MAX_NUM_PORTS; + } + serial->num_ports = num_ports; serial->num_bulk_in = num_bulk_in; serial->num_bulk_out = num_bulk_out; @@ -1348,10 +1365,12 @@ static int usb_serial_register(struct usb_serial_driver *driver) static void usb_serial_deregister(struct usb_serial_driver *device) { pr_info("USB Serial deregistering driver %s\n", device->description); + mutex_lock(&table_lock); list_del(&device->driver_list); - usb_serial_bus_deregister(device); mutex_unlock(&table_lock); + + usb_serial_bus_deregister(device); } /** diff --git a/drivers/usb/serial/usb_wwan.c b/drivers/usb/serial/usb_wwan.c index 8536578..ad5fff4 100644 --- a/drivers/usb/serial/usb_wwan.c +++ b/drivers/usb/serial/usb_wwan.c @@ -228,8 +228,10 @@ int usb_wwan_write(struct tty_struct *tty, struct usb_serial_port *port, usb_pipeendpoint(this_urb->pipe), i); err = usb_autopm_get_interface_async(port->serial->interface); - if (err < 0) + if (err < 0) { + clear_bit(i, &portdata->out_busy); break; + } /* send the data */ memcpy(this_urb->transfer_buffer, buf, todo); @@ -386,6 +388,14 @@ int usb_wwan_open(struct tty_struct *tty, struct usb_serial_port *port) portdata = usb_get_serial_port_data(port); intfdata = serial->private; + if (port->interrupt_in_urb) { + err = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL); + if (err) { + dev_dbg(&port->dev, "%s: submit int urb failed: %d\n", + __func__, err); + 
} + } + /* Start reading from the IN endpoint */ for (i = 0; i < N_IN_URB; i++) { urb = portdata->in_urbs[i]; @@ -412,12 +422,26 @@ int usb_wwan_open(struct tty_struct *tty, struct usb_serial_port *port) } EXPORT_SYMBOL(usb_wwan_open); +static void unbusy_queued_urb(struct urb *urb, + struct usb_wwan_port_private *portdata) +{ + int i; + + for (i = 0; i < N_OUT_URB; i++) { + if (urb == portdata->out_urbs[i]) { + clear_bit(i, &portdata->out_busy); + break; + } + } +} + void usb_wwan_close(struct usb_serial_port *port) { int i; struct usb_serial *serial = port->serial; struct usb_wwan_port_private *portdata; struct usb_wwan_intf_private *intfdata = port->serial->private; + struct urb *urb; portdata = usb_get_serial_port_data(port); @@ -426,10 +450,19 @@ void usb_wwan_close(struct usb_serial_port *port) portdata->opened = 0; spin_unlock_irq(&intfdata->susp_lock); + for (;;) { + urb = usb_get_from_anchor(&portdata->delayed); + if (!urb) + break; + unbusy_queued_urb(urb, portdata); + usb_autopm_put_interface_async(serial->interface); + } + for (i = 0; i < N_IN_URB; i++) usb_kill_urb(portdata->in_urbs[i]); for (i = 0; i < N_OUT_URB; i++) usb_kill_urb(portdata->out_urbs[i]); + usb_kill_urb(port->interrupt_in_urb); /* balancing - important as an error cannot be handled*/ usb_autopm_get_interface_no_resume(serial->interface); @@ -467,9 +500,11 @@ int usb_wwan_port_probe(struct usb_serial_port *port) struct usb_wwan_port_private *portdata; struct urb *urb; u8 *buffer; - int err; int i; + if (!port->bulk_in_size || !port->bulk_out_size) + return -ENODEV; + portdata = kzalloc(sizeof(*portdata), GFP_KERNEL); if (!portdata) return -ENOMEM; @@ -477,9 +512,6 @@ int usb_wwan_port_probe(struct usb_serial_port *port) init_usb_anchor(&portdata->delayed); for (i = 0; i < N_IN_URB; i++) { - if (!port->bulk_in_size) - break; - buffer = (u8 *)__get_free_page(GFP_KERNEL); if (!buffer) goto bail_out_error; @@ -493,9 +525,6 @@ int usb_wwan_port_probe(struct usb_serial_port *port) } for (i = 
0; i < N_OUT_URB; i++) { - if (!port->bulk_out_size) - break; - buffer = kmalloc(OUT_BUFLEN, GFP_KERNEL); if (!buffer) goto bail_out_error2; @@ -510,13 +539,6 @@ int usb_wwan_port_probe(struct usb_serial_port *port) usb_set_serial_port_data(port, portdata); - if (port->interrupt_in_urb) { - err = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL); - if (err) - dev_dbg(&port->dev, "%s: submit irq_in urb failed %d\n", - __func__, err); - } - return 0; bail_out_error2: @@ -584,44 +606,29 @@ static void stop_read_write_urbs(struct usb_serial *serial) int usb_wwan_suspend(struct usb_serial *serial, pm_message_t message) { struct usb_wwan_intf_private *intfdata = serial->private; - int b; + spin_lock_irq(&intfdata->susp_lock); if (PMSG_IS_AUTO(message)) { - spin_lock_irq(&intfdata->susp_lock); - b = intfdata->in_flight; - spin_unlock_irq(&intfdata->susp_lock); - - if (b) + if (intfdata->in_flight) { + spin_unlock_irq(&intfdata->susp_lock); return -EBUSY; + } } - - spin_lock_irq(&intfdata->susp_lock); intfdata->suspended = 1; spin_unlock_irq(&intfdata->susp_lock); + stop_read_write_urbs(serial); return 0; } EXPORT_SYMBOL(usb_wwan_suspend); -static void unbusy_queued_urb(struct urb *urb, struct usb_wwan_port_private *portdata) -{ - int i; - - for (i = 0; i < N_OUT_URB; i++) { - if (urb == portdata->out_urbs[i]) { - clear_bit(i, &portdata->out_busy); - break; - } - } -} - -static void play_delayed(struct usb_serial_port *port) +static int play_delayed(struct usb_serial_port *port) { struct usb_wwan_intf_private *data; struct usb_wwan_port_private *portdata; struct urb *urb; - int err; + int err = 0; portdata = usb_get_serial_port_data(port); data = port->serial->private; @@ -638,6 +645,8 @@ static void play_delayed(struct usb_serial_port *port) break; } } + + return err; } int usb_wwan_resume(struct usb_serial *serial) @@ -647,54 +656,51 @@ int usb_wwan_resume(struct usb_serial *serial) struct usb_wwan_intf_private *intfdata = serial->private; struct usb_wwan_port_private 
*portdata; struct urb *urb; - int err = 0; - - /* get the interrupt URBs resubmitted unconditionally */ - for (i = 0; i < serial->num_ports; i++) { - port = serial->port[i]; - if (!port->interrupt_in_urb) { - dev_dbg(&port->dev, "%s: No interrupt URB for port\n", __func__); - continue; - } - err = usb_submit_urb(port->interrupt_in_urb, GFP_NOIO); - dev_dbg(&port->dev, "Submitted interrupt URB for port (result %d)\n", err); - if (err < 0) { - dev_err(&port->dev, "%s: Error %d for interrupt URB\n", - __func__, err); - goto err_out; - } - } + int err; + int err_count = 0; + spin_lock_irq(&intfdata->susp_lock); for (i = 0; i < serial->num_ports; i++) { /* walk all ports */ port = serial->port[i]; portdata = usb_get_serial_port_data(port); /* skip closed ports */ - spin_lock_irq(&intfdata->susp_lock); - if (!portdata || !portdata->opened) { - spin_unlock_irq(&intfdata->susp_lock); + if (!portdata || !portdata->opened) continue; + + if (port->interrupt_in_urb) { + err = usb_submit_urb(port->interrupt_in_urb, + GFP_ATOMIC); + if (err) { + dev_err(&port->dev, + "%s: submit int urb failed: %d\n", + __func__, err); + err_count++; + } } + err = play_delayed(port); + if (err) + err_count++; + for (j = 0; j < N_IN_URB; j++) { urb = portdata->in_urbs[j]; err = usb_submit_urb(urb, GFP_ATOMIC); if (err < 0) { dev_err(&port->dev, "%s: Error %d for bulk URB %d\n", __func__, err, i); - spin_unlock_irq(&intfdata->susp_lock); - goto err_out; + err_count++; } } - play_delayed(port); - spin_unlock_irq(&intfdata->susp_lock); } - spin_lock_irq(&intfdata->susp_lock); intfdata->suspended = 0; spin_unlock_irq(&intfdata->susp_lock); -err_out: - return err; + + if (err_count) + return -EIO; + + return 0; } EXPORT_SYMBOL(usb_wwan_resume); #endif diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c index 36a7740..cc5a430 100644 --- a/drivers/usb/serial/whiteheat.c +++ b/drivers/usb/serial/whiteheat.c @@ -521,6 +521,10 @@ static void command_port_read_callback(struct urb 
*urb) dev_dbg(&urb->dev->dev, "%s - command_info is NULL, exiting.\n", __func__); return; } + if (!urb->actual_length) { + dev_dbg(&urb->dev->dev, "%s - empty response, exiting.\n", __func__); + return; + } if (status) { dev_dbg(&urb->dev->dev, "%s - nonzero urb status: %d\n", __func__, status); if (status != -ENOENT) @@ -541,7 +545,8 @@ static void command_port_read_callback(struct urb *urb) /* These are unsolicited reports from the firmware, hence no waiting command to wakeup */ dev_dbg(&urb->dev->dev, "%s - event received\n", __func__); - } else if (data[0] == WHITEHEAT_GET_DTR_RTS) { + } else if ((data[0] == WHITEHEAT_GET_DTR_RTS) && + (urb->actual_length - 1 <= sizeof(command_info->result_buffer))) { memcpy(command_info->result_buffer, &data[1], urb->actual_length - 1); command_info->command_finished = WHITEHEAT_CMD_COMPLETE; diff --git a/drivers/usb/serial/zte_ev.c b/drivers/usb/serial/zte_ev.c index eae2c87..88dd32c 100644 --- a/drivers/usb/serial/zte_ev.c +++ b/drivers/usb/serial/zte_ev.c @@ -273,28 +273,8 @@ static void zte_ev_usb_serial_close(struct usb_serial_port *port) } static const struct usb_device_id id_table[] = { - /* AC8710, AC8710T */ - { USB_DEVICE_AND_INTERFACE_INFO(0x19d2, 0xffff, 0xff, 0xff, 0xff) }, - /* AC8700 */ - { USB_DEVICE_AND_INTERFACE_INFO(0x19d2, 0xfffe, 0xff, 0xff, 0xff) }, /* MG880 */ { USB_DEVICE(0x19d2, 0xfffd) }, - { USB_DEVICE(0x19d2, 0xfffc) }, - { USB_DEVICE(0x19d2, 0xfffb) }, - /* AC8710_V3 */ - { USB_DEVICE(0x19d2, 0xfff6) }, - { USB_DEVICE(0x19d2, 0xfff7) }, - { USB_DEVICE(0x19d2, 0xfff8) }, - { USB_DEVICE(0x19d2, 0xfff9) }, - { USB_DEVICE(0x19d2, 0xffee) }, - /* AC2716, MC2716 */ - { USB_DEVICE_AND_INTERFACE_INFO(0x19d2, 0xffed, 0xff, 0xff, 0xff) }, - /* AD3812 */ - { USB_DEVICE_AND_INTERFACE_INFO(0x19d2, 0xffeb, 0xff, 0xff, 0xff) }, - { USB_DEVICE(0x19d2, 0xffec) }, - { USB_DEVICE(0x05C6, 0x3197) }, - { USB_DEVICE(0x05C6, 0x6000) }, - { USB_DEVICE(0x05C6, 0x9008) }, { }, }; MODULE_DEVICE_TABLE(usb, id_table); diff 
--git a/drivers/usb/storage/shuttle_usbat.c b/drivers/usb/storage/shuttle_usbat.c index 4ef2a80..008d805 100644 --- a/drivers/usb/storage/shuttle_usbat.c +++ b/drivers/usb/storage/shuttle_usbat.c @@ -1851,7 +1851,7 @@ static int usbat_probe(struct usb_interface *intf, us->transport_name = "Shuttle USBAT"; us->transport = usbat_flash_transport; us->transport_reset = usb_stor_CB_reset; - us->max_lun = 1; + us->max_lun = 0; result = usb_stor_probe2(us); return result; diff --git a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c index 22c7d43..b1d815e 100644 --- a/drivers/usb/storage/transport.c +++ b/drivers/usb/storage/transport.c @@ -1118,6 +1118,31 @@ int usb_stor_Bulk_transport(struct scsi_cmnd *srb, struct us_data *us) */ if (result == USB_STOR_XFER_LONG) fake_sense = 1; + + /* + * Sometimes a device will mistakenly skip the data phase + * and go directly to the status phase without sending a + * zero-length packet. If we get a 13-byte response here, + * check whether it really is a CSW. 
+ */ + if (result == USB_STOR_XFER_SHORT && + srb->sc_data_direction == DMA_FROM_DEVICE && + transfer_length - scsi_get_resid(srb) == + US_BULK_CS_WRAP_LEN) { + struct scatterlist *sg = NULL; + unsigned int offset = 0; + + if (usb_stor_access_xfer_buf((unsigned char *) bcs, + US_BULK_CS_WRAP_LEN, srb, &sg, + &offset, FROM_XFER_BUF) == + US_BULK_CS_WRAP_LEN && + bcs->Signature == + cpu_to_le32(US_BULK_CS_SIGN)) { + usb_stor_dbg(us, "Device skipped data phase\n"); + scsi_set_resid(srb, transfer_length); + goto skipped_data_phase; + } + } } /* See flow chart on pg 15 of the Bulk Only Transport spec for @@ -1153,6 +1178,7 @@ int usb_stor_Bulk_transport(struct scsi_cmnd *srb, struct us_data *us) if (result != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; + skipped_data_phase: /* check bulk status */ residue = le32_to_cpu(bcs->Residue); usb_stor_dbg(us, "Bulk Status S 0x%x T 0x%x R %u Stat 0x%x\n", diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index d966b59..b8029ec 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -259,7 +259,7 @@ static int uas_try_complete(struct scsi_cmnd *cmnd, const char *caller) struct uas_cmd_info *cmdinfo = (void *)&cmnd->SCp; struct uas_dev_info *devinfo = (void *)cmnd->device->hostdata; - WARN_ON(!spin_is_locked(&devinfo->lock)); + lockdep_assert_held(&devinfo->lock); if (cmdinfo->state & (COMMAND_INFLIGHT | DATA_IN_URB_INFLIGHT | DATA_OUT_URB_INFLIGHT | @@ -558,7 +558,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd, struct uas_cmd_info *cmdinfo = (void *)&cmnd->SCp; int err; - WARN_ON(!spin_is_locked(&devinfo->lock)); + lockdep_assert_held(&devinfo->lock); if (cmdinfo->state & SUBMIT_STATUS_URB) { err = uas_submit_sense_urb(cmnd->device->host, gfp, cmdinfo->stream); diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index adbeb25..16a36b2 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -101,6 +101,12 @@ 
UNUSUAL_DEV( 0x03f0, 0x4002, 0x0001, 0x0001, "PhotoSmart R707", USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_FIX_CAPACITY), +UNUSUAL_DEV( 0x03f3, 0x0001, 0x0000, 0x9999, + "Adaptec", + "USBConnect 2000", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_euscsi_init, + US_FL_SCM_MULT_TARG ), + /* Reported by Sebastian Kapfer <sebastian_kapfer@gmx.net> * and Olaf Hering <olh@suse.de> (different bcd's, same vendor/product) * for USB floppies that need the SINGLE_LUN enforcement. @@ -234,6 +240,20 @@ UNUSUAL_DEV( 0x0421, 0x0495, 0x0370, 0x0370, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_MAX_SECTORS_64 ), +/* Reported by Daniele Forsi <dforsi@gmail.com> */ +UNUSUAL_DEV( 0x0421, 0x04b9, 0x0350, 0x0350, + "Nokia", + "5300", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_MAX_SECTORS_64 ), + +/* Patch submitted by Victor A. Santos <victoraur.santos@gmail.com> */ +UNUSUAL_DEV( 0x0421, 0x05af, 0x0742, 0x0742, + "Nokia", + "305", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_MAX_SECTORS_64), + /* Patch submitted by Mikhail Zolotaryov <lebon@lebon.org.ua> */ UNUSUAL_DEV( 0x0421, 0x06aa, 0x1110, 0x1110, "Nokia", @@ -472,18 +492,24 @@ UNUSUAL_DEV( 0x04e6, 0x000a, 0x0200, 0x0200, "eUSB CompactFlash Adapter", USB_SC_8020, USB_PR_CB, NULL, 0), -UNUSUAL_DEV( 0x04e6, 0x000B, 0x0100, 0x0100, +UNUSUAL_DEV( 0x04e6, 0x000b, 0x0100, 0x0100, "Shuttle", "eUSCSI Bridge", USB_SC_SCSI, USB_PR_BULK, usb_stor_euscsi_init, US_FL_SCM_MULT_TARG ), -UNUSUAL_DEV( 0x04e6, 0x000C, 0x0100, 0x0100, +UNUSUAL_DEV( 0x04e6, 0x000c, 0x0100, 0x0100, "Shuttle", "eUSCSI Bridge", USB_SC_SCSI, USB_PR_BULK, usb_stor_euscsi_init, US_FL_SCM_MULT_TARG ), +UNUSUAL_DEV( 0x04e6, 0x000f, 0x0000, 0x9999, + "SCM Microsystems", + "eUSB SCSI Adapter (Bus Powered)", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_euscsi_init, + US_FL_SCM_MULT_TARG ), + UNUSUAL_DEV( 0x04e6, 0x0101, 0x0200, 0x0200, "Shuttle", "CD-RW Device", @@ -727,6 +753,12 @@ UNUSUAL_DEV( 0x059b, 0x0001, 0x0100, 0x0100, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_SINGLE_LUN ), 
+UNUSUAL_DEV( 0x059b, 0x0040, 0x0100, 0x0100, + "Iomega", + "Jaz USB Adapter", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_SINGLE_LUN ), + /* Reported by <Hendryk.Pfeiffer@gmx.de> */ UNUSUAL_DEV( 0x059f, 0x0643, 0x0000, 0x0000, "LaCie", @@ -1067,6 +1099,13 @@ UNUSUAL_DEV( 0x0840, 0x0085, 0x0001, 0x0001, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_FIX_CAPACITY), +/* Supplied with some Castlewood ORB removable drives */ +UNUSUAL_DEV( 0x084b, 0xa001, 0x0000, 0x9999, + "Castlewood Systems", + "USB to SCSI cable", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_euscsi_init, + US_FL_SCM_MULT_TARG ), + /* Entry and supporting patch by Theodore Kilgore <kilgota@auburn.edu>. * Flag will support Bulk devices which use a standards-violating 32-byte * Command Block Wrapper. Here, the "DC2MEGA" cameras (several brands) with @@ -1099,6 +1138,18 @@ UNUSUAL_DEV( 0x0851, 0x1543, 0x0200, 0x0200, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NOT_LOCKABLE), +UNUSUAL_DEV( 0x085a, 0x0026, 0x0100, 0x0133, + "Xircom", + "PortGear USB-SCSI (Mac USB Dock)", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_euscsi_init, + US_FL_SCM_MULT_TARG ), + +UNUSUAL_DEV( 0x085a, 0x0028, 0x0100, 0x0133, + "Xircom", + "PortGear USB to SCSI Converter", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_euscsi_init, + US_FL_SCM_MULT_TARG ), + /* Submitted by Jan De Luyck <lkml@kcore.org> */ UNUSUAL_DEV( 0x08bd, 0x1100, 0x0000, 0x0000, "CITIZEN", @@ -1931,6 +1982,14 @@ UNUSUAL_DEV( 0x152d, 0x2329, 0x0100, 0x0100, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_IGNORE_RESIDUE | US_FL_SANE_SENSE ), +/* Entrega Technologies U1-SC25 (later Xircom PortGear PGSCSI) + * and Mac USB Dock USB-SCSI */ +UNUSUAL_DEV( 0x1645, 0x0007, 0x0100, 0x0133, + "Entrega Technologies", + "USB to SCSI Converter", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_euscsi_init, + US_FL_SCM_MULT_TARG ), + /* Reported by Robert Schedel <r.schedel@yahoo.de> * Note: this is a 'super top' device like the above 14cd/6600 device */ UNUSUAL_DEV( 0x1652, 0x6600, 0x0201, 0x0201, @@ 
-1953,6 +2012,12 @@ UNUSUAL_DEV( 0x177f, 0x0400, 0x0000, 0x0000, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_BULK_IGNORE_TAG | US_FL_MAX_SECTORS_64 ), +UNUSUAL_DEV( 0x1822, 0x0001, 0x0000, 0x9999, + "Ariston Technologies", + "iConnect USB to SCSI adapter", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_euscsi_init, + US_FL_SCM_MULT_TARG ), + /* Reported by Hans de Goede <hdegoede@redhat.com> * These Appotech controllers are found in Picture Frames, they provide a * (buggy) emulation of a cdrom drive which contains the windows software @@ -1998,6 +2063,13 @@ UNUSUAL_DEV( 0x1e74, 0x4621, 0x0000, 0x0000, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_BULK_IGNORE_TAG | US_FL_MAX_SECTORS_64 ), +/* Supplied with some Castlewood ORB removable drives */ +UNUSUAL_DEV( 0x2027, 0xa001, 0x0000, 0x9999, + "Double-H Technology", + "USB to SCSI Intelligent Cable", + USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_euscsi_init, + US_FL_SCM_MULT_TARG ), + UNUSUAL_DEV( 0x2116, 0x0320, 0x0001, 0x0001, "ST", "2A", diff --git a/drivers/uwb/lc-dev.c b/drivers/uwb/lc-dev.c index 9209eaf..41db3b1 100644 --- a/drivers/uwb/lc-dev.c +++ b/drivers/uwb/lc-dev.c @@ -441,16 +441,19 @@ void uwbd_dev_onair(struct uwb_rc *rc, struct uwb_beca_e *bce) uwb_dev->mac_addr = *bce->mac_addr; uwb_dev->dev_addr = bce->dev_addr; dev_set_name(&uwb_dev->dev, "%s", macbuf); + + /* plug the beacon cache */ + bce->uwb_dev = uwb_dev; + uwb_dev->bce = bce; + uwb_bce_get(bce); /* released in uwb_dev_sys_release() */ + result = uwb_dev_add(uwb_dev, &rc->uwb_dev.dev, rc); if (result < 0) { dev_err(dev, "new device %s: cannot instantiate device\n", macbuf); goto error_dev_add; } - /* plug the beacon cache */ - bce->uwb_dev = uwb_dev; - uwb_dev->bce = bce; - uwb_bce_get(bce); /* released in uwb_dev_sys_release() */ + dev_info(dev, "uwb device (mac %s dev %s) connected to %s %s\n", macbuf, devbuf, rc->uwb_dev.dev.parent->bus->name, dev_name(rc->uwb_dev.dev.parent)); @@ -458,6 +461,8 @@ void uwbd_dev_onair(struct uwb_rc *rc, struct uwb_beca_e 
*bce) return; error_dev_add: + bce->uwb_dev = NULL; + uwb_bce_put(bce); kfree(uwb_dev); return; } diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 6ab71b9..275aa3fc 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -821,13 +821,11 @@ static const struct vfio_device_ops vfio_pci_ops = { static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { - u8 type; struct vfio_pci_device *vdev; struct iommu_group *group; int ret; - pci_read_config_byte(pdev, PCI_HEADER_TYPE, &type); - if ((type & PCI_HEADER_TYPE) != PCI_HEADER_TYPE_NORMAL) + if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL) return -EINVAL; group = iommu_group_get(&pdev->dev); diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index d300fd9..d7fddc7 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -861,6 +861,23 @@ vhost_scsi_map_iov_to_sgl(struct tcm_vhost_cmd *cmd, return 0; } +static int vhost_scsi_to_tcm_attr(int attr) +{ + switch (attr) { + case VIRTIO_SCSI_S_SIMPLE: + return MSG_SIMPLE_TAG; + case VIRTIO_SCSI_S_ORDERED: + return MSG_ORDERED_TAG; + case VIRTIO_SCSI_S_HEAD: + return MSG_HEAD_TAG; + case VIRTIO_SCSI_S_ACA: + return MSG_ACA_TAG; + default: + break; + } + return MSG_SIMPLE_TAG; +} + static void tcm_vhost_submission_work(struct work_struct *work) { struct tcm_vhost_cmd *cmd = @@ -887,9 +904,9 @@ static void tcm_vhost_submission_work(struct work_struct *work) rc = target_submit_cmd_map_sgls(se_cmd, tv_nexus->tvn_se_sess, cmd->tvc_cdb, &cmd->tvc_sense_buf[0], cmd->tvc_lun, cmd->tvc_exp_data_len, - cmd->tvc_task_attr, cmd->tvc_data_direction, - TARGET_SCF_ACK_KREF, sg_ptr, cmd->tvc_sgl_count, - sg_bidi_ptr, sg_no_bidi); + vhost_scsi_to_tcm_attr(cmd->tvc_task_attr), + cmd->tvc_data_direction, TARGET_SCF_ACK_KREF, + sg_ptr, cmd->tvc_sgl_count, sg_bidi_ptr, sg_no_bidi); if (rc < 0) { transport_send_check_condition_and_sense(se_cmd, TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE, 0); @@ -1194,6 +1211,7 @@ static 
int vhost_scsi_set_endpoint(struct vhost_scsi *vs, struct vhost_scsi_target *t) { + struct se_portal_group *se_tpg; struct tcm_vhost_tport *tv_tport; struct tcm_vhost_tpg *tpg; struct tcm_vhost_tpg **vs_tpg; @@ -1241,6 +1259,21 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, ret = -EEXIST; goto out; } + /* + * In order to ensure individual vhost-scsi configfs + * groups cannot be removed while in use by vhost ioctl, + * go ahead and take an explicit se_tpg->tpg_group.cg_item + * dependency now. + */ + se_tpg = &tpg->se_tpg; + ret = configfs_depend_item(se_tpg->se_tpg_tfo->tf_subsys, + &se_tpg->tpg_group.cg_item); + if (ret) { + pr_warn("configfs_depend_item() failed: %d\n", ret); + kfree(vs_tpg); + mutex_unlock(&tpg->tv_tpg_mutex); + goto out; + } tpg->tv_tpg_vhost_count++; tpg->vhost_scsi = vs; vs_tpg[tpg->tport_tpgt] = tpg; @@ -1283,6 +1316,7 @@ static int vhost_scsi_clear_endpoint(struct vhost_scsi *vs, struct vhost_scsi_target *t) { + struct se_portal_group *se_tpg; struct tcm_vhost_tport *tv_tport; struct tcm_vhost_tpg *tpg; struct vhost_virtqueue *vq; @@ -1331,6 +1365,13 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs, vs->vs_tpg[target] = NULL; match = true; mutex_unlock(&tpg->tv_tpg_mutex); + /* + * Release se_tpg->tpg_group.cg_item configfs dependency now + * to allow vhost-scsi WWPN se_tpg->tpg_group shutdown to occur. 
+ */ + se_tpg = &tpg->se_tpg; + configfs_undepend_item(se_tpg->se_tpg_tfo->tf_subsys, + &se_tpg->tpg_group.cg_item); } if (match) { for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { diff --git a/drivers/video/atmel_lcdfb.c b/drivers/video/atmel_lcdfb.c index 088511a..0aca4e6 100644 --- a/drivers/video/atmel_lcdfb.c +++ b/drivers/video/atmel_lcdfb.c @@ -1081,6 +1081,12 @@ static int __init atmel_lcdfb_probe(struct platform_device *pdev) goto free_cmap; } + ret = atmel_lcdfb_set_par(info); + if (ret < 0) { + dev_err(dev, "set par failed: %d\n", ret); + goto unregister_irqs; + } + dev_set_drvdata(dev, info); /* diff --git a/drivers/video/aty/mach64_accel.c b/drivers/video/aty/mach64_accel.c index e45833c..182bd68 100644 --- a/drivers/video/aty/mach64_accel.c +++ b/drivers/video/aty/mach64_accel.c @@ -4,6 +4,7 @@ */ #include <linux/delay.h> +#include <asm/unaligned.h> #include <linux/fb.h> #include <video/mach64.h> #include "atyfb.h" @@ -419,7 +420,7 @@ void atyfb_imageblit(struct fb_info *info, const struct fb_image *image) u32 *pbitmap, dwords = (src_bytes + 3) / 4; for (pbitmap = (u32*)(image->data); dwords; dwords--, pbitmap++) { wait_for_fifo(1, par); - aty_st_le32(HOST_DATA0, le32_to_cpup(pbitmap), par); + aty_st_le32(HOST_DATA0, get_unaligned_le32(pbitmap), par); } } diff --git a/drivers/video/aty/mach64_cursor.c b/drivers/video/aty/mach64_cursor.c index 95ec042..0fe02e2 100644 --- a/drivers/video/aty/mach64_cursor.c +++ b/drivers/video/aty/mach64_cursor.c @@ -5,6 +5,7 @@ #include <linux/fb.h> #include <linux/init.h> #include <linux/string.h> +#include "../fb_draw.h" #include <asm/io.h> @@ -157,24 +158,33 @@ static int atyfb_cursor(struct fb_info *info, struct fb_cursor *cursor) for (i = 0; i < height; i++) { for (j = 0; j < width; j++) { + u16 l = 0xaaaa; b = *src++; m = *msk++; switch (cursor->rop) { case ROP_XOR: // Upper 4 bits of mask data - fb_writeb(cursor_bits_lookup[(b ^ m) >> 4], dst++); + l = cursor_bits_lookup[(b ^ m) >> 4] | // Lower 4 bits of mask - 
fb_writeb(cursor_bits_lookup[(b ^ m) & 0x0f], - dst++); + (cursor_bits_lookup[(b ^ m) & 0x0f] << 8); break; case ROP_COPY: // Upper 4 bits of mask data - fb_writeb(cursor_bits_lookup[(b & m) >> 4], dst++); + l = cursor_bits_lookup[(b & m) >> 4] | // Lower 4 bits of mask - fb_writeb(cursor_bits_lookup[(b & m) & 0x0f], - dst++); + (cursor_bits_lookup[(b & m) & 0x0f] << 8); break; } + /* + * If cursor size is not a multiple of 8 characters + * we must pad it with transparent pattern (0xaaaa). + */ + if ((j + 1) * 8 > cursor->image.width) { + l = comp(l, 0xaaaa, + (1 << ((cursor->image.width & 7) * 2)) - 1); + } + fb_writeb(l & 0xff, dst++); + fb_writeb(l >> 8, dst++); } dst += offset; } diff --git a/drivers/video/cfbcopyarea.c b/drivers/video/cfbcopyarea.c index bb5a96b..bcb5723 100644 --- a/drivers/video/cfbcopyarea.c +++ b/drivers/video/cfbcopyarea.c @@ -43,13 +43,22 @@ */ static void -bitcpy(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, - const unsigned long __iomem *src, int src_idx, int bits, +bitcpy(struct fb_info *p, unsigned long __iomem *dst, unsigned dst_idx, + const unsigned long __iomem *src, unsigned src_idx, int bits, unsigned n, u32 bswapmask) { unsigned long first, last; int const shift = dst_idx-src_idx; - int left, right; + +#if 0 + /* + * If you suspect bug in this function, compare it with this simple + * memmove implementation. 
+ */ + fb_memmove((char *)dst + ((dst_idx & (bits - 1))) / 8, + (char *)src + ((src_idx & (bits - 1))) / 8, n / 8); + return; +#endif first = fb_shifted_pixels_mask_long(p, dst_idx, bswapmask); last = ~fb_shifted_pixels_mask_long(p, (dst_idx+n) % bits, bswapmask); @@ -98,9 +107,8 @@ bitcpy(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, unsigned long d0, d1; int m; - right = shift & (bits - 1); - left = -shift & (bits - 1); - bswapmask &= shift; + int const left = shift & (bits - 1); + int const right = -shift & (bits - 1); if (dst_idx+n <= bits) { // Single destination word @@ -110,15 +118,15 @@ bitcpy(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, d0 = fb_rev_pixels_in_long(d0, bswapmask); if (shift > 0) { // Single source word - d0 >>= right; + d0 <<= left; } else if (src_idx+n <= bits) { // Single source word - d0 <<= left; + d0 >>= right; } else { // 2 source words d1 = FB_READL(src + 1); d1 = fb_rev_pixels_in_long(d1, bswapmask); - d0 = d0<<left | d1>>right; + d0 = d0 >> right | d1 << left; } d0 = fb_rev_pixels_in_long(d0, bswapmask); FB_WRITEL(comp(d0, FB_READL(dst), first), dst); @@ -135,60 +143,59 @@ bitcpy(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, if (shift > 0) { // Single source word d1 = d0; - d0 >>= right; - dst++; + d0 <<= left; n -= bits - dst_idx; } else { // 2 source words d1 = FB_READL(src++); d1 = fb_rev_pixels_in_long(d1, bswapmask); - d0 = d0<<left | d1>>right; - dst++; + d0 = d0 >> right | d1 << left; n -= bits - dst_idx; } d0 = fb_rev_pixels_in_long(d0, bswapmask); FB_WRITEL(comp(d0, FB_READL(dst), first), dst); d0 = d1; + dst++; // Main chunk m = n % bits; n /= bits; while ((n >= 4) && !bswapmask) { d1 = FB_READL(src++); - FB_WRITEL(d0 << left | d1 >> right, dst++); + FB_WRITEL(d0 >> right | d1 << left, dst++); d0 = d1; d1 = FB_READL(src++); - FB_WRITEL(d0 << left | d1 >> right, dst++); + FB_WRITEL(d0 >> right | d1 << left, dst++); d0 = d1; d1 = FB_READL(src++); - FB_WRITEL(d0 << left | d1 >> 
right, dst++); + FB_WRITEL(d0 >> right | d1 << left, dst++); d0 = d1; d1 = FB_READL(src++); - FB_WRITEL(d0 << left | d1 >> right, dst++); + FB_WRITEL(d0 >> right | d1 << left, dst++); d0 = d1; n -= 4; } while (n--) { d1 = FB_READL(src++); d1 = fb_rev_pixels_in_long(d1, bswapmask); - d0 = d0 << left | d1 >> right; + d0 = d0 >> right | d1 << left; d0 = fb_rev_pixels_in_long(d0, bswapmask); FB_WRITEL(d0, dst++); d0 = d1; } // Trailing bits - if (last) { - if (m <= right) { + if (m) { + if (m <= bits - right) { // Single source word - d0 <<= left; + d0 >>= right; } else { // 2 source words d1 = FB_READL(src); d1 = fb_rev_pixels_in_long(d1, bswapmask); - d0 = d0<<left | d1>>right; + d0 = d0 >> right | d1 << left; } d0 = fb_rev_pixels_in_long(d0, bswapmask); FB_WRITEL(comp(d0, FB_READL(dst), last), dst); @@ -202,43 +209,46 @@ bitcpy(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, */ static void -bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, - const unsigned long __iomem *src, int src_idx, int bits, +bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, unsigned dst_idx, + const unsigned long __iomem *src, unsigned src_idx, int bits, unsigned n, u32 bswapmask) { unsigned long first, last; int shift; - dst += (n-1)/bits; - src += (n-1)/bits; - if ((n-1) % bits) { - dst_idx += (n-1) % bits; - dst += dst_idx >> (ffs(bits) - 1); - dst_idx &= bits - 1; - src_idx += (n-1) % bits; - src += src_idx >> (ffs(bits) - 1); - src_idx &= bits - 1; - } +#if 0 + /* + * If you suspect bug in this function, compare it with this simple + * memmove implementation. 
+ */ + fb_memmove((char *)dst + ((dst_idx & (bits - 1))) / 8, + (char *)src + ((src_idx & (bits - 1))) / 8, n / 8); + return; +#endif + + dst += (dst_idx + n - 1) / bits; + src += (src_idx + n - 1) / bits; + dst_idx = (dst_idx + n - 1) % bits; + src_idx = (src_idx + n - 1) % bits; shift = dst_idx-src_idx; - first = fb_shifted_pixels_mask_long(p, bits - 1 - dst_idx, bswapmask); - last = ~fb_shifted_pixels_mask_long(p, bits - 1 - ((dst_idx-n) % bits), - bswapmask); + first = ~fb_shifted_pixels_mask_long(p, (dst_idx + 1) % bits, bswapmask); + last = fb_shifted_pixels_mask_long(p, (bits + dst_idx + 1 - n) % bits, bswapmask); if (!shift) { // Same alignment for source and dest if ((unsigned long)dst_idx+1 >= n) { // Single word - if (last) - first &= last; - FB_WRITEL( comp( FB_READL(src), FB_READL(dst), first), dst); + if (first) + last &= first; + FB_WRITEL( comp( FB_READL(src), FB_READL(dst), last), dst); } else { // Multiple destination words // Leading bits - if (first != ~0UL) { + if (first) { FB_WRITEL( comp( FB_READL(src), FB_READL(dst), first), dst); dst--; src--; @@ -262,7 +272,7 @@ bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, FB_WRITEL(FB_READL(src--), dst--); // Trailing bits - if (last) + if (last != -1UL) FB_WRITEL( comp( FB_READL(src), FB_READL(dst), last), dst); } } else { @@ -270,29 +280,28 @@ bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, unsigned long d0, d1; int m; - int const left = -shift & (bits-1); - int const right = shift & (bits-1); - bswapmask &= shift; + int const left = shift & (bits-1); + int const right = -shift & (bits-1); if ((unsigned long)dst_idx+1 >= n) { // Single destination word - if (last) - first &= last; + if (first) + last &= first; d0 = FB_READL(src); if (shift < 0) { // Single source word - d0 <<= left; + d0 >>= right; } else if (1+(unsigned long)src_idx >= n) { // Single source word - d0 >>= right; + d0 <<= left; } else { // 2 source words d1 = FB_READL(src - 1); d1 = 
fb_rev_pixels_in_long(d1, bswapmask); - d0 = d0>>right | d1<<left; + d0 = d0 << left | d1 >> right; } d0 = fb_rev_pixels_in_long(d0, bswapmask); - FB_WRITEL(comp(d0, FB_READL(dst), first), dst); + FB_WRITEL(comp(d0, FB_READL(dst), last), dst); } else { // Multiple destination words /** We must always remember the last value read, because in case @@ -307,12 +316,12 @@ bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, if (shift < 0) { // Single source word d1 = d0; - d0 <<= left; + d0 >>= right; } else { // 2 source words d1 = FB_READL(src--); d1 = fb_rev_pixels_in_long(d1, bswapmask); - d0 = d0>>right | d1<<left; + d0 = d0 << left | d1 >> right; } d0 = fb_rev_pixels_in_long(d0, bswapmask); FB_WRITEL(comp(d0, FB_READL(dst), first), dst); @@ -325,39 +334,39 @@ bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, n /= bits; while ((n >= 4) && !bswapmask) { d1 = FB_READL(src--); - FB_WRITEL(d0 >> right | d1 << left, dst--); + FB_WRITEL(d0 << left | d1 >> right, dst--); d0 = d1; d1 = FB_READL(src--); - FB_WRITEL(d0 >> right | d1 << left, dst--); + FB_WRITEL(d0 << left | d1 >> right, dst--); d0 = d1; d1 = FB_READL(src--); - FB_WRITEL(d0 >> right | d1 << left, dst--); + FB_WRITEL(d0 << left | d1 >> right, dst--); d0 = d1; d1 = FB_READL(src--); - FB_WRITEL(d0 >> right | d1 << left, dst--); + FB_WRITEL(d0 << left | d1 >> right, dst--); d0 = d1; n -= 4; } while (n--) { d1 = FB_READL(src--); d1 = fb_rev_pixels_in_long(d1, bswapmask); - d0 = d0 >> right | d1 << left; + d0 = d0 << left | d1 >> right; d0 = fb_rev_pixels_in_long(d0, bswapmask); FB_WRITEL(d0, dst--); d0 = d1; } // Trailing bits - if (last) { - if (m <= left) { + if (m) { + if (m <= bits - left) { // Single source word - d0 >>= right; + d0 <<= left; } else { // 2 source words d1 = FB_READL(src); d1 = fb_rev_pixels_in_long(d1, bswapmask); - d0 = d0>>right | d1<<left; + d0 = d0 << left | d1 >> right; } d0 = fb_rev_pixels_in_long(d0, bswapmask); FB_WRITEL(comp(d0, FB_READL(dst), 
last), dst); @@ -371,9 +380,9 @@ void cfb_copyarea(struct fb_info *p, const struct fb_copyarea *area) u32 dx = area->dx, dy = area->dy, sx = area->sx, sy = area->sy; u32 height = area->height, width = area->width; unsigned long const bits_per_line = p->fix.line_length*8u; - unsigned long __iomem *dst = NULL, *src = NULL; + unsigned long __iomem *base = NULL; int bits = BITS_PER_LONG, bytes = bits >> 3; - int dst_idx = 0, src_idx = 0, rev_copy = 0; + unsigned dst_idx = 0, src_idx = 0, rev_copy = 0; u32 bswapmask = fb_compute_bswapmask(p); if (p->state != FBINFO_STATE_RUNNING) @@ -389,7 +398,7 @@ void cfb_copyarea(struct fb_info *p, const struct fb_copyarea *area) // split the base of the framebuffer into a long-aligned address and the // index of the first bit - dst = src = (unsigned long __iomem *)((unsigned long)p->screen_base & ~(bytes-1)); + base = (unsigned long __iomem *)((unsigned long)p->screen_base & ~(bytes-1)); dst_idx = src_idx = 8*((unsigned long)p->screen_base & (bytes-1)); // add offset of source and target area dst_idx += dy*bits_per_line + dx*p->var.bits_per_pixel; @@ -402,20 +411,14 @@ void cfb_copyarea(struct fb_info *p, const struct fb_copyarea *area) while (height--) { dst_idx -= bits_per_line; src_idx -= bits_per_line; - dst += dst_idx >> (ffs(bits) - 1); - dst_idx &= (bytes - 1); - src += src_idx >> (ffs(bits) - 1); - src_idx &= (bytes - 1); - bitcpy_rev(p, dst, dst_idx, src, src_idx, bits, + bitcpy_rev(p, base + (dst_idx / bits), dst_idx % bits, + base + (src_idx / bits), src_idx % bits, bits, width*p->var.bits_per_pixel, bswapmask); } } else { while (height--) { - dst += dst_idx >> (ffs(bits) - 1); - dst_idx &= (bytes - 1); - src += src_idx >> (ffs(bits) - 1); - src_idx &= (bytes - 1); - bitcpy(p, dst, dst_idx, src, src_idx, bits, + bitcpy(p, base + (dst_idx / bits), dst_idx % bits, + base + (src_idx / bits), src_idx % bits, bits, width*p->var.bits_per_pixel, bswapmask); dst_idx += bits_per_line; src_idx += bits_per_line; diff --git 
a/drivers/video/console/bitblit.c b/drivers/video/console/bitblit.c index 61b182b..dbfe4ee 100644 --- a/drivers/video/console/bitblit.c +++ b/drivers/video/console/bitblit.c @@ -205,7 +205,6 @@ static void bit_putcs(struct vc_data *vc, struct fb_info *info, static void bit_clear_margins(struct vc_data *vc, struct fb_info *info, int bottom_only) { - int bgshift = (vc->vc_hi_font_mask) ? 13 : 12; unsigned int cw = vc->vc_font.width; unsigned int ch = vc->vc_font.height; unsigned int rw = info->var.xres - (vc->vc_cols*cw); @@ -214,7 +213,7 @@ static void bit_clear_margins(struct vc_data *vc, struct fb_info *info, unsigned int bs = info->var.yres - bh; struct fb_fillrect region; - region.color = attr_bgcol_ec(bgshift, vc, info); + region.color = 0; region.rop = ROP_COPY; if (rw && !bottom_only) { diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c index cd8a802..9297a9b 100644 --- a/drivers/video/console/fbcon.c +++ b/drivers/video/console/fbcon.c @@ -759,7 +759,7 @@ static int con2fb_release_oldinfo(struct vc_data *vc, struct fb_info *oldinfo, newinfo in an undefined state. Thus, a call to fb_set_par() may be needed for the newinfo. */ - if (newinfo->fbops->fb_set_par) { + if (newinfo && newinfo->fbops->fb_set_par) { ret = newinfo->fbops->fb_set_par(newinfo); if (ret) @@ -3028,8 +3028,31 @@ static int fbcon_fb_unbind(int idx) if (con2fb_map[i] == idx) set_con2fb_map(i, new_idx, 0); } - } else + } else { + struct fb_info *info = registered_fb[idx]; + + /* This is sort of like set_con2fb_map, except it maps + * the consoles to no device and then releases the + * oldinfo to free memory and cancel the cursor blink + * timer. 
I can imagine this just becoming part of + * set_con2fb_map where new_idx is -1 + */ + for (i = first_fb_vc; i <= last_fb_vc; i++) { + if (con2fb_map[i] == idx) { + con2fb_map[i] = -1; + if (!search_fb_in_map(idx)) { + ret = con2fb_release_oldinfo(vc_cons[i].d, + info, NULL, i, + idx, 0); + if (ret) { + con2fb_map[i] = idx; + return ret; + } + } + } + } ret = fbcon_unbind(); + } return ret; } diff --git a/drivers/video/console/fbcon_ccw.c b/drivers/video/console/fbcon_ccw.c index 41b32ae..5a3cbf6 100644 --- a/drivers/video/console/fbcon_ccw.c +++ b/drivers/video/console/fbcon_ccw.c @@ -197,9 +197,8 @@ static void ccw_clear_margins(struct vc_data *vc, struct fb_info *info, unsigned int bh = info->var.xres - (vc->vc_rows*ch); unsigned int bs = vc->vc_rows*ch; struct fb_fillrect region; - int bgshift = (vc->vc_hi_font_mask) ? 13 : 12; - region.color = attr_bgcol_ec(bgshift,vc,info); + region.color = 0; region.rop = ROP_COPY; if (rw && !bottom_only) { diff --git a/drivers/video/console/fbcon_cw.c b/drivers/video/console/fbcon_cw.c index a93670e..e7ee44d 100644 --- a/drivers/video/console/fbcon_cw.c +++ b/drivers/video/console/fbcon_cw.c @@ -180,9 +180,8 @@ static void cw_clear_margins(struct vc_data *vc, struct fb_info *info, unsigned int bh = info->var.xres - (vc->vc_rows*ch); unsigned int rs = info->var.yres - rw; struct fb_fillrect region; - int bgshift = (vc->vc_hi_font_mask) ? 13 : 12; - region.color = attr_bgcol_ec(bgshift,vc,info); + region.color = 0; region.rop = ROP_COPY; if (rw && !bottom_only) { diff --git a/drivers/video/console/fbcon_ud.c b/drivers/video/console/fbcon_ud.c index ff0872c..19e3714 100644 --- a/drivers/video/console/fbcon_ud.c +++ b/drivers/video/console/fbcon_ud.c @@ -227,9 +227,8 @@ static void ud_clear_margins(struct vc_data *vc, struct fb_info *info, unsigned int rw = info->var.xres - (vc->vc_cols*cw); unsigned int bh = info->var.yres - (vc->vc_rows*ch); struct fb_fillrect region; - int bgshift = (vc->vc_hi_font_mask) ? 
13 : 12; - region.color = attr_bgcol_ec(bgshift,vc,info); + region.color = 0; region.rop = ROP_COPY; if (rw && !bottom_only) { diff --git a/drivers/video/fb-puv3.c b/drivers/video/fb-puv3.c index 27fc956..5201125 100644 --- a/drivers/video/fb-puv3.c +++ b/drivers/video/fb-puv3.c @@ -18,8 +18,10 @@ #include <linux/fb.h> #include <linux/init.h> #include <linux/console.h> +#include <linux/mm.h> #include <asm/sizes.h> +#include <asm/pgtable.h> #include <mach/hardware.h> /* Platform_data reserved for unifb registers. */ diff --git a/drivers/video/logo/logo.c b/drivers/video/logo/logo.c index 080c35b..cc5dbb5 100644 --- a/drivers/video/logo/logo.c +++ b/drivers/video/logo/logo.c @@ -25,6 +25,21 @@ static bool nologo; module_param(nologo, bool, 0); MODULE_PARM_DESC(nologo, "Disables startup logo"); +/* + * Logos are located in the initdata, and will be freed in kernel_init. + * Use late_init to mark the logos as freed to prevent any further use. + */ + +static bool logos_freed; + +static int __init fb_logo_late_init(void) +{ + logos_freed = true; + return 0; +} + +late_initcall(fb_logo_late_init); + /* logo's are marked __initdata. Use __init_refok to tell * modpost that it is intended that this function uses data * marked __initdata. 
@@ -33,7 +48,7 @@ const struct linux_logo * __init_refok fb_find_logo(int depth) { const struct linux_logo *logo = NULL; - if (nologo) + if (nologo || logos_freed) return NULL; if (depth >= 1) { diff --git a/drivers/video/matrox/matroxfb_accel.c b/drivers/video/matrox/matroxfb_accel.c index 8335a6f..0d5cb85 100644 --- a/drivers/video/matrox/matroxfb_accel.c +++ b/drivers/video/matrox/matroxfb_accel.c @@ -192,10 +192,18 @@ void matrox_cfbX_init(struct matrox_fb_info *minfo) minfo->accel.m_dwg_rect = M_DWG_TRAP | M_DWG_SOLID | M_DWG_ARZERO | M_DWG_SGNZERO | M_DWG_SHIFTZERO; if (isMilleniumII(minfo)) minfo->accel.m_dwg_rect |= M_DWG_TRANSC; minfo->accel.m_opmode = mopmode; + minfo->accel.m_access = maccess; + minfo->accel.m_pitch = mpitch; } EXPORT_SYMBOL(matrox_cfbX_init); +static void matrox_accel_restore_maccess(struct matrox_fb_info *minfo) +{ + mga_outl(M_MACCESS, minfo->accel.m_access); + mga_outl(M_PITCH, minfo->accel.m_pitch); +} + static void matrox_accel_bmove(struct matrox_fb_info *minfo, int vxres, int sy, int sx, int dy, int dx, int height, int width) { @@ -207,7 +215,8 @@ static void matrox_accel_bmove(struct matrox_fb_info *minfo, int vxres, int sy, CRITBEGIN if ((dy < sy) || ((dy == sy) && (dx <= sx))) { - mga_fifo(2); + mga_fifo(4); + matrox_accel_restore_maccess(minfo); mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | M_DWG_SGNZERO | M_DWG_BFCOL | M_DWG_REPLACE); mga_outl(M_AR5, vxres); @@ -215,7 +224,8 @@ static void matrox_accel_bmove(struct matrox_fb_info *minfo, int vxres, int sy, start = sy*vxres+sx+curr_ydstorg(minfo); end = start+width; } else { - mga_fifo(3); + mga_fifo(5); + matrox_accel_restore_maccess(minfo); mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | M_DWG_BFCOL | M_DWG_REPLACE); mga_outl(M_SGN, 5); mga_outl(M_AR5, -vxres); @@ -224,7 +234,8 @@ static void matrox_accel_bmove(struct matrox_fb_info *minfo, int vxres, int sy, start = end+width; dy += height-1; } - mga_fifo(4); + mga_fifo(6); + matrox_accel_restore_maccess(minfo); 
mga_outl(M_AR0, end); mga_outl(M_AR3, start); mga_outl(M_FXBNDRY, ((dx+width)<<16) | dx); @@ -246,7 +257,8 @@ static void matrox_accel_bmove_lin(struct matrox_fb_info *minfo, int vxres, CRITBEGIN if ((dy < sy) || ((dy == sy) && (dx <= sx))) { - mga_fifo(2); + mga_fifo(4); + matrox_accel_restore_maccess(minfo); mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | M_DWG_SGNZERO | M_DWG_BFCOL | M_DWG_REPLACE); mga_outl(M_AR5, vxres); @@ -254,7 +266,8 @@ static void matrox_accel_bmove_lin(struct matrox_fb_info *minfo, int vxres, start = sy*vxres+sx+curr_ydstorg(minfo); end = start+width; } else { - mga_fifo(3); + mga_fifo(5); + matrox_accel_restore_maccess(minfo); mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | M_DWG_BFCOL | M_DWG_REPLACE); mga_outl(M_SGN, 5); mga_outl(M_AR5, -vxres); @@ -263,7 +276,8 @@ static void matrox_accel_bmove_lin(struct matrox_fb_info *minfo, int vxres, start = end+width; dy += height-1; } - mga_fifo(5); + mga_fifo(7); + matrox_accel_restore_maccess(minfo); mga_outl(M_AR0, end); mga_outl(M_AR3, start); mga_outl(M_FXBNDRY, ((dx+width)<<16) | dx); @@ -298,7 +312,8 @@ static void matroxfb_accel_clear(struct matrox_fb_info *minfo, u_int32_t color, CRITBEGIN - mga_fifo(5); + mga_fifo(7); + matrox_accel_restore_maccess(minfo); mga_outl(M_DWGCTL, minfo->accel.m_dwg_rect | M_DWG_REPLACE); mga_outl(M_FCOL, color); mga_outl(M_FXBNDRY, ((sx + width) << 16) | sx); @@ -341,7 +356,8 @@ static void matroxfb_cfb4_clear(struct matrox_fb_info *minfo, u_int32_t bgx, width >>= 1; sx >>= 1; if (width) { - mga_fifo(5); + mga_fifo(7); + matrox_accel_restore_maccess(minfo); mga_outl(M_DWGCTL, minfo->accel.m_dwg_rect | M_DWG_REPLACE2); mga_outl(M_FCOL, bgx); mga_outl(M_FXBNDRY, ((sx + width) << 16) | sx); @@ -415,7 +431,8 @@ static void matroxfb_1bpp_imageblit(struct matrox_fb_info *minfo, u_int32_t fgx, CRITBEGIN - mga_fifo(3); + mga_fifo(5); + matrox_accel_restore_maccess(minfo); if (easy) mga_outl(M_DWGCTL, M_DWG_ILOAD | M_DWG_SGNZERO | M_DWG_SHIFTZERO | 
M_DWG_BMONOWF | M_DWG_LINEAR | M_DWG_REPLACE); else @@ -425,7 +442,8 @@ static void matroxfb_1bpp_imageblit(struct matrox_fb_info *minfo, u_int32_t fgx, fxbndry = ((xx + width - 1) << 16) | xx; mmio = minfo->mmio.vbase; - mga_fifo(6); + mga_fifo(8); + matrox_accel_restore_maccess(minfo); mga_writel(mmio, M_FXBNDRY, fxbndry); mga_writel(mmio, M_AR0, ar0); mga_writel(mmio, M_AR3, 0); diff --git a/drivers/video/matrox/matroxfb_base.h b/drivers/video/matrox/matroxfb_base.h index 11ed57b..89a8a89a 100644 --- a/drivers/video/matrox/matroxfb_base.h +++ b/drivers/video/matrox/matroxfb_base.h @@ -307,6 +307,8 @@ struct matrox_accel_data { #endif u_int32_t m_dwg_rect; u_int32_t m_opmode; + u_int32_t m_access; + u_int32_t m_pitch; }; struct v4l2_queryctrl; @@ -696,7 +698,7 @@ void matroxfb_unregister_driver(struct matroxfb_driver* drv); #define mga_fifo(n) do {} while ((mga_inl(M_FIFOSTATUS) & 0xFF) < (n)) -#define WaitTillIdle() do {} while (mga_inl(M_STATUS) & 0x10000) +#define WaitTillIdle() do { mga_inl(M_STATUS); do {} while (mga_inl(M_STATUS) & 0x10000); } while (0) /* code speedup */ #ifdef CONFIG_FB_MATROX_MILLENIUM diff --git a/drivers/video/offb.c b/drivers/video/offb.c index 0c4f343..9a0109b 100644 --- a/drivers/video/offb.c +++ b/drivers/video/offb.c @@ -301,7 +301,7 @@ static struct fb_ops offb_ops = { static void __iomem *offb_map_reg(struct device_node *np, int index, unsigned long offset, unsigned long size) { - const u32 *addrp; + const __be32 *addrp; u64 asize, taddr; unsigned int flags; @@ -369,7 +369,11 @@ static void offb_init_palette_hacks(struct fb_info *info, struct device_node *dp } of_node_put(pciparent); } else if (dp && of_device_is_compatible(dp, "qemu,std-vga")) { - const u32 io_of_addr[3] = { 0x01000000, 0x0, 0x0 }; +#ifdef __BIG_ENDIAN + const __be32 io_of_addr[3] = { 0x01000000, 0x0, 0x0 }; +#else + const __be32 io_of_addr[3] = { 0x00000001, 0x0, 0x0 }; +#endif u64 io_addr = of_translate_address(dp, io_of_addr); if (io_addr != OF_BAD_ADDR) { 
par->cmap_adr = ioremap(io_addr + 0x3c8, 2); @@ -536,7 +540,7 @@ static void __init offb_init_nodriver(struct device_node *dp, int no_real_node) unsigned int flags, rsize, addr_prop = 0; unsigned long max_size = 0; u64 rstart, address = OF_BAD_ADDR; - const u32 *pp, *addrp, *up; + const __be32 *pp, *addrp, *up; u64 asize; int foreign_endian = 0; @@ -552,25 +556,25 @@ static void __init offb_init_nodriver(struct device_node *dp, int no_real_node) if (pp == NULL) pp = of_get_property(dp, "depth", &len); if (pp && len == sizeof(u32)) - depth = *pp; + depth = be32_to_cpup(pp); pp = of_get_property(dp, "linux,bootx-width", &len); if (pp == NULL) pp = of_get_property(dp, "width", &len); if (pp && len == sizeof(u32)) - width = *pp; + width = be32_to_cpup(pp); pp = of_get_property(dp, "linux,bootx-height", &len); if (pp == NULL) pp = of_get_property(dp, "height", &len); if (pp && len == sizeof(u32)) - height = *pp; + height = be32_to_cpup(pp); pp = of_get_property(dp, "linux,bootx-linebytes", &len); if (pp == NULL) pp = of_get_property(dp, "linebytes", &len); if (pp && len == sizeof(u32) && (*pp != 0xffffffffu)) - pitch = *pp; + pitch = be32_to_cpup(pp); else pitch = width * ((depth + 7) / 8); diff --git a/drivers/video/tgafb.c b/drivers/video/tgafb.c index c9c8e5a..a78ca6a 100644 --- a/drivers/video/tgafb.c +++ b/drivers/video/tgafb.c @@ -188,6 +188,8 @@ tgafb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) if (var->xres_virtual != var->xres || var->yres_virtual != var->yres) return -EINVAL; + if (var->xres * var->yres * (var->bits_per_pixel >> 3) > info->fix.smem_len) + return -EINVAL; if (var->nonstd) return -EINVAL; if (1000000000 / var->pixclock > TGA_PLL_MAX_FREQ) @@ -268,6 +270,7 @@ tgafb_set_par(struct fb_info *info) par->yres = info->var.yres; par->pll_freq = pll_freq = 1000000000 / info->var.pixclock; par->bits_per_pixel = info->var.bits_per_pixel; + info->fix.line_length = par->xres * (par->bits_per_pixel >> 3); tga_type = par->tga_type; @@ 
-1142,222 +1145,57 @@ copyarea_line_32bpp(struct fb_info *info, u32 dy, u32 sy, __raw_writel(TGA_MODE_SBM_24BPP|TGA_MODE_SIMPLE, tga_regs+TGA_MODE_REG); } -/* The general case of forward copy in 8bpp mode. */ +/* The (almost) general case of backward copy in 8bpp mode. */ static inline void -copyarea_foreward_8bpp(struct fb_info *info, u32 dx, u32 dy, u32 sx, u32 sy, - u32 height, u32 width, u32 line_length) +copyarea_8bpp(struct fb_info *info, u32 dx, u32 dy, u32 sx, u32 sy, + u32 height, u32 width, u32 line_length, + const struct fb_copyarea *area) { struct tga_par *par = (struct tga_par *) info->par; - unsigned long i, copied, left; - unsigned long dpos, spos, dalign, salign, yincr; - u32 smask_first, dmask_first, dmask_last; - int pixel_shift, need_prime, need_second; - unsigned long n64, n32, xincr_first; + unsigned i, yincr; + int depos, sepos, backward, last_step, step; + u32 mask_last; + unsigned n32; void __iomem *tga_regs; void __iomem *tga_fb; - yincr = line_length; - if (dy > sy) { - dy += height - 1; - sy += height - 1; - yincr = -yincr; - } - - /* Compute the offsets and alignments in the frame buffer. - More than anything else, these control how we do copies. */ - dpos = dy * line_length + dx; - spos = sy * line_length + sx; - dalign = dpos & 7; - salign = spos & 7; - dpos &= -8; - spos &= -8; - - /* Compute the value for the PIXELSHIFT register. This controls - both non-co-aligned source and destination and copy direction. */ - if (dalign >= salign) - pixel_shift = dalign - salign; - else - pixel_shift = 8 - (salign - dalign); - - /* Figure out if we need an additional priming step for the - residue register. */ - need_prime = (salign > dalign); - if (need_prime) - dpos -= 8; - - /* Begin by copying the leading unaligned destination. Copy enough - to make the next destination address 32-byte aligned. 
*/ - copied = 32 - (dalign + (dpos & 31)); - if (copied == 32) - copied = 0; - xincr_first = (copied + 7) & -8; - smask_first = dmask_first = (1ul << copied) - 1; - smask_first <<= salign; - dmask_first <<= dalign + need_prime*8; - if (need_prime && copied > 24) - copied -= 8; - left = width - copied; - - /* Care for small copies. */ - if (copied > width) { - u32 t; - t = (1ul << width) - 1; - t <<= dalign + need_prime*8; - dmask_first &= t; - left = 0; - } - - /* Attempt to use 64-byte copies. This is only possible if the - source and destination are co-aligned at 64 bytes. */ - n64 = need_second = 0; - if ((dpos & 63) == (spos & 63) - && (height == 1 || line_length % 64 == 0)) { - /* We may need a 32-byte copy to ensure 64 byte alignment. */ - need_second = (dpos + xincr_first) & 63; - if ((need_second & 32) != need_second) - printk(KERN_ERR "tgafb: need_second wrong\n"); - if (left >= need_second + 64) { - left -= need_second; - n64 = left / 64; - left %= 64; - } else - need_second = 0; - } - - /* Copy trailing full 32-byte sections. This will be the main - loop if the 64 byte loop can't be used. */ - n32 = left / 32; - left %= 32; - - /* Copy the trailing unaligned destination. */ - dmask_last = (1ul << left) - 1; - - tga_regs = par->tga_regs_base; - tga_fb = par->tga_fb_base; - - /* Set up the MODE and PIXELSHIFT registers. 
*/ - __raw_writel(TGA_MODE_SBM_8BPP|TGA_MODE_COPY, tga_regs+TGA_MODE_REG); - __raw_writel(pixel_shift, tga_regs+TGA_PIXELSHIFT_REG); - wmb(); - - for (i = 0; i < height; ++i) { - unsigned long j; - void __iomem *sfb; - void __iomem *dfb; - - sfb = tga_fb + spos; - dfb = tga_fb + dpos; - if (dmask_first) { - __raw_writel(smask_first, sfb); - wmb(); - __raw_writel(dmask_first, dfb); - wmb(); - sfb += xincr_first; - dfb += xincr_first; - } - - if (need_second) { - __raw_writel(0xffffffff, sfb); - wmb(); - __raw_writel(0xffffffff, dfb); - wmb(); - sfb += 32; - dfb += 32; - } - - if (n64 && (((unsigned long)sfb | (unsigned long)dfb) & 63)) - printk(KERN_ERR - "tgafb: misaligned copy64 (s:%p, d:%p)\n", - sfb, dfb); - - for (j = 0; j < n64; ++j) { - __raw_writel(sfb - tga_fb, tga_regs+TGA_COPY64_SRC); - wmb(); - __raw_writel(dfb - tga_fb, tga_regs+TGA_COPY64_DST); - wmb(); - sfb += 64; - dfb += 64; - } - - for (j = 0; j < n32; ++j) { - __raw_writel(0xffffffff, sfb); - wmb(); - __raw_writel(0xffffffff, dfb); - wmb(); - sfb += 32; - dfb += 32; - } - - if (dmask_last) { - __raw_writel(0xffffffff, sfb); - wmb(); - __raw_writel(dmask_last, dfb); - wmb(); - } - - spos += yincr; - dpos += yincr; + /* Do acceleration only if we are aligned on 8 pixels */ + if ((dx | sx | width) & 7) { + cfb_copyarea(info, area); + return; } - /* Reset the MODE register to normal. */ - __raw_writel(TGA_MODE_SBM_8BPP|TGA_MODE_SIMPLE, tga_regs+TGA_MODE_REG); -} - -/* The (almost) general case of backward copy in 8bpp mode. 
*/ -static inline void -copyarea_backward_8bpp(struct fb_info *info, u32 dx, u32 dy, u32 sx, u32 sy, - u32 height, u32 width, u32 line_length, - const struct fb_copyarea *area) -{ - struct tga_par *par = (struct tga_par *) info->par; - unsigned long i, left, yincr; - unsigned long depos, sepos, dealign, sealign; - u32 mask_first, mask_last; - unsigned long n32; - void __iomem *tga_regs; - void __iomem *tga_fb; - yincr = line_length; if (dy > sy) { dy += height - 1; sy += height - 1; yincr = -yincr; } + backward = dy == sy && dx > sx && dx < sx + width; /* Compute the offsets and alignments in the frame buffer. More than anything else, these control how we do copies. */ - depos = dy * line_length + dx + width; - sepos = sy * line_length + sx + width; - dealign = depos & 7; - sealign = sepos & 7; - - /* ??? The documentation appears to be incorrect (or very - misleading) wrt how pixel shifting works in backward copy - mode, i.e. when PIXELSHIFT is negative. I give up for now. - Do handle the common case of co-aligned backward copies, - but frob everything else back on generic code. */ - if (dealign != sealign) { - cfb_copyarea(info, area); - return; - } - - /* We begin the copy with the trailing pixels of the - unaligned destination. */ - mask_first = (1ul << dealign) - 1; - left = width - dealign; - - /* Care for small copies. */ - if (dealign > width) { - mask_first ^= (1ul << (dealign - width)) - 1; - left = 0; - } + depos = dy * line_length + dx; + sepos = sy * line_length + sx; + if (backward) + depos += width, sepos += width; /* Next copy full words at a time. */ - n32 = left / 32; - left %= 32; + n32 = width / 32; + last_step = width % 32; /* Finally copy the unaligned head of the span. 
*/ - mask_last = -1 << (32 - left); + mask_last = (1ul << last_step) - 1; + + if (!backward) { + step = 32; + last_step = 32; + } else { + step = -32; + last_step = -last_step; + sepos -= 32; + depos -= 32; + } tga_regs = par->tga_regs_base; tga_fb = par->tga_fb_base; @@ -1374,25 +1212,33 @@ copyarea_backward_8bpp(struct fb_info *info, u32 dx, u32 dy, u32 sx, u32 sy, sfb = tga_fb + sepos; dfb = tga_fb + depos; - if (mask_first) { - __raw_writel(mask_first, sfb); - wmb(); - __raw_writel(mask_first, dfb); - wmb(); - } - for (j = 0; j < n32; ++j) { - sfb -= 32; - dfb -= 32; + for (j = 0; j < n32; j++) { + if (j < 2 && j + 1 < n32 && !backward && + !(((unsigned long)sfb | (unsigned long)dfb) & 63)) { + do { + __raw_writel(sfb - tga_fb, tga_regs+TGA_COPY64_SRC); + wmb(); + __raw_writel(dfb - tga_fb, tga_regs+TGA_COPY64_DST); + wmb(); + sfb += 64; + dfb += 64; + j += 2; + } while (j + 1 < n32); + j--; + continue; + } __raw_writel(0xffffffff, sfb); wmb(); __raw_writel(0xffffffff, dfb); wmb(); + sfb += step; + dfb += step; } if (mask_last) { - sfb -= 32; - dfb -= 32; + sfb += last_step - step; + dfb += last_step - step; __raw_writel(mask_last, sfb); wmb(); __raw_writel(mask_last, dfb); @@ -1453,14 +1299,9 @@ tgafb_copyarea(struct fb_info *info, const struct fb_copyarea *area) else if (bpp == 32) cfb_copyarea(info, area); - /* Detect overlapping source and destination that requires - a backward copy. 
*/ - else if (dy == sy && dx > sx && dx < sx + width) - copyarea_backward_8bpp(info, dx, dy, sx, sy, height, - width, line_length, area); else - copyarea_foreward_8bpp(info, dx, dy, sx, sy, height, - width, line_length); + copyarea_8bpp(info, dx, dy, sx, sy, height, + width, line_length, area); } @@ -1476,6 +1317,7 @@ tgafb_init_fix(struct fb_info *info) int tga_bus_tc = TGA_BUS_TC(par->dev); u8 tga_type = par->tga_type; const char *tga_type_name = NULL; + unsigned memory_size; switch (tga_type) { case TGA_TYPE_8PLANE: @@ -1483,21 +1325,25 @@ tgafb_init_fix(struct fb_info *info) tga_type_name = "Digital ZLXp-E1"; if (tga_bus_tc) tga_type_name = "Digital ZLX-E1"; + memory_size = 2097152; break; case TGA_TYPE_24PLANE: if (tga_bus_pci) tga_type_name = "Digital ZLXp-E2"; if (tga_bus_tc) tga_type_name = "Digital ZLX-E2"; + memory_size = 8388608; break; case TGA_TYPE_24PLUSZ: if (tga_bus_pci) tga_type_name = "Digital ZLXp-E3"; if (tga_bus_tc) tga_type_name = "Digital ZLX-E3"; + memory_size = 16777216; break; default: tga_type_name = "Unknown"; + memory_size = 16777216; break; } @@ -1509,9 +1355,8 @@ tgafb_init_fix(struct fb_info *info) ? FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_DIRECTCOLOR); - info->fix.line_length = par->xres * (par->bits_per_pixel >> 3); info->fix.smem_start = (size_t) par->tga_fb_base; - info->fix.smem_len = info->fix.line_length * par->yres; + info->fix.smem_len = memory_size; info->fix.mmio_start = (size_t) par->tga_regs_base; info->fix.mmio_len = 512; @@ -1635,6 +1480,9 @@ static int tgafb_register(struct device *dev) modedb_tga = &modedb_tc; modedbsize_tga = 1; } + + tgafb_init_fix(info); + ret = fb_find_mode(&info->var, info, mode_option ? 
mode_option : mode_option_tga, modedb_tga, modedbsize_tga, NULL, @@ -1652,7 +1500,6 @@ static int tgafb_register(struct device *dev) } tgafb_set_par(info); - tgafb_init_fix(info); if (register_framebuffer(info) < 0) { printk(KERN_ERR "tgafb: Could not register framebuffer\n"); diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index 98917fc..e2ccc0f 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -792,6 +792,7 @@ static int virtio_pci_restore(struct device *dev) struct pci_dev *pci_dev = to_pci_dev(dev); struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); struct virtio_driver *drv; + unsigned status = 0; int ret; drv = container_of(vp_dev->vdev.dev.driver, @@ -802,14 +803,40 @@ static int virtio_pci_restore(struct device *dev) return ret; pci_set_master(pci_dev); + /* We always start by resetting the device, in case a previous + * driver messed it up. */ + vp_reset(&vp_dev->vdev); + + /* Acknowledge that we've seen the device. */ + status |= VIRTIO_CONFIG_S_ACKNOWLEDGE; + vp_set_status(&vp_dev->vdev, status); + + /* Maybe driver failed before freeze. + * Restore the failed status, for debugging. */ + status |= vp_dev->saved_status & VIRTIO_CONFIG_S_FAILED; + vp_set_status(&vp_dev->vdev, status); + + if (!drv) + return 0; + + /* We have a driver! 
*/ + status |= VIRTIO_CONFIG_S_DRIVER; + vp_set_status(&vp_dev->vdev, status); + vp_finalize_features(&vp_dev->vdev); - if (drv && drv->restore) + if (drv->restore) { ret = drv->restore(&vp_dev->vdev); + if (ret) { + status |= VIRTIO_CONFIG_S_FAILED; + vp_set_status(&vp_dev->vdev, status); + return ret; + } + } /* Finally, tell the device we're all set */ - if (!ret) - vp_set_status(&vp_dev->vdev, vp_dev->saved_status); + status |= VIRTIO_CONFIG_S_DRIVER_OK; + vp_set_status(&vp_dev->vdev, status); return ret; } diff --git a/drivers/vlynq/vlynq.c b/drivers/vlynq/vlynq.c index 7b07135..c0227f9 100644 --- a/drivers/vlynq/vlynq.c +++ b/drivers/vlynq/vlynq.c @@ -762,7 +762,8 @@ static int vlynq_remove(struct platform_device *pdev) device_unregister(&dev->dev); iounmap(dev->local); - release_mem_region(dev->regs_start, dev->regs_end - dev->regs_start); + release_mem_region(dev->regs_start, + dev->regs_end - dev->regs_start + 1); kfree(dev); diff --git a/drivers/vme/bridges/vme_ca91cx42.c b/drivers/vme/bridges/vme_ca91cx42.c index 0b2fefb..1abbf80 100644 --- a/drivers/vme/bridges/vme_ca91cx42.c +++ b/drivers/vme/bridges/vme_ca91cx42.c @@ -869,14 +869,13 @@ static ssize_t ca91cx42_master_read(struct vme_master_resource *image, spin_lock(&image->lock); - /* The following code handles VME address alignment problem - * in order to assure the maximal data width cycle. - * We cannot use memcpy_xxx directly here because it - * may cut data transfer in 8-bits cycles, thus making - * D16 cycle impossible. - * From the other hand, the bridge itself assures that - * maximal configured data cycle is used and splits it - * automatically for non-aligned addresses. + /* The following code handles VME address alignment. We cannot use + * memcpy_xxx here because it may cut data transfers in to 8-bit + * cycles when D16 or D32 cycles are required on the VME bus. 
+ * On the other hand, the bridge itself assures that the maximum data + * cycle configured for the transfer is used and splits it + * automatically for non-aligned addresses, so we don't want the + * overhead of needlessly forcing small transfers for the entire cycle. */ if ((uintptr_t)addr & 0x1) { *(u8 *)buf = ioread8(addr); @@ -896,9 +895,9 @@ static ssize_t ca91cx42_master_read(struct vme_master_resource *image, } count32 = (count - done) & ~0x3; - if (count32 > 0) { - memcpy_fromio(buf + done, addr + done, (unsigned int)count); - done += count32; + while (done < count32) { + *(u32 *)(buf + done) = ioread32(addr + done); + done += 4; } if ((count - done) & 0x2) { @@ -930,7 +929,7 @@ static ssize_t ca91cx42_master_write(struct vme_master_resource *image, spin_lock(&image->lock); /* Here we apply for the same strategy we do in master_read - * function in order to assure D16 cycle when required. + * function in order to assure the correct cycles. */ if ((uintptr_t)addr & 0x1) { iowrite8(*(u8 *)buf, addr); @@ -950,9 +949,9 @@ static ssize_t ca91cx42_master_write(struct vme_master_resource *image, } count32 = (count - done) & ~0x3; - if (count32 > 0) { - memcpy_toio(addr + done, buf + done, count32); - done += count32; + while (done < count32) { + iowrite32(*(u32 *)(buf + done), addr + done); + done += 4; } if ((count - done) & 0x2) { diff --git a/drivers/vme/bridges/vme_tsi148.c b/drivers/vme/bridges/vme_tsi148.c index 7db4e63..ef9028f 100644 --- a/drivers/vme/bridges/vme_tsi148.c +++ b/drivers/vme/bridges/vme_tsi148.c @@ -741,7 +741,7 @@ static int tsi148_slave_get(struct vme_slave_resource *image, int *enabled, reg_join(vme_bound_high, vme_bound_low, &vme_bound); reg_join(pci_offset_high, pci_offset_low, &pci_offset); - *pci_base = (dma_addr_t)vme_base + pci_offset; + *pci_base = (dma_addr_t)(*vme_base + pci_offset); *enabled = 0; *aspace = 0; @@ -910,11 +910,15 @@ static int tsi148_master_set(struct vme_master_resource *image, int enabled, unsigned long long 
pci_bound, vme_offset, pci_base; struct vme_bridge *tsi148_bridge; struct tsi148_driver *bridge; + struct pci_bus_region region; + struct pci_dev *pdev; tsi148_bridge = image->parent; bridge = tsi148_bridge->driver_priv; + pdev = container_of(tsi148_bridge->parent, struct pci_dev, dev); + /* Verify input data */ if (vme_base & 0xFFFF) { dev_err(tsi148_bridge->parent, "Invalid VME Window " @@ -949,7 +953,9 @@ static int tsi148_master_set(struct vme_master_resource *image, int enabled, pci_bound = 0; vme_offset = 0; } else { - pci_base = (unsigned long long)image->bus_resource.start; + pcibios_resource_to_bus(pdev, &region, + &image->bus_resource); + pci_base = region.start; /* * Bound address is a valid address for the window, adjust @@ -1276,8 +1282,8 @@ static ssize_t tsi148_master_read(struct vme_master_resource *image, void *buf, spin_lock(&image->lock); /* The following code handles VME address alignment. We cannot use - * memcpy_xxx directly here because it may cut small data transfers in - * to 8-bit cycles, thus making D16 cycle impossible. + * memcpy_xxx here because it may cut data transfers in to 8-bit + * cycles when D16 or D32 cycles are required on the VME bus. * On the other hand, the bridge itself assures that the maximum data * cycle configured for the transfer is used and splits it * automatically for non-aligned addresses, so we don't want the @@ -1301,9 +1307,9 @@ static ssize_t tsi148_master_read(struct vme_master_resource *image, void *buf, } count32 = (count - done) & ~0x3; - if (count32 > 0) { - memcpy_fromio(buf + done, addr + done, count32); - done += count32; + while (done < count32) { + *(u32 *)(buf + done) = ioread32(addr + done); + done += 4; } if ((count - done) & 0x2) { @@ -1363,7 +1369,7 @@ static ssize_t tsi148_master_write(struct vme_master_resource *image, void *buf, spin_lock(&image->lock); /* Here we apply for the same strategy we do in master_read - * function in order to assure D16 cycle when required.
+ * function in order to assure the correct cycles. */ if ((uintptr_t)addr & 0x1) { iowrite8(*(u8 *)buf, addr); @@ -1383,9 +1389,9 @@ static ssize_t tsi148_master_write(struct vme_master_resource *image, void *buf, } count32 = (count - done) & ~0x3; - if (count32 > 0) { - memcpy_toio(addr + done, buf + done, count32); - done += count32; + while (done < count32) { + iowrite32(*(u32 *)(buf + done), addr + done); + done += 4; } if ((count - done) & 0x2) { diff --git a/drivers/w1/w1_netlink.c b/drivers/w1/w1_netlink.c index 40788c9..73705af 100644 --- a/drivers/w1/w1_netlink.c +++ b/drivers/w1/w1_netlink.c @@ -54,28 +54,29 @@ static void w1_send_slave(struct w1_master *dev, u64 rn) struct w1_netlink_msg *hdr = (struct w1_netlink_msg *)(msg + 1); struct w1_netlink_cmd *cmd = (struct w1_netlink_cmd *)(hdr + 1); int avail; + u64 *data; /* update kernel slave list */ w1_slave_found(dev, rn); avail = dev->priv_size - cmd->len; - if (avail > 8) { - u64 *data = (void *)(cmd + 1) + cmd->len; + if (avail < 8) { + msg->ack++; + cn_netlink_send(msg, 0, GFP_KERNEL); - *data = rn; - cmd->len += 8; - hdr->len += 8; - msg->len += 8; - return; + msg->len = sizeof(struct w1_netlink_msg) + + sizeof(struct w1_netlink_cmd); + hdr->len = sizeof(struct w1_netlink_cmd); + cmd->len = 0; } - msg->ack++; - cn_netlink_send(msg, 0, GFP_KERNEL); + data = (void *)(cmd + 1) + cmd->len; - msg->len = sizeof(struct w1_netlink_msg) + sizeof(struct w1_netlink_cmd); - hdr->len = sizeof(struct w1_netlink_cmd); - cmd->len = 0; + *data = rn; + cmd->len += 8; + hdr->len += 8; + msg->len += 8; } static int w1_process_search_command(struct w1_master *dev, struct cn_msg *msg, diff --git a/drivers/watchdog/ath79_wdt.c b/drivers/watchdog/ath79_wdt.c index 37cb09b..c97a47c 100644 --- a/drivers/watchdog/ath79_wdt.c +++ b/drivers/watchdog/ath79_wdt.c @@ -20,6 +20,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/bitops.h> +#include <linux/delay.h> #include <linux/errno.h> #include <linux/fs.h> #include 
<linux/init.h> @@ -91,6 +92,15 @@ static inline void ath79_wdt_keepalive(void) static inline void ath79_wdt_enable(void) { ath79_wdt_keepalive(); + + /* + * Updating the TIMER register requires a few microseconds + * on the AR934x SoCs at least. Use a small delay to ensure + * that the TIMER register is updated within the hardware + * before enabling the watchdog. + */ + udelay(2); + ath79_wdt_wr(WDOG_REG_CTRL, WDOG_CTRL_ACTION_FCR); /* flush write */ ath79_wdt_rr(WDOG_REG_CTRL); diff --git a/drivers/watchdog/kempld_wdt.c b/drivers/watchdog/kempld_wdt.c index 5c3d4df..22b9a03 100644 --- a/drivers/watchdog/kempld_wdt.c +++ b/drivers/watchdog/kempld_wdt.c @@ -163,7 +163,7 @@ static int kempld_wdt_set_stage_timeout(struct kempld_wdt_data *wdt_data, kempld_get_mutex(pld); stage_cfg = kempld_read8(pld, KEMPLD_WDT_STAGE_CFG(stage->id)); stage_cfg &= ~STAGE_CFG_PRESCALER_MASK; - stage_cfg |= STAGE_CFG_SET_PRESCALER(prescaler); + stage_cfg |= STAGE_CFG_SET_PRESCALER(PRESCALER_21); kempld_write8(pld, KEMPLD_WDT_STAGE_CFG(stage->id), stage_cfg); kempld_write32(pld, KEMPLD_WDT_STAGE_TIMEOUT(stage->id), stage_timeout); diff --git a/drivers/watchdog/sp805_wdt.c b/drivers/watchdog/sp805_wdt.c index 58df98a..2cf02ff 100644 --- a/drivers/watchdog/sp805_wdt.c +++ b/drivers/watchdog/sp805_wdt.c @@ -60,7 +60,6 @@ * @adev: amba device structure of wdt * @status: current status of wdt * @load_val: load value to be set for current timeout - * @timeout: current programmed timeout */ struct sp805_wdt { struct watchdog_device wdd; @@ -69,7 +68,6 @@ struct sp805_wdt { struct clk *clk; struct amba_device *adev; unsigned int load_val; - unsigned int timeout; }; static bool nowayout = WATCHDOG_NOWAYOUT; @@ -99,7 +97,7 @@ static int wdt_setload(struct watchdog_device *wdd, unsigned int timeout) spin_lock(&wdt->lock); wdt->load_val = load; /* roundup timeout to closest positive integer value */ - wdt->timeout = div_u64((load + 1) * 2 + (rate / 2), rate); + wdd->timeout = div_u64((load + 1) * 2 + 
(rate / 2), rate); spin_unlock(&wdt->lock); return 0; diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 624e8dc..602913d 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -111,16 +111,11 @@ static void do_suspend(void) shutting_down = SHUTDOWN_SUSPEND; -#ifdef CONFIG_PREEMPT - /* If the kernel is preemptible, we need to freeze all the processes - to prevent them from being in the middle of a pagetable update - during suspend. */ err = freeze_processes(); if (err) { pr_err("%s: freeze failed %d\n", __func__, err); goto out; } -#endif err = dpm_suspend_start(PMSG_FREEZE); if (err) { @@ -169,10 +164,8 @@ out_resume: dpm_resume_end(si.cancelled ? PMSG_THAW : PMSG_RESTORE); out_thaw: -#ifdef CONFIG_PREEMPT thaw_processes(); out: -#endif shutting_down = SHUTDOWN_INVALID; } #endif /* CONFIG_HIBERNATE_CALLBACKS */ diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index d9a4367..9cca0ea 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -126,7 +126,7 @@ affs_fix_dcache(struct inode *inode, u32 entry_ino) { struct dentry *dentry; spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { if (entry_ino == (u32)(long)dentry->d_fsdata) { dentry->d_fsdata = (void *)inode->i_ino; break; @@ -52,7 +52,8 @@ struct aio_ring { unsigned id; /* kernel internal index number */ unsigned nr; /* number of io_events */ - unsigned head; + unsigned head; /* Written to by userland or under ring_lock + * mutex by aio_read_events_ring(). 
*/ unsigned tail; unsigned magic; @@ -111,6 +112,11 @@ struct kioctx { struct work_struct free_work; + /* + * signals when all in-flight requests are done + */ + struct completion *requests_done; + struct { /* * This counts the number of available slots in the ringbuffer, @@ -158,6 +164,15 @@ static struct vfsmount *aio_mnt; static const struct file_operations aio_ring_fops; static const struct address_space_operations aio_ctx_aops; +/* Backing dev info for aio fs. + * -no dirty page accounting or writeback happens + */ +static struct backing_dev_info aio_fs_backing_dev_info = { + .name = "aiofs", + .state = 0, + .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_MAP_COPY, +}; + static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) { struct qstr this = QSTR_INIT("[aio]", 5); @@ -169,6 +184,7 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) inode->i_mapping->a_ops = &aio_ctx_aops; inode->i_mapping->private_data = ctx; + inode->i_mapping->backing_dev_info = &aio_fs_backing_dev_info; inode->i_size = PAGE_SIZE * nr_pages; path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this); @@ -214,6 +230,9 @@ static int __init aio_setup(void) if (IS_ERR(aio_mnt)) panic("Failed to create aio fs mount."); + if (bdi_init(&aio_fs_backing_dev_info)) + panic("Failed to init aio fs backing dev info."); + kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); @@ -243,6 +262,11 @@ static void aio_free_ring(struct kioctx *ctx) { int i; + /* Disconnect the kiotx from the ring file. This prevents future + * accesses to the kioctx from page migration. 
+ */ + put_aio_ring_file(ctx); + for (i = 0; i < ctx->nr_pages; i++) { struct page *page; pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i, @@ -254,8 +278,6 @@ static void aio_free_ring(struct kioctx *ctx) put_page(page); } - put_aio_ring_file(ctx); - if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) { kfree(ctx->ring_pages); ctx->ring_pages = NULL; @@ -272,40 +294,44 @@ static const struct file_operations aio_ring_fops = { .mmap = aio_ring_mmap, }; -static int aio_set_page_dirty(struct page *page) -{ - return 0; -} - #if IS_ENABLED(CONFIG_MIGRATION) static int aio_migratepage(struct address_space *mapping, struct page *new, struct page *old, enum migrate_mode mode) { struct kioctx *ctx; unsigned long flags; + pgoff_t idx; int rc; rc = 0; - /* Make sure the old page hasn't already been changed */ + /* mapping->private_lock here protects against the kioctx teardown. */ spin_lock(&mapping->private_lock); ctx = mapping->private_data; - if (ctx) { - pgoff_t idx; - spin_lock_irqsave(&ctx->completion_lock, flags); - idx = old->index; - if (idx < (pgoff_t)ctx->nr_pages) { - if (ctx->ring_pages[idx] != old) - rc = -EAGAIN; - } else - rc = -EINVAL; - spin_unlock_irqrestore(&ctx->completion_lock, flags); + if (!ctx) { + rc = -EINVAL; + goto out; + } + + /* The ring_lock mutex. The prevents aio_read_events() from writing + * to the ring's head, and prevents page migration from mucking in + * a partially initialized kiotx. 
+ */ + if (!mutex_trylock(&ctx->ring_lock)) { + rc = -EAGAIN; + goto out; + } + + idx = old->index; + if (idx < (pgoff_t)ctx->nr_pages) { + /* Make sure the old page hasn't already been changed */ + if (ctx->ring_pages[idx] != old) + rc = -EAGAIN; } else rc = -EINVAL; - spin_unlock(&mapping->private_lock); if (rc != 0) - return rc; + goto out_unlock; /* Writeback must be complete */ BUG_ON(PageWriteback(old)); @@ -314,44 +340,32 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, rc = migrate_page_move_mapping(mapping, new, old, NULL, mode, 1); if (rc != MIGRATEPAGE_SUCCESS) { put_page(new); - return rc; + goto out_unlock; } - /* We can potentially race against kioctx teardown here. Use the - * address_space's private data lock to protect the mapping's - * private_data. + /* Take completion_lock to prevent other writes to the ring buffer + * while the old page is copied to the new. This prevents new + * events from being lost. */ - spin_lock(&mapping->private_lock); - ctx = mapping->private_data; - if (ctx) { - pgoff_t idx; - spin_lock_irqsave(&ctx->completion_lock, flags); - migrate_page_copy(new, old); - idx = old->index; - if (idx < (pgoff_t)ctx->nr_pages) { - /* And only do the move if things haven't changed */ - if (ctx->ring_pages[idx] == old) - ctx->ring_pages[idx] = new; - else - rc = -EAGAIN; - } else - rc = -EINVAL; - spin_unlock_irqrestore(&ctx->completion_lock, flags); - } else - rc = -EBUSY; - spin_unlock(&mapping->private_lock); + spin_lock_irqsave(&ctx->completion_lock, flags); + migrate_page_copy(new, old); + BUG_ON(ctx->ring_pages[idx] != old); + ctx->ring_pages[idx] = new; + spin_unlock_irqrestore(&ctx->completion_lock, flags); - if (rc == MIGRATEPAGE_SUCCESS) - put_page(old); - else - put_page(new); + /* The old page is no longer accessible. 
*/ + put_page(old); +out_unlock: + mutex_unlock(&ctx->ring_lock); +out: + spin_unlock(&mapping->private_lock); return rc; } #endif static const struct address_space_operations aio_ctx_aops = { - .set_page_dirty = aio_set_page_dirty, + .set_page_dirty = __set_page_dirty_no_writeback, #if IS_ENABLED(CONFIG_MIGRATION) .migratepage = aio_migratepage, #endif @@ -380,7 +394,7 @@ static int aio_setup_ring(struct kioctx *ctx) file = aio_private_file(ctx, nr_pages); if (IS_ERR(file)) { ctx->aio_ring_file = NULL; - return -EAGAIN; + return -ENOMEM; } ctx->aio_ring_file = file; @@ -406,7 +420,6 @@ static int aio_setup_ring(struct kioctx *ctx) pr_debug("pid(%d) page[%d]->count=%d\n", current->pid, i, page_count(page)); SetPageUptodate(page); - SetPageDirty(page); unlock_page(page); ctx->ring_pages[i] = page; @@ -415,7 +428,7 @@ static int aio_setup_ring(struct kioctx *ctx) if (unlikely(i != nr_pages)) { aio_free_ring(ctx); - return -EAGAIN; + return -ENOMEM; } ctx->mmap_size = nr_pages * PAGE_SIZE; @@ -429,7 +442,7 @@ static int aio_setup_ring(struct kioctx *ctx) if (IS_ERR((void *)ctx->mmap_base)) { ctx->mmap_size = 0; aio_free_ring(ctx); - return -EAGAIN; + return -ENOMEM; } pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base); @@ -507,6 +520,10 @@ static void free_ioctx_reqs(struct percpu_ref *ref) { struct kioctx *ctx = container_of(ref, struct kioctx, reqs); + /* At this point we know that there are no any in-flight requests */ + if (ctx->requests_done) + complete(ctx->requests_done); + INIT_WORK(&ctx->free_work, free_ioctx); schedule_work(&ctx->free_work); } @@ -556,6 +573,10 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) rcu_read_unlock(); spin_unlock(&mm->ioctx_lock); + /* While kioctx setup is in progress, + * we are protected from page migration + * changes ring_pages by ->ring_lock. 
+ */ ring = kmap_atomic(ctx->ring_pages[0]); ring->id = ctx->id; kunmap_atomic(ring); @@ -640,24 +661,28 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) ctx->max_reqs = nr_events; - if (percpu_ref_init(&ctx->users, free_ioctx_users)) - goto err; - - if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs)) - goto err; - spin_lock_init(&ctx->ctx_lock); spin_lock_init(&ctx->completion_lock); mutex_init(&ctx->ring_lock); + /* Protect against page migration throughout kiotx setup by keeping + * the ring_lock mutex held until setup is complete. */ + mutex_lock(&ctx->ring_lock); init_waitqueue_head(&ctx->wait); INIT_LIST_HEAD(&ctx->active_reqs); + if (percpu_ref_init(&ctx->users, free_ioctx_users)) + goto err; + + if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs)) + goto err; + ctx->cpu = alloc_percpu(struct kioctx_cpu); if (!ctx->cpu) goto err; - if (aio_setup_ring(ctx) < 0) + err = aio_setup_ring(ctx); + if (err < 0) goto err; atomic_set(&ctx->reqs_available, ctx->nr_events - 1); @@ -683,6 +708,9 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) if (err) goto err_cleanup; + /* Release the ring_lock mutex now that all setup is complete. */ + mutex_unlock(&ctx->ring_lock); + pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n", ctx, ctx->user_id, mm, ctx->nr_events); return ctx; @@ -692,6 +720,7 @@ err_cleanup: err_ctx: aio_free_ring(ctx); err: + mutex_unlock(&ctx->ring_lock); free_percpu(ctx->cpu); free_percpu(ctx->reqs.pcpu_count); free_percpu(ctx->users.pcpu_count); @@ -705,7 +734,8 @@ err: * when the processes owning a context have all exited to encourage * the rapid destruction of the kioctx. 
*/ -static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx) +static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, + struct completion *requests_done) { if (!atomic_xchg(&ctx->dead, 1)) { struct kioctx_table *table; @@ -734,7 +764,11 @@ static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx) if (ctx->mmap_size) vm_munmap(ctx->mmap_base, ctx->mmap_size); + ctx->requests_done = requests_done; percpu_ref_kill(&ctx->users); + } else { + if (requests_done) + complete(requests_done); } } @@ -769,6 +803,9 @@ void exit_aio(struct mm_struct *mm) unsigned i = 0; while (1) { + struct completion requests_done = + COMPLETION_INITIALIZER_ONSTACK(requests_done); + rcu_read_lock(); table = rcu_dereference(mm->ioctx_table); @@ -796,23 +833,30 @@ void exit_aio(struct mm_struct *mm) */ ctx->mmap_size = 0; - kill_ioctx(mm, ctx); + kill_ioctx(mm, ctx, &requests_done); + + /* Wait until all IO for the context are done. */ + wait_for_completion(&requests_done); } } static void put_reqs_available(struct kioctx *ctx, unsigned nr) { struct kioctx_cpu *kcpu; + unsigned long flags; preempt_disable(); kcpu = this_cpu_ptr(ctx->cpu); + local_irq_save(flags); kcpu->reqs_available += nr; + while (kcpu->reqs_available >= ctx->req_batch * 2) { kcpu->reqs_available -= ctx->req_batch; atomic_add(ctx->req_batch, &ctx->reqs_available); } + local_irq_restore(flags); preempt_enable(); } @@ -820,10 +864,12 @@ static bool get_reqs_available(struct kioctx *ctx) { struct kioctx_cpu *kcpu; bool ret = false; + unsigned long flags; preempt_disable(); kcpu = this_cpu_ptr(ctx->cpu); + local_irq_save(flags); if (!kcpu->reqs_available) { int old, avail = atomic_read(&ctx->reqs_available); @@ -842,6 +888,7 @@ static bool get_reqs_available(struct kioctx *ctx) ret = true; kcpu->reqs_available--; out: + local_irq_restore(flags); preempt_enable(); return ret; } @@ -994,6 +1041,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2) /* everything turned out well, dispose of the aiocb. 
*/ kiocb_free(iocb); + put_reqs_available(ctx, 1); /* * We have to order our ring_info tail store above and test @@ -1024,16 +1072,26 @@ static long aio_read_events_ring(struct kioctx *ctx, mutex_lock(&ctx->ring_lock); + /* Access to ->ring_pages here is protected by ctx->ring_lock. */ ring = kmap_atomic(ctx->ring_pages[0]); head = ring->head; tail = ring->tail; kunmap_atomic(ring); + /* + * Ensure that once we've read the current tail pointer, that + * we also see the events that were stored up to the tail. + */ + smp_rmb(); + pr_debug("h%u t%u m%u\n", head, tail, ctx->nr_events); if (head == tail) goto out; + head %= ctx->nr_events; + tail %= ctx->nr_events; + while (ret < nr) { long avail; struct io_event *ev; @@ -1072,8 +1130,6 @@ static long aio_read_events_ring(struct kioctx *ctx, flush_dcache_page(ctx->ring_pages[0]); pr_debug("%li h%u t%u\n", ret, head, tail); - - put_reqs_available(ctx, ret); out: mutex_unlock(&ctx->ring_lock); @@ -1171,7 +1227,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp) if (!IS_ERR(ioctx)) { ret = put_user(ioctx->user_id, ctxp); if (ret) - kill_ioctx(current->mm, ioctx); + kill_ioctx(current->mm, ioctx, NULL); percpu_ref_put(&ioctx->users); } @@ -1189,8 +1245,22 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) { struct kioctx *ioctx = lookup_ioctx(ctx); if (likely(NULL != ioctx)) { - kill_ioctx(current->mm, ioctx); + struct completion requests_done = + COMPLETION_INITIALIZER_ONSTACK(requests_done); + + /* Pass requests_done to kill_ioctx() where it can be set + * in a thread-safe way. If we try to set it here then we have + * a race condition if two io_destroy() called simultaneously. + */ + kill_ioctx(current->mm, ioctx, &requests_done); percpu_ref_put(&ioctx->users); + + /* Wait until all IO for the context are done. Otherwise kernel + * keep using user-space buffers even if user thinks the context + * is destroyed. 
+ */ + wait_for_completion(&requests_done); + return 0; } pr_debug("EINVAL: io_destroy: invalid context id\n"); @@ -1285,10 +1355,8 @@ rw_common: &iovec, compat) : aio_setup_single_vector(req, rw, buf, &nr_segs, iovec); - if (ret) - return ret; - - ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes); + if (!ret) + ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes); if (ret < 0) { if (iovec != &inline_vec) kfree(iovec); @@ -50,14 +50,14 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr) if ((ia_valid & ATTR_UID) && (!uid_eq(current_fsuid(), inode->i_uid) || !uid_eq(attr->ia_uid, inode->i_uid)) && - !inode_capable(inode, CAP_CHOWN)) + !capable_wrt_inode_uidgid(inode, CAP_CHOWN)) return -EPERM; /* Make sure caller can chgrp. */ if ((ia_valid & ATTR_GID) && (!uid_eq(current_fsuid(), inode->i_uid) || (!in_group_p(attr->ia_gid) && !gid_eq(attr->ia_gid, inode->i_gid))) && - !inode_capable(inode, CAP_CHOWN)) + !capable_wrt_inode_uidgid(inode, CAP_CHOWN)) return -EPERM; /* Make sure a caller can chmod. */ @@ -67,7 +67,7 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr) /* Also check the setgid bit! */ if (!in_group_p((ia_valid & ATTR_GID) ? 
attr->ia_gid : inode->i_gid) && - !inode_capable(inode, CAP_FSETID)) + !capable_wrt_inode_uidgid(inode, CAP_FSETID)) attr->ia_mode &= ~S_ISGID; } @@ -160,7 +160,7 @@ void setattr_copy(struct inode *inode, const struct iattr *attr) umode_t mode = attr->ia_mode; if (!in_group_p(inode->i_gid) && - !inode_capable(inode, CAP_FSETID)) + !capable_wrt_inode_uidgid(inode, CAP_FSETID)) mode &= ~S_ISGID; inode->i_mode = mode; } diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index b422ad6..b6c6a11 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -91,7 +91,7 @@ static struct dentry *get_next_positive_subdir(struct dentry *prev, spin_lock(&root->d_lock); if (prev) - next = prev->d_u.d_child.next; + next = prev->d_child.next; else { prev = dget_dlock(root); next = prev->d_subdirs.next; @@ -105,13 +105,13 @@ cont: return NULL; } - q = list_entry(next, struct dentry, d_u.d_child); + q = list_entry(next, struct dentry, d_child); spin_lock_nested(&q->d_lock, DENTRY_D_LOCK_NESTED); /* Already gone or negative dentry (under construction) - try next */ if (!d_count(q) || !simple_positive(q)) { spin_unlock(&q->d_lock); - next = q->d_u.d_child.next; + next = q->d_child.next; goto cont; } dget_dlock(q); @@ -161,13 +161,13 @@ again: goto relock; } spin_unlock(&p->d_lock); - next = p->d_u.d_child.next; + next = p->d_child.next; p = parent; if (next != &parent->d_subdirs) break; } } - ret = list_entry(next, struct dentry, d_u.d_child); + ret = list_entry(next, struct dentry, d_child); spin_lock_nested(&ret->d_lock, DENTRY_D_LOCK_NESTED); /* Negative dentry - try next */ @@ -447,7 +447,7 @@ found: spin_lock(&sbi->lookup_lock); spin_lock(&expired->d_parent->d_lock); spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED); - list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child); + list_move(&expired->d_parent->d_subdirs, &expired->d_child); spin_unlock(&expired->d_lock); spin_unlock(&expired->d_parent->d_lock); spin_unlock(&sbi->lookup_lock); diff --git 
a/fs/autofs4/root.c b/fs/autofs4/root.c index 92ef341..b3f4794 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -179,7 +179,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry) spin_lock(&active->d_lock); /* Already gone? */ - if (!d_count(active)) + if ((int) d_count(active) <= 0) goto next; qstr = &active->d_name; @@ -230,7 +230,7 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry) spin_lock(&expiring->d_lock); - /* Bad luck, we've already been dentry_iput */ + /* We've already been dentry_iput or unlinked */ if (!expiring->d_inode) goto next; @@ -655,7 +655,7 @@ static void autofs_clear_leaf_automount_flags(struct dentry *dentry) /* only consider parents below dentrys in the root */ if (IS_ROOT(parent->d_parent)) return; - d_child = &dentry->d_u.d_child; + d_child = &dentry->d_child; /* Set parent managed if it's becoming empty */ if (d_child->next == &parent->d_subdirs && d_child->prev == &parent->d_subdirs) diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index b5ee393..04a43c0 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -70,8 +70,10 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, bs->bvec_integrity_pool); if (!bip->bip_vec) goto err; + bip->bip_max_vcnt = bvec_nr_vecs(idx); } else { bip->bip_vec = bip->bip_inline_vecs; + bip->bip_max_vcnt = inline_vecs; } bip->bip_slab = idx; @@ -114,14 +116,6 @@ void bio_integrity_free(struct bio *bio) } EXPORT_SYMBOL(bio_integrity_free); -static inline unsigned int bip_integrity_vecs(struct bio_integrity_payload *bip) -{ - if (bip->bip_slab == BIO_POOL_NONE) - return BIP_INLINE_VECS; - - return bvec_nr_vecs(bip->bip_slab); -} - /** * bio_integrity_add_page - Attach integrity metadata * @bio: bio to update @@ -137,7 +131,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, struct bio_integrity_payload *bip = bio->bi_integrity; struct bio_vec *iv; - if (bip->bip_vcnt >= bip_integrity_vecs(bip)) { + if (bip->bip_vcnt >= 
bip->bip_max_vcnt) { printk(KERN_ERR "%s: bip_vec full\n", __func__); return 0; } @@ -316,7 +310,7 @@ static void bio_integrity_generate(struct bio *bio) bix.disk_name = bio->bi_bdev->bd_disk->disk_name; bix.sector_size = bi->sector_size; - bio_for_each_segment_all(bv, bio, i) { + bio_for_each_segment(bv, bio, i) { void *kaddr = kmap_atomic(bv->bv_page); bix.data_buf = kaddr + bv->bv_offset; bix.data_size = bv->bv_len; diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 5eb50b5..53039de 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -263,9 +263,8 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, } if (ret > 0) goto next; - ret = ulist_add_merge(parents, eb->start, - (uintptr_t)eie, - (u64 *)&old, GFP_NOFS); + ret = ulist_add_merge_ptr(parents, eb->start, + eie, (void **)&old, GFP_NOFS); if (ret < 0) break; if (!ret && extent_item_pos) { @@ -955,16 +954,19 @@ again: ret = -EIO; goto out; } + btrfs_tree_read_lock(eb); + btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); ret = find_extent_in_eb(eb, bytenr, *extent_item_pos, &eie); + btrfs_tree_read_unlock_blocking(eb); free_extent_buffer(eb); if (ret < 0) goto out; ref->inode_list = eie; } - ret = ulist_add_merge(refs, ref->parent, - (uintptr_t)ref->inode_list, - (u64 *)&eie, GFP_NOFS); + ret = ulist_add_merge_ptr(refs, ref->parent, + ref->inode_list, + (void **)&eie, GFP_NOFS); if (ret < 0) goto out; if (!ret && extent_item_pos) { @@ -1390,9 +1392,10 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, * returns <0 on error */ static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb, - struct btrfs_extent_item *ei, u32 item_size, - struct btrfs_extent_inline_ref **out_eiref, - int *out_type) + struct btrfs_key *key, + struct btrfs_extent_item *ei, u32 item_size, + struct btrfs_extent_inline_ref **out_eiref, + int *out_type) { unsigned long end; u64 flags; @@ -1402,19 +1405,26 @@ static int __get_extent_inline_ref(unsigned long 
*ptr, struct extent_buffer *eb, /* first call */ flags = btrfs_extent_flags(eb, ei); if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { - info = (struct btrfs_tree_block_info *)(ei + 1); - *out_eiref = - (struct btrfs_extent_inline_ref *)(info + 1); + if (key->type == BTRFS_METADATA_ITEM_KEY) { + /* a skinny metadata extent */ + *out_eiref = + (struct btrfs_extent_inline_ref *)(ei + 1); + } else { + WARN_ON(key->type != BTRFS_EXTENT_ITEM_KEY); + info = (struct btrfs_tree_block_info *)(ei + 1); + *out_eiref = + (struct btrfs_extent_inline_ref *)(info + 1); + } } else { *out_eiref = (struct btrfs_extent_inline_ref *)(ei + 1); } *ptr = (unsigned long)*out_eiref; - if ((void *)*ptr >= (void *)ei + item_size) + if ((unsigned long)(*ptr) >= (unsigned long)ei + item_size) return -ENOENT; } end = (unsigned long)ei + item_size; - *out_eiref = (struct btrfs_extent_inline_ref *)*ptr; + *out_eiref = (struct btrfs_extent_inline_ref *)(*ptr); *out_type = btrfs_extent_inline_ref_type(eb, *out_eiref); *ptr += btrfs_extent_inline_ref_size(*out_type); @@ -1433,8 +1443,8 @@ static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb, * <0 on error. 
*/ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, - struct btrfs_extent_item *ei, u32 item_size, - u64 *out_root, u8 *out_level) + struct btrfs_key *key, struct btrfs_extent_item *ei, + u32 item_size, u64 *out_root, u8 *out_level) { int ret; int type; @@ -1445,8 +1455,8 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, return 1; while (1) { - ret = __get_extent_inline_ref(ptr, eb, ei, item_size, - &eiref, &type); + ret = __get_extent_inline_ref(ptr, eb, key, ei, item_size, + &eiref, &type); if (ret < 0) return ret; diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index a910b27..519b49e 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -40,8 +40,8 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, u64 *flags); int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, - struct btrfs_extent_item *ei, u32 item_size, - u64 *out_root, u8 *out_level); + struct btrfs_key *key, struct btrfs_extent_item *ei, + u32 item_size, u64 *out_root, u8 *out_level); int iterate_extent_inodes(struct btrfs_fs_info *fs_info, u64 extent_item_objectid, diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 6e9ff8f..6357298 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -474,7 +474,7 @@ static noinline int add_ra_bio_pages(struct inode *inode, rcu_read_lock(); page = radix_tree_lookup(&mapping->page_tree, pg_index); rcu_read_unlock(); - if (page) { + if (page && !radix_tree_exceptional_entry(page)) { misses++; if (misses > 4) break; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 84d590a..f46ad53 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -64,7 +64,6 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t, static void btrfs_destroy_ordered_extents(struct btrfs_root *root); static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, struct btrfs_root *root); -static void 
btrfs_evict_pending_snapshots(struct btrfs_transaction *t); static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root); static int btrfs_destroy_marked_extents(struct btrfs_root *root, struct extent_io_tree *dirty_pages, @@ -1780,6 +1779,9 @@ sleep: wake_up_process(root->fs_info->cleaner_kthread); mutex_unlock(&root->fs_info->transaction_kthread_mutex); + if (unlikely(test_bit(BTRFS_FS_STATE_ERROR, + &root->fs_info->fs_state))) + btrfs_cleanup_transaction(root); if (!try_to_freeze()) { set_current_state(TASK_INTERRUPTIBLE); if (!kthread_should_stop() && @@ -3620,6 +3622,11 @@ int close_ctree(struct btrfs_root *root) btrfs_free_block_groups(fs_info); + /* + * we must make sure there is not any read request to + * submit after we stopping all workers. + */ + invalidate_inode_pages2(fs_info->btree_inode->i_mapping); btrfs_stop_all_workers(fs_info); del_fs_roots(fs_info); @@ -3806,7 +3813,8 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info) while (!list_empty(&splice)) { root = list_first_entry(&splice, struct btrfs_root, ordered_root); - list_del_init(&root->ordered_root); + list_move_tail(&root->ordered_root, + &fs_info->ordered_roots); btrfs_destroy_ordered_extents(root); @@ -3884,24 +3892,6 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, return ret; } -static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t) -{ - struct btrfs_pending_snapshot *snapshot; - struct list_head splice; - - INIT_LIST_HEAD(&splice); - - list_splice_init(&t->pending_snapshots, &splice); - - while (!list_empty(&splice)) { - snapshot = list_entry(splice.next, - struct btrfs_pending_snapshot, - list); - snapshot->error = -ECANCELED; - list_del_init(&snapshot->list); - } -} - static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root) { struct btrfs_inode *btrfs_inode; @@ -4005,12 +3995,6 @@ again: if (ret) break; - /* opt_discard */ - if (btrfs_test_opt(root, DISCARD)) - ret = btrfs_error_discard_extent(root, 
start, - end + 1 - start, - NULL); - clear_extent_dirty(unpin, start, end, GFP_NOFS); btrfs_error_unpin_extent_range(root, start, end); cond_resched(); @@ -4031,6 +4015,8 @@ again: void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, struct btrfs_root *root) { + btrfs_destroy_ordered_operations(cur_trans, root); + btrfs_destroy_delayed_refs(cur_trans, root); btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv, cur_trans->dirty_pages.dirty_bytes); @@ -4038,8 +4024,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, cur_trans->state = TRANS_STATE_COMMIT_START; wake_up(&root->fs_info->transaction_blocked_wait); - btrfs_evict_pending_snapshots(cur_trans); - cur_trans->state = TRANS_STATE_UNBLOCKED; wake_up(&root->fs_info->transaction_wait); @@ -4063,63 +4047,51 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, static int btrfs_cleanup_transaction(struct btrfs_root *root) { struct btrfs_transaction *t; - LIST_HEAD(list); mutex_lock(&root->fs_info->transaction_kthread_mutex); spin_lock(&root->fs_info->trans_lock); - list_splice_init(&root->fs_info->trans_list, &list); - root->fs_info->running_transaction = NULL; - spin_unlock(&root->fs_info->trans_lock); - - while (!list_empty(&list)) { - t = list_entry(list.next, struct btrfs_transaction, list); - - btrfs_destroy_ordered_operations(t, root); - - btrfs_destroy_all_ordered_extents(root->fs_info); - - btrfs_destroy_delayed_refs(t, root); - - /* - * FIXME: cleanup wait for commit - * We needn't acquire the lock here, because we are during - * the umount, there is no other task which will change it. 
- */ - t->state = TRANS_STATE_COMMIT_START; - smp_mb(); - if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) - wake_up(&root->fs_info->transaction_blocked_wait); - - btrfs_evict_pending_snapshots(t); - - t->state = TRANS_STATE_UNBLOCKED; - smp_mb(); - if (waitqueue_active(&root->fs_info->transaction_wait)) - wake_up(&root->fs_info->transaction_wait); - - btrfs_destroy_delayed_inodes(root); - btrfs_assert_delayed_root_empty(root); - - btrfs_destroy_all_delalloc_inodes(root->fs_info); - - btrfs_destroy_marked_extents(root, &t->dirty_pages, - EXTENT_DIRTY); - - btrfs_destroy_pinned_extent(root, - root->fs_info->pinned_extents); - - t->state = TRANS_STATE_COMPLETED; - smp_mb(); - if (waitqueue_active(&t->commit_wait)) - wake_up(&t->commit_wait); + while (!list_empty(&root->fs_info->trans_list)) { + t = list_first_entry(&root->fs_info->trans_list, + struct btrfs_transaction, list); + if (t->state >= TRANS_STATE_COMMIT_START) { + atomic_inc(&t->use_count); + spin_unlock(&root->fs_info->trans_lock); + btrfs_wait_for_commit(root, t->transid); + btrfs_put_transaction(t); + spin_lock(&root->fs_info->trans_lock); + continue; + } + if (t == root->fs_info->running_transaction) { + t->state = TRANS_STATE_COMMIT_DOING; + spin_unlock(&root->fs_info->trans_lock); + /* + * We wait for 0 num_writers since we don't hold a trans + * handle open currently for this transaction. 
+ */ + wait_event(t->writer_wait, + atomic_read(&t->num_writers) == 0); + } else { + spin_unlock(&root->fs_info->trans_lock); + } + btrfs_cleanup_one_transaction(t, root); - atomic_set(&t->use_count, 0); + spin_lock(&root->fs_info->trans_lock); + if (t == root->fs_info->running_transaction) + root->fs_info->running_transaction = NULL; list_del_init(&t->list); - memset(t, 0, sizeof(*t)); - kmem_cache_free(btrfs_transaction_cachep, t); - } + spin_unlock(&root->fs_info->trans_lock); + btrfs_put_transaction(t); + trace_btrfs_transaction_commit(root); + spin_lock(&root->fs_info->trans_lock); + } + spin_unlock(&root->fs_info->trans_lock); + btrfs_destroy_all_ordered_extents(root->fs_info); + btrfs_destroy_delayed_inodes(root); + btrfs_assert_delayed_root_empty(root); + btrfs_destroy_pinned_extent(root, root->fs_info->pinned_extents); + btrfs_destroy_all_delalloc_inodes(root->fs_info); mutex_unlock(&root->fs_info->transaction_kthread_mutex); return 0; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b256ddc..b1c6e49 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3318,10 +3318,9 @@ again: last = cache->key.objectid + cache->key.offset; err = write_one_cache_group(trans, root, path, cache); + btrfs_put_block_group(cache); if (err) /* File system offline */ goto out; - - btrfs_put_block_group(cache); } while (1) { @@ -5477,7 +5476,8 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, update_global_block_rsv(fs_info); } -static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) +static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end, + const bool return_free_space) { struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_block_group_cache *cache = NULL; @@ -5501,7 +5501,8 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) if (start < cache->last_byte_to_unpin) { len = min(len, cache->last_byte_to_unpin - start); - btrfs_add_free_space(cache, start, 
len); + if (return_free_space) + btrfs_add_free_space(cache, start, len); } start += len; @@ -5564,7 +5565,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, end + 1 - start, NULL); clear_extent_dirty(unpin, start, end, GFP_NOFS); - unpin_extent_range(root, start, end); + unpin_extent_range(root, start, end, true); cond_resched(); } @@ -8810,7 +8811,7 @@ out: int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) { - return unpin_extent_range(root, start, end); + return unpin_extent_range(root, start, end, false); } int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 51731b7..7015d90 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1645,6 +1645,7 @@ again: * shortening the size of the delalloc range we're searching */ free_extent_state(cached_state); + cached_state = NULL; if (!loops) { max_bytes = PAGE_CACHE_SIZE; loops = 1; @@ -2311,7 +2312,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end) { int uptodate = (err == 0); struct extent_io_tree *tree; - int ret; + int ret = 0; tree = &BTRFS_I(page->mapping->host)->io_tree; @@ -2325,6 +2326,8 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end) if (!uptodate) { ClearPageUptodate(page); SetPageError(page); + ret = ret < 0 ? 
ret : -EIO; + mapping_set_error(page->mapping, ret); } return 0; } @@ -2482,6 +2485,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) test_bit(BIO_UPTODATE, &bio->bi_flags); if (err) uptodate = 0; + offset += len; continue; } } @@ -4442,7 +4446,8 @@ static void check_buffer_tree_ref(struct extent_buffer *eb) spin_unlock(&eb->refs_lock); } -static void mark_extent_buffer_accessed(struct extent_buffer *eb) +static void mark_extent_buffer_accessed(struct extent_buffer *eb, + struct page *accessed) { unsigned long num_pages, i; @@ -4451,7 +4456,8 @@ static void mark_extent_buffer_accessed(struct extent_buffer *eb) num_pages = num_extent_pages(eb->start, eb->len); for (i = 0; i < num_pages; i++) { struct page *p = extent_buffer_page(eb, i); - mark_page_accessed(p); + if (p != accessed) + mark_page_accessed(p); } } @@ -4472,7 +4478,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); if (eb && atomic_inc_not_zero(&eb->refs)) { rcu_read_unlock(); - mark_extent_buffer_accessed(eb); + mark_extent_buffer_accessed(eb, NULL); return eb; } rcu_read_unlock(); @@ -4500,7 +4506,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, spin_unlock(&mapping->private_lock); unlock_page(p); page_cache_release(p); - mark_extent_buffer_accessed(exists); + mark_extent_buffer_accessed(exists, p); goto free_eb; } @@ -4515,7 +4521,6 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, attach_extent_buffer_page(eb, p); spin_unlock(&mapping->private_lock); WARN_ON(PageDirty(p)); - mark_page_accessed(p); eb->pages[i] = p; if (!PageUptodate(p)) uptodate = 0; @@ -4545,7 +4550,7 @@ again: } spin_unlock(&tree->buffer_lock); radix_tree_preload_end(); - mark_extent_buffer_accessed(exists); + mark_extent_buffer_accessed(exists, NULL); goto free_eb; } /* add one reference for the tree */ @@ -4591,7 +4596,7 @@ struct extent_buffer *find_extent_buffer(struct 
extent_io_tree *tree, eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); if (eb && atomic_inc_not_zero(&eb->refs)) { rcu_read_unlock(); - mark_extent_buffer_accessed(eb); + mark_extent_buffer_accessed(eb, NULL); return eb; } rcu_read_unlock(); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index a4a7a1a..0a38095 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -263,8 +263,6 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, if (!em) goto out; - if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags)) - list_move(&em->list, &tree->modified_extents); em->generation = gen; clear_bit(EXTENT_FLAG_PINNED, &em->flags); em->mod_start = em->start; diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 4f53159..ced6aa4 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -420,7 +420,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, ret = 0; fail: while (ret < 0 && !list_empty(&tmplist)) { - sums = list_entry(&tmplist, struct btrfs_ordered_sum, list); + sums = list_entry(tmplist.next, struct btrfs_ordered_sum, list); list_del(&sums->list); kfree(sums); } @@ -752,7 +752,7 @@ again: found_next = 1; if (ret != 0) goto insert; - slot = 0; + slot = path->slots[0]; } btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot); if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 72da4df..ad80dfa 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -426,13 +426,8 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, struct page *page = prepared_pages[pg]; /* * Copy data from userspace to the current page - * - * Disable pagefault to avoid recursive lock since - * the pages are already locked */ - pagefault_disable(); copied = iov_iter_copy_from_user_atomic(page, i, offset, count); - pagefault_enable(); /* Flush processor's dcache for this page */ flush_dcache_page(page); @@ -476,11 +471,12 @@ static void 
btrfs_drop_pages(struct page **pages, size_t num_pages) for (i = 0; i < num_pages; i++) { /* page checked is some magic around finding pages that * have been modified without going through btrfs_set_page_dirty - * clear it here + * clear it here. There should be no need to mark the pages + * accessed as prepare_pages should have marked them accessed + * in prepare_pages via find_or_create_page() */ ClearPageChecked(pages[i]); unlock_page(pages[i]); - mark_page_accessed(pages[i]); page_cache_release(pages[i]); } } diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index b4f9904..5467f84 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -832,7 +832,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, if (!matched) { __btrfs_remove_free_space_cache(ctl); - btrfs_err(fs_info, "block group %llu has wrong amount of free space", + btrfs_warn(fs_info, "block group %llu has wrong amount of free space", block_group->key.objectid); ret = -1; } @@ -844,7 +844,7 @@ out: spin_unlock(&block_group->lock); ret = 0; - btrfs_err(fs_info, "failed to load free space cache for block group %llu", + btrfs_warn(fs_info, "failed to load free space cache for block group %llu, rebuild it now", block_group->key.objectid); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3d03d2e..68f7a1f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -683,6 +683,18 @@ retry: unlock_extent(io_tree, async_extent->start, async_extent->start + async_extent->ram_size - 1); + + /* + * we need to redirty the pages if we decide to + * fallback to uncompressed IO, otherwise we + * will not submit these pages down to lower + * layers. 
+ */ + extent_range_redirty_for_io(inode, + async_extent->start, + async_extent->start + + async_extent->ram_size - 1); + goto retry; } goto out_free; @@ -1551,7 +1563,13 @@ static void btrfs_clear_bit_hook(struct inode *inode, spin_unlock(&BTRFS_I(inode)->lock); } - if (*bits & EXTENT_DO_ACCOUNTING) + /* + * We don't reserve metadata space for space cache inodes so we + * don't need to call dellalloc_release_metadata if there is an + * error. + */ + if (*bits & EXTENT_DO_ACCOUNTING && + root != root->fs_info->tree_root) btrfs_delalloc_release_metadata(inode, len); if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID @@ -2978,6 +2996,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) if (insert >= 1) { ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); if (ret) { + atomic_dec(&root->orphan_inodes); if (reserve) { clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, &BTRFS_I(inode)->runtime_flags); @@ -3027,14 +3046,16 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans, release_rsv = 1; spin_unlock(&root->orphan_lock); - if (trans && delete_item) - ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode)); - - if (release_rsv) { - btrfs_orphan_release_metadata(inode); + if (delete_item) { atomic_dec(&root->orphan_inodes); + if (trans) + ret = btrfs_del_orphan_item(trans, root, + btrfs_ino(inode)); } + if (release_rsv) + btrfs_orphan_release_metadata(inode); + return ret; } @@ -3527,7 +3548,8 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, * without delay */ if (!btrfs_is_free_space_inode(inode) - && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { + && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID + && !root->fs_info->log_root_recovering) { btrfs_update_root_times(trans, root); ret = btrfs_delayed_update_inode(trans, root, inode); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 669eb53..50a06de 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1014,7 
+1014,7 @@ out: static int cluster_pages_for_defrag(struct inode *inode, struct page **pages, unsigned long start_index, - int num_pages) + unsigned long num_pages) { unsigned long file_end; u64 isize = i_size_read(inode); @@ -1172,8 +1172,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, int defrag_count = 0; int compress_type = BTRFS_COMPRESS_ZLIB; int extent_thresh = range->extent_thresh; - int max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT; - int cluster = max_cluster; + unsigned long max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT; + unsigned long cluster = max_cluster; u64 new_align = ~((u64)128 * 1024 - 1); struct page **pages = NULL; @@ -4564,9 +4564,21 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_logical_to_ino(root, argp); case BTRFS_IOC_SPACE_INFO: return btrfs_ioctl_space_info(root, argp); - case BTRFS_IOC_SYNC: - btrfs_sync_fs(file->f_dentry->d_sb, 1); - return 0; + case BTRFS_IOC_SYNC: { + int ret; + + ret = btrfs_start_all_delalloc_inodes(root->fs_info, 0); + if (ret) + return ret; + ret = btrfs_sync_fs(file->f_dentry->d_sb, 1); + /* + * The transaction thread may want to do more work, + * namely it pokes the cleaner ktread that will start + * processing uncleaned subvols. 
+ */ + wake_up_process(root->fs_info->transaction_kthread); + return ret; + } case BTRFS_IOC_START_SYNC: return btrfs_ioctl_start_sync(root, argp); case BTRFS_IOC_WAIT_SYNC: diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 26450d8..7b83e0d 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -732,7 +732,8 @@ again: err = ret; goto out; } - BUG_ON(!ret || !path1->slots[0]); + ASSERT(ret); + ASSERT(path1->slots[0]); path1->slots[0]--; @@ -742,10 +743,10 @@ again: * the backref was added previously when processing * backref of type BTRFS_TREE_BLOCK_REF_KEY */ - BUG_ON(!list_is_singular(&cur->upper)); + ASSERT(list_is_singular(&cur->upper)); edge = list_entry(cur->upper.next, struct backref_edge, list[LOWER]); - BUG_ON(!list_empty(&edge->list[UPPER])); + ASSERT(list_empty(&edge->list[UPPER])); exist = edge->node[UPPER]; /* * add the upper level block to pending list if we need @@ -827,7 +828,7 @@ again: cur->cowonly = 1; } #else - BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); + ASSERT(key.type != BTRFS_EXTENT_REF_V0_KEY); if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) { #endif if (key.objectid == key.offset) { @@ -836,7 +837,7 @@ again: * backref of this type. 
*/ root = find_reloc_root(rc, cur->bytenr); - BUG_ON(!root); + ASSERT(root); cur->root = root; break; } @@ -864,7 +865,7 @@ again: } else { upper = rb_entry(rb_node, struct backref_node, rb_node); - BUG_ON(!upper->checked); + ASSERT(upper->checked); INIT_LIST_HEAD(&edge->list[UPPER]); } list_add_tail(&edge->list[LOWER], &cur->upper); @@ -888,7 +889,7 @@ again: if (btrfs_root_level(&root->root_item) == cur->level) { /* tree root */ - BUG_ON(btrfs_root_bytenr(&root->root_item) != + ASSERT(btrfs_root_bytenr(&root->root_item) == cur->bytenr); if (should_ignore_root(root)) list_add(&cur->list, &useless); @@ -923,7 +924,7 @@ again: need_check = true; for (; level < BTRFS_MAX_LEVEL; level++) { if (!path2->nodes[level]) { - BUG_ON(btrfs_root_bytenr(&root->root_item) != + ASSERT(btrfs_root_bytenr(&root->root_item) == lower->bytenr); if (should_ignore_root(root)) list_add(&lower->list, &useless); @@ -972,12 +973,15 @@ again: need_check = false; list_add_tail(&edge->list[UPPER], &list); - } else + } else { + if (upper->checked) + need_check = true; INIT_LIST_HEAD(&edge->list[UPPER]); + } } else { upper = rb_entry(rb_node, struct backref_node, rb_node); - BUG_ON(!upper->checked); + ASSERT(upper->checked); INIT_LIST_HEAD(&edge->list[UPPER]); if (!upper->owner) upper->owner = btrfs_header_owner(eb); @@ -1021,7 +1025,7 @@ next: * everything goes well, connect backref nodes and insert backref nodes * into the cache. */ - BUG_ON(!node->checked); + ASSERT(node->checked); cowonly = node->cowonly; if (!cowonly) { rb_node = tree_insert(&cache->rb_root, node->bytenr, @@ -1057,8 +1061,21 @@ next: continue; } - BUG_ON(!upper->checked); - BUG_ON(cowonly != upper->cowonly); + if (!upper->checked) { + /* + * Still want to blow up for developers since this is a + * logic bug. 
+ */ + ASSERT(0); + err = -EINVAL; + goto out; + } + if (cowonly != upper->cowonly) { + ASSERT(0); + err = -EINVAL; + goto out; + } + if (!cowonly) { rb_node = tree_insert(&cache->rb_root, upper->bytenr, &upper->rb_node); @@ -1081,7 +1098,7 @@ next: while (!list_empty(&useless)) { upper = list_entry(useless.next, struct backref_node, list); list_del_init(&upper->list); - BUG_ON(!list_empty(&upper->upper)); + ASSERT(list_empty(&upper->upper)); if (upper == node) node = NULL; if (upper->lowest) { @@ -1114,29 +1131,45 @@ out: if (err) { while (!list_empty(&useless)) { lower = list_entry(useless.next, - struct backref_node, upper); - list_del_init(&lower->upper); + struct backref_node, list); + list_del_init(&lower->list); } - upper = node; - INIT_LIST_HEAD(&list); - while (upper) { - if (RB_EMPTY_NODE(&upper->rb_node)) { - list_splice_tail(&upper->upper, &list); - free_backref_node(cache, upper); - } - - if (list_empty(&list)) - break; - - edge = list_entry(list.next, struct backref_edge, - list[LOWER]); + while (!list_empty(&list)) { + edge = list_first_entry(&list, struct backref_edge, + list[UPPER]); + list_del(&edge->list[UPPER]); list_del(&edge->list[LOWER]); + lower = edge->node[LOWER]; upper = edge->node[UPPER]; free_backref_edge(cache, edge); + + /* + * Lower is no longer linked to any upper backref nodes + * and isn't in the cache, we can free it ourselves. 
+ */ + if (list_empty(&lower->upper) && + RB_EMPTY_NODE(&lower->rb_node)) + list_add(&lower->list, &useless); + + if (!RB_EMPTY_NODE(&upper->rb_node)) + continue; + + /* Add this guy's upper edges to the list to proces */ + list_for_each_entry(edge, &upper->upper, list[LOWER]) + list_add_tail(&edge->list[UPPER], &list); + if (list_empty(&upper->upper)) + list_add(&upper->list, &useless); + } + + while (!list_empty(&useless)) { + lower = list_entry(useless.next, + struct backref_node, list); + list_del_init(&lower->list); + free_backref_node(cache, lower); } return ERR_PTR(err); } - BUG_ON(node && node->detached); + ASSERT(!node || !node->detached); return node; } @@ -1383,6 +1416,7 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, { struct btrfs_root *reloc_root; struct reloc_control *rc = root->fs_info->reloc_ctl; + struct btrfs_block_rsv *rsv; int clear_rsv = 0; int ret; @@ -1396,13 +1430,14 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) return 0; - if (!trans->block_rsv) { + if (!trans->reloc_reserved) { + rsv = trans->block_rsv; trans->block_rsv = rc->block_rsv; clear_rsv = 1; } reloc_root = create_reloc_root(trans, root, root->root_key.objectid); if (clear_rsv) - trans->block_rsv = NULL; + trans->block_rsv = rsv; ret = __add_reloc_root(reloc_root); BUG_ON(ret < 0); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index a18e0e2..0b23100 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -553,8 +553,9 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { do { - ret = tree_backref_for_extent(&ptr, eb, ei, item_size, - &ref_root, &ref_level); + ret = tree_backref_for_extent(&ptr, eb, &found_key, ei, + item_size, &ref_root, + &ref_level); printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev %s, " "sector %llu: metadata %s (level %d) in tree " diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 
741c839..76736b5 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -1547,6 +1547,10 @@ static int lookup_dir_item_inode(struct btrfs_root *root, goto out; } btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); + if (key.type == BTRFS_ROOT_ITEM_KEY) { + ret = -ENOENT; + goto out; + } *found_inode = key.objectid; *found_type = btrfs_dir_type(path->nodes[0], di); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 25d64e8..069c2fd 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -57,7 +57,7 @@ static unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = { __TRANS_JOIN_NOLOCK), }; -static void put_transaction(struct btrfs_transaction *transaction) +void btrfs_put_transaction(struct btrfs_transaction *transaction) { WARN_ON(atomic_read(&transaction->use_count) == 0); if (atomic_dec_and_test(&transaction->use_count)) { @@ -332,7 +332,7 @@ static void wait_current_trans(struct btrfs_root *root) wait_event(root->fs_info->transaction_wait, cur_trans->state >= TRANS_STATE_UNBLOCKED || cur_trans->aborted); - put_transaction(cur_trans); + btrfs_put_transaction(cur_trans); } else { spin_unlock(&root->fs_info->trans_lock); } @@ -353,6 +353,17 @@ static int may_wait_transaction(struct btrfs_root *root, int type) return 0; } +static inline bool need_reserve_reloc_root(struct btrfs_root *root) +{ + if (!root->fs_info->reloc_ctl || + !root->ref_cows || + root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID || + root->reloc_root) + return false; + + return true; +} + static struct btrfs_trans_handle * start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type, enum btrfs_reserve_flush_enum flush) @@ -360,8 +371,9 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type, struct btrfs_trans_handle *h; struct btrfs_transaction *cur_trans; u64 num_bytes = 0; - int ret; u64 qgroup_reserved = 0; + bool reloc_reserved = false; + int ret; if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) return 
ERR_PTR(-EROFS); @@ -390,6 +402,14 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type, } num_bytes = btrfs_calc_trans_metadata_size(root, num_items); + /* + * Do the reservation for the relocation root creation + */ + if (unlikely(need_reserve_reloc_root(root))) { + num_bytes += root->nodesize; + reloc_reserved = true; + } + ret = btrfs_block_rsv_add(root, &root->fs_info->trans_block_rsv, num_bytes, flush); @@ -451,6 +471,7 @@ again: h->delayed_ref_elem.seq = 0; h->type = type; h->allocating_chunk = false; + h->reloc_reserved = false; INIT_LIST_HEAD(&h->qgroup_ref_list); INIT_LIST_HEAD(&h->new_bgs); @@ -466,6 +487,7 @@ again: h->transid, num_bytes, 1); h->block_rsv = &root->fs_info->trans_block_rsv; h->bytes_reserved = num_bytes; + h->reloc_reserved = reloc_reserved; } h->qgroup_reserved = qgroup_reserved; @@ -572,7 +594,6 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) if (transid <= root->fs_info->last_trans_committed) goto out; - ret = -EINVAL; /* find specified transaction */ spin_lock(&root->fs_info->trans_lock); list_for_each_entry(t, &root->fs_info->trans_list, list) { @@ -588,9 +609,16 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) } } spin_unlock(&root->fs_info->trans_lock); - /* The specified transaction doesn't exist */ - if (!cur_trans) + + /* + * The specified transaction doesn't exist, or we + * raced with btrfs_commit_transaction + */ + if (!cur_trans) { + if (transid > root->fs_info->last_trans_committed) + ret = -EINVAL; goto out; + } } else { /* find newest transaction that is committing | committed */ spin_lock(&root->fs_info->trans_lock); @@ -610,7 +638,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) } wait_for_commit(root, cur_trans); - put_transaction(cur_trans); + btrfs_put_transaction(cur_trans); out: return ret; } @@ -729,7 +757,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, smp_mb(); if 
(waitqueue_active(&cur_trans->writer_wait)) wake_up(&cur_trans->writer_wait); - put_transaction(cur_trans); + btrfs_put_transaction(cur_trans); if (current->journal_info == trans) current->journal_info = NULL; @@ -738,8 +766,10 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, btrfs_run_delayed_iputs(root); if (trans->aborted || - test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) + test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) { + wake_up_process(info->transaction_kthread); err = -EIO; + } assert_qgroups_uptodate(trans); kmem_cache_free(btrfs_trans_handle_cachep, trans); @@ -1504,7 +1534,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, if (current->journal_info == trans) current->journal_info = NULL; - put_transaction(cur_trans); + btrfs_put_transaction(cur_trans); return 0; } @@ -1548,8 +1578,8 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, if (trans->type & __TRANS_FREEZABLE) sb_end_intwrite(root->fs_info->sb); - put_transaction(cur_trans); - put_transaction(cur_trans); + btrfs_put_transaction(cur_trans); + btrfs_put_transaction(cur_trans); trace_btrfs_transaction_commit(root); @@ -1665,7 +1695,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, wait_for_commit(root, cur_trans); - put_transaction(cur_trans); + btrfs_put_transaction(cur_trans); return ret; } @@ -1682,7 +1712,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, wait_for_commit(root, prev_trans); - put_transaction(prev_trans); + btrfs_put_transaction(prev_trans); } else { spin_unlock(&root->fs_info->trans_lock); } @@ -1881,8 +1911,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, list_del_init(&cur_trans->list); spin_unlock(&root->fs_info->trans_lock); - put_transaction(cur_trans); - put_transaction(cur_trans); + btrfs_put_transaction(cur_trans); + btrfs_put_transaction(cur_trans); if (trans->type & __TRANS_FREEZABLE) sb_end_intwrite(root->fs_info->sb); diff --git 
a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 5c2af84..7657d11 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -92,6 +92,7 @@ struct btrfs_trans_handle { short aborted; short adding_csums; bool allocating_chunk; + bool reloc_reserved; unsigned int type; /* * this root is only needed to validate that the root passed to @@ -166,4 +167,5 @@ int btrfs_wait_marked_extents(struct btrfs_root *root, struct extent_io_tree *dirty_pages, int mark); int btrfs_transaction_blocked(struct btrfs_fs_info *info); int btrfs_transaction_in_commit(struct btrfs_fs_info *info); +void btrfs_put_transaction(struct btrfs_transaction *transaction); #endif diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h index fb36731..3e62b57 100644 --- a/fs/btrfs/ulist.h +++ b/fs/btrfs/ulist.h @@ -74,6 +74,21 @@ void ulist_free(struct ulist *ulist); int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask); int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux, u64 *old_aux, gfp_t gfp_mask); + +/* just like ulist_add_merge() but take a pointer for the aux data */ +static inline int ulist_add_merge_ptr(struct ulist *ulist, u64 val, void *aux, + void **old_aux, gfp_t gfp_mask) +{ +#if BITS_PER_LONG == 32 + u64 old64 = (uintptr_t)*old_aux; + int ret = ulist_add_merge(ulist, val, (uintptr_t)aux, &old64, gfp_mask); + *old_aux = (void *)((uintptr_t)old64); + return ret; +#else + return ulist_add_merge(ulist, val, (u64)aux, (u64 *)old_aux, gfp_mask); +#endif +} + struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_iterator *uiter); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b691f37..7fae00b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1438,6 +1438,22 @@ out: return ret; } +/* + * Function to update ctime/mtime for a given device path. + * Mainly used for ctime/mtime based probe like libblkid. 
+ */ +static void update_dev_time(char *path_name) +{ + struct file *filp; + + filp = filp_open(path_name, O_RDWR, 0); + if (!filp) + return; + file_update_time(filp); + filp_close(filp, NULL); + return; +} + static int btrfs_rm_dev_item(struct btrfs_root *root, struct btrfs_device *device) { @@ -1660,11 +1676,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) struct btrfs_fs_devices *fs_devices; fs_devices = root->fs_info->fs_devices; while (fs_devices) { - if (fs_devices->seed == cur_devices) + if (fs_devices->seed == cur_devices) { + fs_devices->seed = cur_devices->seed; break; + } fs_devices = fs_devices->seed; } - fs_devices->seed = cur_devices->seed; cur_devices->seed = NULL; lock_chunks(root); __btrfs_close_devices(cur_devices); @@ -1690,10 +1707,14 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) ret = 0; - /* Notify udev that device has changed */ - if (bdev) + if (bdev) { + /* Notify udev that device has changed */ btrfs_kobject_uevent(bdev, KOBJ_CHANGE); + /* Update ctime/mtime for device path for libblkid */ + update_dev_time(device_path); + } + error_brelse: brelse(bh); if (bdev) @@ -1869,7 +1890,6 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) fs_devices->seeding = 0; fs_devices->num_devices = 0; fs_devices->open_devices = 0; - fs_devices->total_devices = 0; fs_devices->seed = seed_devices; generate_random_uuid(fs_devices->fsid); @@ -2131,6 +2151,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) ret = btrfs_commit_transaction(trans, root); } + /* Update ctime/mtime for libblkid */ + update_dev_time(device_path); return ret; error_trans: @@ -6029,10 +6051,14 @@ void btrfs_init_devices_late(struct btrfs_fs_info *fs_info) struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; struct btrfs_device *device; - mutex_lock(&fs_devices->device_list_mutex); - list_for_each_entry(device, &fs_devices->devices, dev_list) - device->dev_root = fs_info->dev_root; - 
mutex_unlock(&fs_devices->device_list_mutex); + while (fs_devices) { + mutex_lock(&fs_devices->device_list_mutex); + list_for_each_entry(device, &fs_devices->devices, dev_list) + device->dev_root = fs_info->dev_root; + mutex_unlock(&fs_devices->device_list_mutex); + + fs_devices = fs_devices->seed; + } } static void __btrfs_reset_dev_stats(struct btrfs_device *dev) diff --git a/fs/buffer.c b/fs/buffer.c index e0fa2a4..fe82b9d 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -227,7 +227,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) int all_mapped = 1; index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits); - page = find_get_page(bd_mapping, index); + page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED); if (!page) goto out; @@ -1021,7 +1021,8 @@ grow_dev_page(struct block_device *bdev, sector_t block, bh = page_buffers(page); if (bh->b_size == size) { end_block = init_page_buffers(page, bdev, - index << sizebits, size); + (sector_t)index << sizebits, + size); goto done; } if (!try_to_free_buffers(page)) @@ -1042,7 +1043,8 @@ grow_dev_page(struct block_device *bdev, sector_t block, */ spin_lock(&inode->i_mapping->private_lock); link_dev_buffers(page, bh); - end_block = init_page_buffers(page, bdev, index << sizebits, size); + end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits, + size); spin_unlock(&inode->i_mapping->private_lock); done: ret = (block < end_block) ? 1 : -ENXIO; @@ -1358,12 +1360,13 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size) struct buffer_head *bh = lookup_bh_lru(bdev, block, size); if (bh == NULL) { + /* __find_get_block_slow will mark the page accessed */ bh = __find_get_block_slow(bdev, block); if (bh) bh_lru_install(bh); - } - if (bh) + } else touch_buffer(bh); + return bh; } EXPORT_SYMBOL(__find_get_block); @@ -1475,16 +1478,27 @@ EXPORT_SYMBOL(set_bh_page); /* * Called when truncating a buffer on a page completely. 
*/ + +/* Bits that are cleared during an invalidate */ +#define BUFFER_FLAGS_DISCARD \ + (1 << BH_Mapped | 1 << BH_New | 1 << BH_Req | \ + 1 << BH_Delay | 1 << BH_Unwritten) + static void discard_buffer(struct buffer_head * bh) { + unsigned long b_state, b_state_old; + lock_buffer(bh); clear_buffer_dirty(bh); bh->b_bdev = NULL; - clear_buffer_mapped(bh); - clear_buffer_req(bh); - clear_buffer_new(bh); - clear_buffer_delay(bh); - clear_buffer_unwritten(bh); + b_state = bh->b_state; + for (;;) { + b_state_old = cmpxchg(&bh->b_state, b_state, + (b_state & ~BUFFER_FLAGS_DISCARD)); + if (b_state_old == b_state) + break; + b_state = b_state_old; + } unlock_buffer(bh); } @@ -2067,6 +2081,7 @@ int generic_write_end(struct file *file, struct address_space *mapping, struct page *page, void *fsdata) { struct inode *inode = mapping->host; + loff_t old_size = inode->i_size; int i_size_changed = 0; copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); @@ -2086,6 +2101,8 @@ int generic_write_end(struct file *file, struct address_space *mapping, unlock_page(page); page_cache_release(page); + if (old_size < pos) + pagecache_isize_extended(inode, old_size, pos); /* * Don't mark the inode dirty under page lock. First, it unnecessarily * makes the holding time of page lock longer. 
Second, it forces lock @@ -2303,6 +2320,11 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping, err = 0; balance_dirty_pages_ratelimited(mapping); + + if (unlikely(fatal_signal_pending(current))) { + err = -EINTR; + goto out; + } } /* page covers the boundary, find the boundary offset */ diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index ec3ba43..f757dff 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -672,7 +672,7 @@ static int ceph_writepages_start(struct address_space *mapping, int rc = 0; unsigned wsize = 1 << inode->i_blkbits; struct ceph_osd_request *req = NULL; - int do_sync; + int do_sync = 0; u64 truncate_size, snap_size; u32 truncate_seq; diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 868b61d..ea3de8b 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -103,7 +103,7 @@ static unsigned fpos_off(loff_t p) /* * When possible, we try to satisfy a readdir by peeking at the * dcache. We make this work by carefully ordering dentries on - * d_u.d_child when we initially get results back from the MDS, and + * d_child when we initially get results back from the MDS, and * falling back to a "normal" sync readdir if any dentries in the dir * are dropped. * @@ -138,11 +138,11 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx) p = parent->d_subdirs.prev; dout(" initial p %p/%p\n", p->prev, p->next); } else { - p = last->d_u.d_child.prev; + p = last->d_child.prev; } more: - dentry = list_entry(p, struct dentry, d_u.d_child); + dentry = list_entry(p, struct dentry, d_child); di = ceph_dentry(dentry); while (1) { dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next, @@ -164,7 +164,7 @@ more: !dentry->d_inode ? 
" null" : ""); spin_unlock(&dentry->d_lock); p = p->prev; - dentry = list_entry(p, struct dentry, d_u.d_child); + dentry = list_entry(p, struct dentry, d_child); di = ceph_dentry(dentry); } diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 8549a48..74a479e 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -880,9 +880,9 @@ static void ceph_set_dentry_offset(struct dentry *dn) spin_lock(&dir->d_lock); spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED); - list_move(&dn->d_u.d_child, &dir->d_subdirs); + list_move(&dn->d_child, &dir->d_subdirs); dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset, - dn->d_u.d_child.prev, dn->d_u.d_child.next); + dn->d_child.prev, dn->d_child.next); spin_unlock(&dn->d_lock); spin_unlock(&dir->d_lock); } @@ -1309,7 +1309,7 @@ retry_lookup: /* reorder parent's d_subdirs */ spin_lock(&parent->d_lock); spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED); - list_move(&dn->d_u.d_child, &parent->d_subdirs); + list_move(&dn->d_child, &parent->d_subdirs); spin_unlock(&dn->d_lock); spin_unlock(&parent->d_lock); } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 7889015..6f11613 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1420,15 +1420,18 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc, dout("discard_cap_releases mds%d\n", session->s_mds); spin_lock(&session->s_cap_lock); - /* zero out the in-progress message */ - msg = list_first_entry(&session->s_cap_releases, - struct ceph_msg, list_head); - head = msg->front.iov_base; - num = le32_to_cpu(head->num); - dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg, num); - head->num = cpu_to_le32(0); - msg->front.iov_len = sizeof(*head); - session->s_num_cap_releases += num; + if (!list_empty(&session->s_cap_releases)) { + /* zero out the in-progress message */ + msg = list_first_entry(&session->s_cap_releases, + struct ceph_msg, list_head); + head = msg->front.iov_base; + num = le32_to_cpu(head->num); + 
dout("discard_cap_releases mds%d %p %u\n", + session->s_mds, msg, num); + head->num = cpu_to_le32(0); + msg->front.iov_len = sizeof(*head); + session->s_num_cap_releases += num; + } /* requeue completed messages */ while (!list_empty(&session->s_cap_releases_done)) { diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 0227b45..15e9505 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -290,7 +290,8 @@ int cifsConvertToUTF16(__le16 *target, const char *source, int srclen, const struct nls_table *cp, int mapChars) { - int i, j, charlen; + int i, charlen; + int j = 0; char src_char; __le16 dst_char; wchar_t tmp; @@ -298,12 +299,11 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, if (!mapChars) return cifs_strtoUTF16(target, source, PATH_MAX, cp); - for (i = 0, j = 0; i < srclen; j++) { + for (i = 0; i < srclen; j++) { src_char = source[i]; charlen = 1; switch (src_char) { case 0: - put_unaligned(0, &target[j]); goto ctoUTF16_out; case ':': dst_char = cpu_to_le16(UNI_COLON); @@ -350,6 +350,7 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, } ctoUTF16_out: + put_unaligned(0, &target[j]); /* Null terminate target unicode string */ return j; } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 77fc5e1..b9f5709 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -87,10 +87,6 @@ extern mempool_t *cifs_mid_poolp; struct workqueue_struct *cifsiod_wq; -#ifdef CONFIG_CIFS_SMB2 -__u8 cifs_client_guid[SMB2_CLIENT_GUID_SIZE]; -#endif - /* * Bumps refcount for cifs super block. 
* Note that it should be only called if a referece to VFS super block is @@ -253,6 +249,11 @@ cifs_alloc_inode(struct super_block *sb) cifs_set_oplock_level(cifs_inode, 0); cifs_inode->delete_pending = false; cifs_inode->invalid_mapping = false; + clear_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cifs_inode->flags); + clear_bit(CIFS_INODE_PENDING_WRITERS, &cifs_inode->flags); + clear_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cifs_inode->flags); + spin_lock_init(&cifs_inode->writers_lock); + cifs_inode->writers = 0; cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ cifs_inode->server_eof = 0; cifs_inode->uniqueid = 0; @@ -731,19 +732,26 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { struct inode *inode = file_inode(iocb->ki_filp); + struct cifsInodeInfo *cinode = CIFS_I(inode); ssize_t written; int rc; + written = cifs_get_writer(cinode); + if (written) + return written; + written = generic_file_aio_write(iocb, iov, nr_segs, pos); if (CIFS_CACHE_WRITE(CIFS_I(inode))) - return written; + goto out; rc = filemap_fdatawrite(inode->i_mapping); if (rc) cifs_dbg(FYI, "cifs_file_aio_write: %d rc on %p inode\n", rc, inode); +out: + cifs_put_writer(cinode); return written; } @@ -1180,10 +1188,6 @@ init_cifs(void) spin_lock_init(&cifs_file_list_lock); spin_lock_init(&GlobalMid_Lock); -#ifdef CONFIG_CIFS_SMB2 - get_random_bytes(cifs_client_guid, SMB2_CLIENT_GUID_SIZE); -#endif - if (cifs_max_pending < 2) { cifs_max_pending = 2; cifs_dbg(FYI, "cifs_max_pending set to min of 2\n"); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index db95dca..cee6a79 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -70,11 +70,6 @@ #define SERVER_NAME_LENGTH 40 #define SERVER_NAME_LEN_WITH_NULL (SERVER_NAME_LENGTH + 1) -/* used to define string lengths for reversing unicode strings */ -/* (256+1)*2 = 514 */ -/* (max path length + 1 for null) * 2 for unicode */ -#define MAX_NAME 514 - /* SMB echo 
"timeout" -- FIXME: tunable? */ #define SMB_ECHO_INTERVAL (60 * HZ) @@ -228,6 +223,8 @@ struct smb_version_operations { /* verify the message */ int (*check_message)(char *, unsigned int); bool (*is_oplock_break)(char *, struct TCP_Server_Info *); + void (*downgrade_oplock)(struct TCP_Server_Info *, + struct cifsInodeInfo *, bool); /* process transaction2 response */ bool (*check_trans2)(struct mid_q_entry *, struct TCP_Server_Info *, char *, int); @@ -389,6 +386,9 @@ struct smb_version_operations { const char *, u32 *); int (*set_acl)(struct cifs_ntsd *, __u32, struct inode *, const char *, int); + int (*validate_negotiate)(const unsigned int, struct cifs_tcon *); + /* check if we need to issue closedir */ + bool (*dir_needs_close)(struct cifsFileInfo *); }; struct smb_version_values { @@ -544,6 +544,7 @@ struct TCP_Server_Info { int echo_credits; /* echo reserved slots */ int oplock_credits; /* oplock break reserved slots */ bool echoes:1; /* enable echoes */ + __u8 client_guid[SMB2_CLIENT_GUID_SIZE]; /* Client GUID */ #endif u16 dialect; /* dialect index that server chose */ bool oplocks:1; /* enable oplocks */ @@ -1072,6 +1073,12 @@ struct cifsInodeInfo { unsigned int epoch; /* used to track lease state changes */ bool delete_pending; /* DELETE_ON_CLOSE is set */ bool invalid_mapping; /* pagecache is invalid */ + unsigned long flags; +#define CIFS_INODE_PENDING_OPLOCK_BREAK (0) /* oplock break in progress */ +#define CIFS_INODE_PENDING_WRITERS (1) /* Writes in progress */ +#define CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2 (2) /* Downgrade oplock to L2 */ + spinlock_t writers_lock; + unsigned int writers; /* Number of writers on this inode */ unsigned long time; /* jiffies of last update of inode */ u64 server_eof; /* current file size on server -- protected by i_lock */ u64 uniqueid; /* server inode number */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 45ccfbd..c6bfe5b 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -127,6 +127,9 @@ 
extern u64 cifs_UnixTimeToNT(struct timespec); extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset); extern void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock); +extern int cifs_get_writer(struct cifsInodeInfo *cinode); +extern void cifs_put_writer(struct cifsInodeInfo *cinode); +extern void cifs_done_oplock_break(struct cifsInodeInfo *cinode); extern int cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, const unsigned int xid); extern int cifs_push_mandatory_locks(struct cifsFileInfo *cfile); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index a279ffc..89b5519 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2144,6 +2144,9 @@ cifs_get_tcp_session(struct smb_vol *volume_info) sizeof(tcp_ses->srcaddr)); memcpy(&tcp_ses->dstaddr, &volume_info->dstaddr, sizeof(tcp_ses->dstaddr)); +#ifdef CONFIG_CIFS_SMB2 + get_random_bytes(tcp_ses->client_guid, SMB2_CLIENT_GUID_SIZE); +#endif /* * at this point we are the only ones with the pointer * to the struct since the kernel thread not created yet diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 81476e1..f971527 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -762,7 +762,7 @@ int cifs_closedir(struct inode *inode, struct file *file) cifs_dbg(FYI, "Freeing private data in close dir\n"); spin_lock(&cifs_file_list_lock); - if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) { + if (server->ops->dir_needs_close(cfile)) { cfile->invalidHandle = true; spin_unlock(&cifs_file_list_lock); if (server->ops->close_dir) @@ -2590,8 +2590,8 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov, if (rc > 0) { ssize_t err; - err = generic_write_sync(file, pos, rc); - if (err < 0 && rc > 0) + err = generic_write_sync(file, iocb->ki_pos - rc, rc); + if (err < 0) rc = err; } @@ -2611,12 +2611,20 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); ssize_t written; + written = 
cifs_get_writer(cinode); + if (written) + return written; + if (CIFS_CACHE_WRITE(cinode)) { if (cap_unix(tcon->ses) && (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) - && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) - return generic_file_aio_write(iocb, iov, nr_segs, pos); - return cifs_writev(iocb, iov, nr_segs, pos); + && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) { + written = generic_file_aio_write( + iocb, iov, nr_segs, pos); + goto out; + } + written = cifs_writev(iocb, iov, nr_segs, pos); + goto out; } /* * For non-oplocked files in strict cache mode we need to write the data @@ -2636,6 +2644,8 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, inode); cinode->oplock = 0; } +out: + cifs_put_writer(cinode); return written; } @@ -2837,7 +2847,7 @@ cifs_uncached_read_into_pages(struct TCP_Server_Info *server, total_read += result; } - return total_read > 0 ? total_read : result; + return total_read > 0 && result != -EAGAIN ? total_read : result; } static ssize_t @@ -3260,7 +3270,7 @@ cifs_readpages_read_into_pages(struct TCP_Server_Info *server, total_read += result; } - return total_read > 0 ? total_read : result; + return total_read > 0 && result != -EAGAIN ? 
total_read : result; } static int cifs_readpages(struct file *file, struct address_space *mapping, @@ -3647,6 +3657,13 @@ static int cifs_launder_page(struct page *page) return rc; } +static int +cifs_pending_writers_wait(void *unused) +{ + schedule(); + return 0; +} + void cifs_oplock_break(struct work_struct *work) { struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, @@ -3654,8 +3671,15 @@ void cifs_oplock_break(struct work_struct *work) struct inode *inode = cfile->dentry->d_inode; struct cifsInodeInfo *cinode = CIFS_I(inode); struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); + struct TCP_Server_Info *server = tcon->ses->server; int rc = 0; + wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS, + cifs_pending_writers_wait, TASK_UNINTERRUPTIBLE); + + server->ops->downgrade_oplock(server, cinode, + test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags)); + if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) && cifs_has_mand_locks(cinode)) { cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n", @@ -3692,6 +3716,7 @@ void cifs_oplock_break(struct work_struct *work) cinode); cifs_dbg(FYI, "Oplock release rc = %d\n", rc); } + cifs_done_oplock_break(cinode); } const struct address_space_operations cifs_addr_ops = { diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 5f8bdff..ab9f992 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -874,7 +874,7 @@ inode_has_hashed_dentries(struct inode *inode) struct dentry *dentry; spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { if (!d_unhashed(dentry) || IS_ROOT(dentry)) { spin_unlock(&inode->i_lock); return true; @@ -1682,13 +1682,22 @@ cifs_rename(struct inode *source_dir, struct dentry *source_dentry, unlink_target: /* Try unlinking the target dentry if it's not negative */ if (target_dentry->d_inode && (rc == -EACCES || rc == -EEXIST)) { - tmprc = cifs_unlink(target_dir, 
target_dentry); + if (S_ISDIR(target_dentry->d_inode->i_mode)) + tmprc = cifs_rmdir(target_dir, target_dentry); + else + tmprc = cifs_unlink(target_dir, target_dentry); if (tmprc) goto cifs_rename_exit; rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry, to_name); } + /* force revalidate to go get info when needed */ + CIFS_I(source_dir)->time = CIFS_I(target_dir)->time = 0; + + source_dir->i_ctime = source_dir->i_mtime = target_dir->i_ctime = + target_dir->i_mtime = current_fs_time(source_dir->i_sb); + cifs_rename_exit: kfree(info_buf_source); kfree(from_name); diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 138a011..912a52e 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -472,8 +472,22 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) cifs_dbg(FYI, "file id match, oplock break\n"); pCifsInode = CIFS_I(netfile->dentry->d_inode); - cifs_set_oplock_level(pCifsInode, - pSMB->OplockLevel ? OPLOCK_READ : 0); + set_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, + &pCifsInode->flags); + + /* + * Set flag if the server downgrades the oplock + * to L2 else clear. + */ + if (pSMB->OplockLevel) + set_bit( + CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, + &pCifsInode->flags); + else + clear_bit( + CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, + &pCifsInode->flags); + queue_work(cifsiod_wq, &netfile->oplock_break); netfile->oplock_break_cancelled = false; @@ -557,6 +571,62 @@ void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock) cinode->oplock = 0; } +static int +cifs_oplock_break_wait(void *unused) +{ + schedule(); + return signal_pending(current) ? -ERESTARTSYS : 0; +} + +/* + * We wait for oplock breaks to be processed before we attempt to perform + * writes. 
+ */ +int cifs_get_writer(struct cifsInodeInfo *cinode) +{ + int rc; + +start: + rc = wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_OPLOCK_BREAK, + cifs_oplock_break_wait, TASK_KILLABLE); + if (rc) + return rc; + + spin_lock(&cinode->writers_lock); + if (!cinode->writers) + set_bit(CIFS_INODE_PENDING_WRITERS, &cinode->flags); + cinode->writers++; + /* Check to see if we have started servicing an oplock break */ + if (test_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cinode->flags)) { + cinode->writers--; + if (cinode->writers == 0) { + clear_bit(CIFS_INODE_PENDING_WRITERS, &cinode->flags); + wake_up_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS); + } + spin_unlock(&cinode->writers_lock); + goto start; + } + spin_unlock(&cinode->writers_lock); + return 0; +} + +void cifs_put_writer(struct cifsInodeInfo *cinode) +{ + spin_lock(&cinode->writers_lock); + cinode->writers--; + if (cinode->writers == 0) { + clear_bit(CIFS_INODE_PENDING_WRITERS, &cinode->flags); + wake_up_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS); + } + spin_unlock(&cinode->writers_lock); +} + +void cifs_done_oplock_break(struct cifsInodeInfo *cinode) +{ + clear_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cinode->flags); + wake_up_bit(&cinode->flags, CIFS_INODE_PENDING_OPLOCK_BREAK); +} + bool backup_cred(struct cifs_sb_info *cifs_sb) { diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 5940eca..e327a92 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -593,11 +593,11 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos, /* close and restart search */ cifs_dbg(FYI, "search backing up - close and restart search\n"); spin_lock(&cifs_file_list_lock); - if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) { + if (server->ops->dir_needs_close(cfile)) { cfile->invalidHandle = true; spin_unlock(&cifs_file_list_lock); - if (server->ops->close) - server->ops->close(xid, tcon, &cfile->fid); + if (server->ops->close_dir) + server->ops->close_dir(xid, tcon, &cfile->fid); } else 
spin_unlock(&cifs_file_list_lock); if (cfile->srch_inf.ntwrk_buf_start) { diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index e6ed0dc..09b0323 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -372,6 +372,16 @@ coalesce_t2(char *second_buf, struct smb_hdr *target_hdr) return 0; } +static void +cifs_downgrade_oplock(struct TCP_Server_Info *server, + struct cifsInodeInfo *cinode, bool set_level2) +{ + if (set_level2) + cifs_set_oplock_level(cinode, OPLOCK_READ); + else + cifs_set_oplock_level(cinode, 0); +} + static bool cifs_check_trans2(struct mid_q_entry *mid, struct TCP_Server_Info *server, char *buf, int malformed) @@ -937,6 +947,12 @@ cifs_is_read_op(__u32 oplock) return oplock == OPLOCK_READ; } +static bool +cifs_dir_needs_close(struct cifsFileInfo *cfile) +{ + return !cfile->srch_inf.endOfSearch && !cfile->invalidHandle; +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -957,6 +973,7 @@ struct smb_version_operations smb1_operations = { .clear_stats = cifs_clear_stats, .print_stats = cifs_print_stats, .is_oplock_break = is_valid_oplock_break, + .downgrade_oplock = cifs_downgrade_oplock, .check_trans2 = cifs_check_trans2, .need_neg = cifs_need_neg, .negotiate = cifs_negotiate, @@ -1003,6 +1020,7 @@ struct smb_version_operations smb1_operations = { .push_mand_locks = cifs_push_mandatory_locks, .query_mf_symlink = open_query_close_cifs_symlink, .is_read_op = cifs_is_read_op, + .dir_needs_close = cifs_dir_needs_close, #ifdef CONFIG_CIFS_XATTR .query_all_EAs = CIFSSMBQAllEAs, .set_EA = CIFSSMBSetEA, diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index 3f17b45..4599294 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -50,7 +50,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, goto out; } - smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2, + smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2, 
GFP_KERNEL); if (smb2_data == NULL) { rc = -ENOMEM; diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 84c012a..215f8d3 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -131,7 +131,7 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, *adjust_tz = false; *symlink = false; - smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2, + smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2, GFP_KERNEL); if (smb2_data == NULL) return -ENOMEM; diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c index 7c2f45c..4768cf8 100644 --- a/fs/cifs/smb2maperror.c +++ b/fs/cifs/smb2maperror.c @@ -214,7 +214,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_BREAKPOINT, -EIO, "STATUS_BREAKPOINT"}, {STATUS_SINGLE_STEP, -EIO, "STATUS_SINGLE_STEP"}, {STATUS_BUFFER_OVERFLOW, -EIO, "STATUS_BUFFER_OVERFLOW"}, - {STATUS_NO_MORE_FILES, -EIO, "STATUS_NO_MORE_FILES"}, + {STATUS_NO_MORE_FILES, -ENODATA, "STATUS_NO_MORE_FILES"}, {STATUS_WAKE_SYSTEM_DEBUGGER, -EIO, "STATUS_WAKE_SYSTEM_DEBUGGER"}, {STATUS_HANDLES_CLOSED, -EIO, "STATUS_HANDLES_CLOSED"}, {STATUS_NO_INHERITANCE, -EIO, "STATUS_NO_INHERITANCE"}, @@ -605,7 +605,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_MAPPED_FILE_SIZE_ZERO, -EIO, "STATUS_MAPPED_FILE_SIZE_ZERO"}, {STATUS_TOO_MANY_OPENED_FILES, -EMFILE, "STATUS_TOO_MANY_OPENED_FILES"}, {STATUS_CANCELLED, -EIO, "STATUS_CANCELLED"}, - {STATUS_CANNOT_DELETE, -EIO, "STATUS_CANNOT_DELETE"}, + {STATUS_CANNOT_DELETE, -EACCES, "STATUS_CANNOT_DELETE"}, {STATUS_INVALID_COMPUTER_NAME, -EIO, "STATUS_INVALID_COMPUTER_NAME"}, {STATUS_FILE_DELETED, -EIO, "STATUS_FILE_DELETED"}, {STATUS_SPECIAL_ACCOUNT, -EIO, "STATUS_SPECIAL_ACCOUNT"}, diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index fb39662..b8021fd 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -575,9 +575,21 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info 
*server) else cfile->oplock_break_cancelled = false; - server->ops->set_oplock_level(cinode, - rsp->OplockLevel ? SMB2_OPLOCK_LEVEL_II : 0, - 0, NULL); + set_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, + &cinode->flags); + + /* + * Set flag if the server downgrades the oplock + * to L2 else clear. + */ + if (rsp->OplockLevel) + set_bit( + CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, + &cinode->flags); + else + clear_bit( + CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, + &cinode->flags); queue_work(cifsiod_wq, &cfile->oplock_break); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 027a0c6..6f79cd8 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -251,7 +251,7 @@ smb2_query_file_info(const unsigned int xid, struct cifs_tcon *tcon, int rc; struct smb2_file_all_info *smb2_data; - smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2, + smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2, GFP_KERNEL); if (smb2_data == NULL) return -ENOMEM; @@ -646,6 +646,17 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, } static void +smb2_downgrade_oplock(struct TCP_Server_Info *server, + struct cifsInodeInfo *cinode, bool set_level2) +{ + if (set_level2) + server->ops->set_oplock_level(cinode, SMB2_OPLOCK_LEVEL_II, + 0, NULL); + else + server->ops->set_oplock_level(cinode, 0, 0, NULL); +} + +static void smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, unsigned int epoch, bool *purge_cache) { @@ -832,6 +843,12 @@ smb3_parse_lease_buf(void *buf, unsigned int *epoch) return le32_to_cpu(lc->lcontext.LeaseState); } +static bool +smb2_dir_needs_close(struct cifsFileInfo *cfile) +{ + return !cfile->invalidHandle; +} + struct smb_version_operations smb20_operations = { .compare_fids = smb2_compare_fids, .setup_request = smb2_setup_request, @@ -851,6 +868,7 @@ struct smb_version_operations smb20_operations = { .clear_stats = smb2_clear_stats, .print_stats = smb2_print_stats, .is_oplock_break = smb2_is_valid_oplock_break, + 
.downgrade_oplock = smb2_downgrade_oplock, .need_neg = smb2_need_neg, .negotiate = smb2_negotiate, .negotiate_wsize = smb2_negotiate_wsize, @@ -901,6 +919,7 @@ struct smb_version_operations smb20_operations = { .set_oplock_level = smb2_set_oplock_level, .create_lease_buf = smb2_create_lease_buf, .parse_lease_buf = smb2_parse_lease_buf, + .dir_needs_close = smb2_dir_needs_close, }; struct smb_version_operations smb21_operations = { @@ -922,6 +941,7 @@ struct smb_version_operations smb21_operations = { .clear_stats = smb2_clear_stats, .print_stats = smb2_print_stats, .is_oplock_break = smb2_is_valid_oplock_break, + .downgrade_oplock = smb2_downgrade_oplock, .need_neg = smb2_need_neg, .negotiate = smb2_negotiate, .negotiate_wsize = smb2_negotiate_wsize, @@ -972,6 +992,7 @@ struct smb_version_operations smb21_operations = { .set_oplock_level = smb21_set_oplock_level, .create_lease_buf = smb2_create_lease_buf, .parse_lease_buf = smb2_parse_lease_buf, + .dir_needs_close = smb2_dir_needs_close, }; struct smb_version_operations smb30_operations = { @@ -994,6 +1015,7 @@ struct smb_version_operations smb30_operations = { .print_stats = smb2_print_stats, .dump_share_caps = smb2_dump_share_caps, .is_oplock_break = smb2_is_valid_oplock_break, + .downgrade_oplock = smb2_downgrade_oplock, .need_neg = smb2_need_neg, .negotiate = smb2_negotiate, .negotiate_wsize = smb2_negotiate_wsize, @@ -1045,6 +1067,8 @@ struct smb_version_operations smb30_operations = { .set_oplock_level = smb3_set_oplock_level, .create_lease_buf = smb3_create_lease_buf, .parse_lease_buf = smb3_parse_lease_buf, + .validate_negotiate = smb3_validate_negotiate, + .dir_needs_close = smb2_dir_needs_close, }; struct smb_version_values smb20_values = { diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 06d29e3..1f096f6 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -375,7 +375,12 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) req->Capabilities = 
cpu_to_le32(ses->server->vals->req_capabilities); - memcpy(req->ClientGUID, cifs_client_guid, SMB2_CLIENT_GUID_SIZE); + /* ClientGUID must be zero for SMB2.02 dialect */ + if (ses->server->vals->protocol_id == SMB20_PROT_ID) + memset(req->ClientGUID, 0, SMB2_CLIENT_GUID_SIZE); + else + memcpy(req->ClientGUID, server->client_guid, + SMB2_CLIENT_GUID_SIZE); iov[0].iov_base = (char *)req; /* 4 for rfc1002 length field */ @@ -456,6 +461,82 @@ neg_exit: return rc; } +int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) +{ + int rc = 0; + struct validate_negotiate_info_req vneg_inbuf; + struct validate_negotiate_info_rsp *pneg_rsp; + u32 rsplen; + + cifs_dbg(FYI, "validate negotiate\n"); + + /* + * validation ioctl must be signed, so no point sending this if we + * can not sign it. We could eventually change this to selectively + * sign just this, the first and only signed request on a connection. + * This is good enough for now since a user who wants better security + * would also enable signing on the mount. 
Having validation of + * negotiate info for signed connections helps reduce attack vectors + */ + if (tcon->ses->server->sign == false) + return 0; /* validation requires signing */ + + vneg_inbuf.Capabilities = + cpu_to_le32(tcon->ses->server->vals->req_capabilities); + memcpy(vneg_inbuf.Guid, tcon->ses->server->client_guid, + SMB2_CLIENT_GUID_SIZE); + + if (tcon->ses->sign) + vneg_inbuf.SecurityMode = + cpu_to_le16(SMB2_NEGOTIATE_SIGNING_REQUIRED); + else if (global_secflags & CIFSSEC_MAY_SIGN) + vneg_inbuf.SecurityMode = + cpu_to_le16(SMB2_NEGOTIATE_SIGNING_ENABLED); + else + vneg_inbuf.SecurityMode = 0; + + vneg_inbuf.DialectCount = cpu_to_le16(1); + vneg_inbuf.Dialects[0] = + cpu_to_le16(tcon->ses->server->vals->protocol_id); + + rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, + FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */, + (char *)&vneg_inbuf, sizeof(struct validate_negotiate_info_req), + (char **)&pneg_rsp, &rsplen); + + if (rc != 0) { + cifs_dbg(VFS, "validate protocol negotiate failed: %d\n", rc); + return -EIO; + } + + if (rsplen != sizeof(struct validate_negotiate_info_rsp)) { + cifs_dbg(VFS, "invalid size of protocol negotiate response\n"); + return -EIO; + } + + /* check validate negotiate info response matches what we got earlier */ + if (pneg_rsp->Dialect != + cpu_to_le16(tcon->ses->server->vals->protocol_id)) + goto vneg_out; + + if (pneg_rsp->SecurityMode != cpu_to_le16(tcon->ses->server->sec_mode)) + goto vneg_out; + + /* do not validate server guid because not saved at negprot time yet */ + + if ((le32_to_cpu(pneg_rsp->Capabilities) | SMB2_NT_FIND | + SMB2_LARGE_FILES) != tcon->ses->server->capabilities) + goto vneg_out; + + /* validate negotiate successful */ + cifs_dbg(FYI, "validate negotiate info successful\n"); + return 0; + +vneg_out: + cifs_dbg(VFS, "protocol revalidation - security settings mismatch\n"); + return -EIO; +} + int SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, const struct nls_table *nls_cp) @@ 
-821,6 +902,8 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, ((tcon->share_flags & SHI1005_FLAGS_DFS) == 0)) cifs_dbg(VFS, "DFS capability contradicts DFS flag\n"); + if (tcon->ses->server->ops->validate_negotiate) + rc = tcon->ses->server->ops->validate_negotiate(xid, tcon); tcon_exit: free_rsp_buf(resp_buftype, rsp); kfree(unc_path); @@ -829,7 +912,8 @@ tcon_exit: tcon_error_exit: if (rsp->hdr.Status == STATUS_BAD_NETWORK_NAME) { cifs_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree); - tcon->bad_network_name = true; + if (tcon) + tcon->bad_network_name = true; } goto tcon_exit; } @@ -1002,6 +1086,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, int rc = 0; unsigned int num_iovecs = 2; __u32 file_attributes = 0; + char *dhc_buf = NULL, *lc_buf = NULL; cifs_dbg(FYI, "create/open\n"); @@ -1068,6 +1153,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, kfree(copy_path); return rc; } + lc_buf = iov[num_iovecs-1].iov_base; } if (*oplock == SMB2_OPLOCK_LEVEL_BATCH) { @@ -1082,9 +1168,10 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, if (rc) { cifs_small_buf_release(req); kfree(copy_path); - kfree(iov[num_iovecs-1].iov_base); + kfree(lc_buf); return rc; } + dhc_buf = iov[num_iovecs-1].iov_base; } rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buftype, 0); @@ -1116,6 +1203,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, *oplock = rsp->OplockLevel; creat_exit: kfree(copy_path); + kfree(lc_buf); + kfree(dhc_buf); free_rsp_buf(resp_buftype, rsp); return rc; } @@ -1400,7 +1489,7 @@ SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, { return query_info(xid, tcon, persistent_fid, volatile_fid, FILE_ALL_INFORMATION, - sizeof(struct smb2_file_all_info) + MAX_NAME * 2, + sizeof(struct smb2_file_all_info) + PATH_MAX * 2, sizeof(struct smb2_file_all_info), data); } @@ -1995,6 +2084,10 @@ 
SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, rsp = (struct smb2_query_directory_rsp *)iov[0].iov_base; if (rc) { + if (rc == -ENODATA && rsp->hdr.Status == STATUS_NO_MORE_FILES) { + srch_inf->endOfSearch = true; + rc = 0; + } cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE); goto qdir_exit; } @@ -2032,11 +2125,6 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, else cifs_dbg(VFS, "illegal search buffer type\n"); - if (rsp->hdr.Status == STATUS_NO_MORE_FILES) - srch_inf->endOfSearch = 1; - else - srch_inf->endOfSearch = 0; - return rc; qdir_exit: diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index b83d011..6133a4e 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -166,8 +166,6 @@ struct smb2_symlink_err_rsp { #define SMB2_CLIENT_GUID_SIZE 16 -extern __u8 cifs_client_guid[SMB2_CLIENT_GUID_SIZE]; - struct smb2_negotiate_req { struct smb2_hdr hdr; __le16 StructureSize; /* Must be 36 */ @@ -546,13 +544,19 @@ struct copychunk_ioctl { __u32 Reserved2; } __packed; -/* Response and Request are the same format */ -struct validate_negotiate_info { +struct validate_negotiate_info_req { __le32 Capabilities; __u8 Guid[SMB2_CLIENT_GUID_SIZE]; __le16 SecurityMode; __le16 DialectCount; - __le16 Dialect[1]; + __le16 Dialects[1]; /* dialect (someday maybe list) client asked for */ +} __packed; + +struct validate_negotiate_info_rsp { + __le32 Capabilities; + __u8 Guid[SMB2_CLIENT_GUID_SIZE]; + __le16 SecurityMode; + __le16 Dialect; /* Dialect in use for the connection */ } __packed; #define RSS_CAPABLE 0x00000001 diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 7db5db0..d18b19e 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -158,5 +158,6 @@ extern int smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon, struct smb2_lock_element *buf); extern int SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon, __u8 *lease_key, const __le32 lease_state); +extern int 
smb3_validate_negotiate(const unsigned int, struct cifs_tcon *); #endif /* _SMB2PROTO_H */ diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h index a4b2391f..0e538b5 100644 --- a/fs/cifs/smbfsctl.h +++ b/fs/cifs/smbfsctl.h @@ -90,7 +90,7 @@ #define FSCTL_LMR_REQUEST_RESILIENCY 0x001401D4 /* BB add struct */ #define FSCTL_LMR_GET_LINK_TRACK_INF 0x001400E8 /* BB add struct */ #define FSCTL_LMR_SET_LINK_TRACK_INF 0x001400EC /* BB add struct */ -#define FSCTL_VALIDATE_NEGOTIATE_INFO 0x00140204 /* BB add struct */ +#define FSCTL_VALIDATE_NEGOTIATE_INFO 0x00140204 /* Perform server-side data movement */ #define FSCTL_SRV_COPYCHUNK 0x001440F2 #define FSCTL_SRV_COPYCHUNK_WRITE 0x001480F2 diff --git a/fs/coda/cache.c b/fs/coda/cache.c index 1da168c..9bc1147 100644 --- a/fs/coda/cache.c +++ b/fs/coda/cache.c @@ -92,7 +92,7 @@ static void coda_flag_children(struct dentry *parent, int flag) struct dentry *de; spin_lock(&parent->d_lock); - list_for_each_entry(de, &parent->d_subdirs, d_u.d_child) { + list_for_each_entry(de, &parent->d_subdirs, d_child) { /* don't know what to do with negative dentries */ if (de->d_inode ) coda_flag_inode(de->d_inode, flag); diff --git a/fs/coredump.c b/fs/coredump.c index 9bdeca1..88adbdd 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -74,10 +74,15 @@ static int expand_corename(struct core_name *cn, int size) static int cn_vprintf(struct core_name *cn, const char *fmt, va_list arg) { int free, need; + va_list arg_copy; again: free = cn->size - cn->used; - need = vsnprintf(cn->corename + cn->used, free, fmt, arg); + + va_copy(arg_copy, arg); + need = vsnprintf(cn->corename + cn->used, free, fmt, arg_copy); + va_end(arg_copy); + if (need < free) { cn->used += need; return 0; @@ -302,7 +307,7 @@ static int zap_threads(struct task_struct *tsk, struct mm_struct *mm, if (unlikely(nr < 0)) return nr; - tsk->flags = PF_DUMPCORE; + tsk->flags |= PF_DUMPCORE; if (atomic_read(&mm->mm_users) == nr + 1) goto done; /* diff --git a/fs/cramfs/inode.c 
b/fs/cramfs/inode.c index e501ac3..2f6cfca 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -179,8 +179,7 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i struct page *page = NULL; if (blocknr + i < devsize) { - page = read_mapping_page_async(mapping, blocknr + i, - NULL); + page = read_mapping_page(mapping, blocknr + i, NULL); /* synchronous error? */ if (IS_ERR(page)) page = NULL; diff --git a/fs/dcache.c b/fs/dcache.c index 2f9d6f3..ed35215 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -45,7 +45,7 @@ /* * Usage: * dcache->d_inode->i_lock protects: - * - i_dentry, d_alias, d_inode of aliases + * - i_dentry, d_u.d_alias, d_inode of aliases * dcache_hash_bucket lock protects: * - the dcache hash table * s_anon bl list spinlock protects: @@ -60,7 +60,7 @@ * - d_unhashed() * - d_parent and d_subdirs * - childrens' d_child and d_parent - * - d_alias, d_inode + * - d_u.d_alias, d_inode * * Ordering: * dentry->d_inode->i_lock @@ -126,8 +126,6 @@ static inline void done_seqretry(seqlock_t *lock, int seq) * This hash-function tries to avoid losing too many bits of hash * information, yet avoid using a prime hash-size or similar. */ -#define D_HASHBITS d_hash_shift -#define D_HASHMASK d_hash_mask static unsigned int d_hash_mask __read_mostly; static unsigned int d_hash_shift __read_mostly; @@ -138,8 +136,7 @@ static inline struct hlist_bl_head *d_hash(const struct dentry *parent, unsigned int hash) { hash += (unsigned long) parent / L1_CACHE_BYTES; - hash = hash + (hash >> D_HASHBITS); - return dentry_hashtable + (hash & D_HASHMASK); + return dentry_hashtable + hash_32(hash, d_hash_shift); } /* Statistics gathering. */ @@ -272,22 +269,14 @@ static void __d_free(struct rcu_head *head) { struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); - WARN_ON(!hlist_unhashed(&dentry->d_alias)); if (dname_external(dentry)) kfree(dentry->d_name.name); kmem_cache_free(dentry_cache, dentry); } -/* - * no locks, please. 
- */ -static void d_free(struct dentry *dentry) +static void dentry_free(struct dentry *dentry) { - BUG_ON((int)dentry->d_lockref.count > 0); - this_cpu_dec(nr_dentry); - if (dentry->d_op && dentry->d_op->d_release) - dentry->d_op->d_release(dentry); - + WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias)); /* if dentry was never visible to RCU, immediate free is OK */ if (!(dentry->d_flags & DCACHE_RCUACCESS)) __d_free(&dentry->d_u.d_rcu); @@ -321,7 +310,7 @@ static void dentry_iput(struct dentry * dentry) struct inode *inode = dentry->d_inode; if (inode) { dentry->d_inode = NULL; - hlist_del_init(&dentry->d_alias); + hlist_del_init(&dentry->d_u.d_alias); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); if (!inode->i_nlink) @@ -345,7 +334,7 @@ static void dentry_unlink_inode(struct dentry * dentry) { struct inode *inode = dentry->d_inode; dentry->d_inode = NULL; - hlist_del_init(&dentry->d_alias); + hlist_del_init(&dentry->d_u.d_alias); dentry_rcuwalk_barrier(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); @@ -434,77 +423,6 @@ static void dentry_lru_add(struct dentry *dentry) d_lru_add(dentry); } -/* - * Remove a dentry with references from the LRU. - * - * If we are on the shrink list, then we can get to try_prune_one_dentry() and - * lose our last reference through the parent walk. In this case, we need to - * remove ourselves from the shrink list, not the LRU. - */ -static void dentry_lru_del(struct dentry *dentry) -{ - if (dentry->d_flags & DCACHE_LRU_LIST) { - if (dentry->d_flags & DCACHE_SHRINK_LIST) - return d_shrink_del(dentry); - d_lru_del(dentry); - } -} - -/** - * d_kill - kill dentry and return parent - * @dentry: dentry to kill - * @parent: parent dentry - * - * The dentry must already be unhashed and removed from the LRU. - * - * If this is the root of the dentry tree, return NULL. - * - * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by - * d_kill. 
- */ -static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) - __releases(dentry->d_lock) - __releases(parent->d_lock) - __releases(dentry->d_inode->i_lock) -{ - list_del(&dentry->d_u.d_child); - /* - * Inform try_to_ascend() that we are no longer attached to the - * dentry tree - */ - dentry->d_flags |= DCACHE_DENTRY_KILLED; - if (parent) - spin_unlock(&parent->d_lock); - dentry_iput(dentry); - /* - * dentry_iput drops the locks, at which point nobody (except - * transient RCU lookups) can reach this dentry. - */ - d_free(dentry); - return parent; -} - -/* - * Unhash a dentry without inserting an RCU walk barrier or checking that - * dentry->d_lock is locked. The caller must take care of that, if - * appropriate. - */ -static void __d_shrink(struct dentry *dentry) -{ - if (!d_unhashed(dentry)) { - struct hlist_bl_head *b; - if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) - b = &dentry->d_sb->s_anon; - else - b = d_hash(dentry->d_parent, dentry->d_name.hash); - - hlist_bl_lock(b); - __hlist_bl_del(&dentry->d_hash); - dentry->d_hash.pprev = NULL; - hlist_bl_unlock(b); - } -} - /** * d_drop - drop a dentry * @dentry: dentry to drop @@ -523,7 +441,16 @@ static void __d_shrink(struct dentry *dentry) void __d_drop(struct dentry *dentry) { if (!d_unhashed(dentry)) { - __d_shrink(dentry); + struct hlist_bl_head *b; + if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) + b = &dentry->d_sb->s_anon; + else + b = d_hash(dentry->d_parent, dentry->d_name.hash); + + hlist_bl_lock(b); + __hlist_bl_del(&dentry->d_hash); + dentry->d_hash.pprev = NULL; + hlist_bl_unlock(b); dentry_rcuwalk_barrier(dentry); } } @@ -537,37 +464,12 @@ void d_drop(struct dentry *dentry) } EXPORT_SYMBOL(d_drop); -/* - * Finish off a dentry we've decided to kill. - * dentry->d_lock must be held, returns with it unlocked. - * If ref is non-zero, then decrement the refcount too. - * Returns dentry requiring refcount drop, or NULL if we're done. 
- */ -static struct dentry * -dentry_kill(struct dentry *dentry, int unlock_on_failure) - __releases(dentry->d_lock) +static void __dentry_kill(struct dentry *dentry) { - struct inode *inode; - struct dentry *parent; - - inode = dentry->d_inode; - if (inode && !spin_trylock(&inode->i_lock)) { -relock: - if (unlock_on_failure) { - spin_unlock(&dentry->d_lock); - cpu_chill(); - } - return dentry; /* try again with same dentry */ - } - if (IS_ROOT(dentry)) - parent = NULL; - else +struct dentry *parent = NULL; + bool can_free = true; + if (!IS_ROOT(dentry)) parent = dentry->d_parent; - if (parent && !spin_trylock(&parent->d_lock)) { - if (inode) - spin_unlock(&inode->i_lock); - goto relock; - } /* * The dentry is now unrecoverably dead to the world. @@ -581,10 +483,105 @@ relock: if ((dentry->d_flags & DCACHE_OP_PRUNE) && !d_unhashed(dentry)) dentry->d_op->d_prune(dentry); - dentry_lru_del(dentry); + if (dentry->d_flags & DCACHE_LRU_LIST) { + if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) + d_lru_del(dentry); + } /* if it was on the hash then remove it */ __d_drop(dentry); - return d_kill(dentry, parent); + __list_del_entry(&dentry->d_child); + /* + * Inform d_walk() that we are no longer attached to the + * dentry tree + */ + dentry->d_flags |= DCACHE_DENTRY_KILLED; + if (parent) + spin_unlock(&parent->d_lock); + dentry_iput(dentry); + /* + * dentry_iput drops the locks, at which point nobody (except + * transient RCU lookups) can reach this dentry. + */ + BUG_ON((int)dentry->d_lockref.count > 0); + this_cpu_dec(nr_dentry); + if (dentry->d_op && dentry->d_op->d_release) + dentry->d_op->d_release(dentry); + + spin_lock(&dentry->d_lock); + if (dentry->d_flags & DCACHE_SHRINK_LIST) { + dentry->d_flags |= DCACHE_MAY_FREE; + can_free = false; + } + spin_unlock(&dentry->d_lock); + if (likely(can_free)) + dentry_free(dentry); +} + +/* + * Finish off a dentry we've decided to kill. + * dentry->d_lock must be held, returns with it unlocked. 
+ * If ref is non-zero, then decrement the refcount too. + * Returns dentry requiring refcount drop, or NULL if we're done. + */ +static struct dentry *dentry_kill(struct dentry *dentry) + __releases(dentry->d_lock) +{ + struct inode *inode = dentry->d_inode; + struct dentry *parent = NULL; + + if (inode && unlikely(!spin_trylock(&inode->i_lock))) + goto failed; + + if (!IS_ROOT(dentry)) { + parent = dentry->d_parent; + if (unlikely(!spin_trylock(&parent->d_lock))) { + if (inode) + spin_unlock(&inode->i_lock); + goto failed; + } + } + + __dentry_kill(dentry); + return parent; + +failed: + spin_unlock(&dentry->d_lock); + cpu_chill(); + return dentry; /* try again with same dentry */ +} + +static inline struct dentry *lock_parent(struct dentry *dentry) +{ + struct dentry *parent = dentry->d_parent; + if (IS_ROOT(dentry)) + return NULL; + if (unlikely((int)dentry->d_lockref.count < 0)) + return NULL; + if (likely(spin_trylock(&parent->d_lock))) + return parent; + rcu_read_lock(); + spin_unlock(&dentry->d_lock); +again: + parent = ACCESS_ONCE(dentry->d_parent); + spin_lock(&parent->d_lock); + /* + * We can't blindly lock dentry until we are sure + * that we won't violate the locking order. + * Any changes of dentry->d_parent must have + * been done with parent->d_lock held, so + * spin_lock() above is enough of a barrier + * for checking if it's still our child. 
+ */ + if (unlikely(parent != dentry->d_parent)) { + spin_unlock(&parent->d_lock); + goto again; + } + rcu_read_unlock(); + if (parent != dentry) + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + else + parent = NULL; + return parent; } /* @@ -640,7 +637,7 @@ repeat: return; kill_it: - dentry = dentry_kill(dentry, 1); + dentry = dentry_kill(dentry); if (dentry) goto repeat; } @@ -776,7 +773,7 @@ static struct dentry *__d_find_alias(struct inode *inode, int want_discon) again: discon_alias = NULL; - hlist_for_each_entry(alias, &inode->i_dentry, d_alias) { + hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { spin_lock(&alias->d_lock); if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { if (IS_ROOT(alias) && @@ -829,7 +826,7 @@ void d_prune_aliases(struct inode *inode) struct dentry *dentry; restart: spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { spin_lock(&dentry->d_lock); if (!dentry->d_lockref.count) { /* @@ -853,64 +850,15 @@ restart: } EXPORT_SYMBOL(d_prune_aliases); -/* - * Try to throw away a dentry - free the inode, dput the parent. - * Requires dentry->d_lock is held, and dentry->d_count == 0. - * Releases dentry->d_lock. - * - * This may fail if locks cannot be acquired no problem, just try again. - */ -static struct dentry * try_prune_one_dentry(struct dentry *dentry) - __releases(dentry->d_lock) -{ - struct dentry *parent; - - parent = dentry_kill(dentry, 0); - /* - * If dentry_kill returns NULL, we have nothing more to do. - * if it returns the same dentry, trylocks failed. In either - * case, just loop again. - * - * Otherwise, we need to prune ancestors too. This is necessary - * to prevent quadratic behavior of shrink_dcache_parent(), but - * is also expected to be beneficial in reducing dentry cache - * fragmentation. - */ - if (!parent) - return NULL; - if (parent == dentry) - return dentry; - - /* Prune ancestors. 
*/ - dentry = parent; - while (dentry) { - if (lockref_put_or_lock(&dentry->d_lockref)) - return NULL; - dentry = dentry_kill(dentry, 1); - } - return NULL; -} - static void shrink_dentry_list(struct list_head *list) { - struct dentry *dentry; + struct dentry *dentry, *parent; - rcu_read_lock(); - for (;;) { - dentry = list_entry_rcu(list->prev, struct dentry, d_lru); - if (&dentry->d_lru == list) - break; /* empty */ - - /* - * Get the dentry lock, and re-verify that the dentry is - * this on the shrinking list. If it is, we know that - * DCACHE_SHRINK_LIST and DCACHE_LRU_LIST are set. - */ + while (!list_empty(list)) { + struct inode *inode; + dentry = list_entry(list->prev, struct dentry, d_lru); spin_lock(&dentry->d_lock); - if (dentry != list_entry(list->prev, struct dentry, d_lru)) { - spin_unlock(&dentry->d_lock); - continue; - } + parent = lock_parent(dentry); /* * The dispose list is isolated and dentries are not accounted @@ -923,30 +871,63 @@ static void shrink_dentry_list(struct list_head *list) * We found an inuse dentry which was not removed from * the LRU because of laziness during lookup. Do not free it. */ - if (dentry->d_lockref.count) { + if ((int)dentry->d_lockref.count > 0) { spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); continue; } - rcu_read_unlock(); - /* - * If 'try_to_prune()' returns a dentry, it will - * be the same one we passed in, and d_lock will - * have been held the whole time, so it will not - * have been added to any other lists. We failed - * to get the inode lock. - * - * We just add it back to the shrink list. 
- */ - dentry = try_prune_one_dentry(dentry); - rcu_read_lock(); - if (dentry) { + if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) { + bool can_free = dentry->d_flags & DCACHE_MAY_FREE; + spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); + if (can_free) + dentry_free(dentry); + continue; + } + + inode = dentry->d_inode; + if (inode && unlikely(!spin_trylock(&inode->i_lock))) { d_shrink_add(dentry, list); spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); + continue; + } + + __dentry_kill(dentry); + + /* + * We need to prune ancestors too. This is necessary to prevent + * quadratic behavior of shrink_dcache_parent(), but is also + * expected to be beneficial in reducing dentry cache + * fragmentation. + */ + dentry = parent; + while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) { + parent = lock_parent(dentry); + if (dentry->d_lockref.count != 1) { + dentry->d_lockref.count--; + spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); + break; + } + inode = dentry->d_inode; /* can't be NULL */ + if (unlikely(!spin_trylock(&inode->i_lock))) { + spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); + cpu_chill(); + continue; + } + __dentry_kill(dentry); + dentry = parent; } } - rcu_read_unlock(); } static enum lru_status @@ -1076,144 +1057,6 @@ void shrink_dcache_sb(struct super_block *sb) } EXPORT_SYMBOL(shrink_dcache_sb); -/* - * destroy a single subtree of dentries for unmount - * - see the comments on shrink_dcache_for_umount() for a description of the - * locking - */ -static void shrink_dcache_for_umount_subtree(struct dentry *dentry) -{ - struct dentry *parent; - - BUG_ON(!IS_ROOT(dentry)); - - for (;;) { - /* descend to the first leaf in the current subtree */ - while (!list_empty(&dentry->d_subdirs)) - dentry = list_entry(dentry->d_subdirs.next, - struct dentry, d_u.d_child); - - /* consume the dentries from this leaf up through its parents - * until we 
find one with children or run out altogether */ - do { - struct inode *inode; - - /* - * inform the fs that this dentry is about to be - * unhashed and destroyed. - */ - if ((dentry->d_flags & DCACHE_OP_PRUNE) && - !d_unhashed(dentry)) - dentry->d_op->d_prune(dentry); - - dentry_lru_del(dentry); - __d_shrink(dentry); - - if (dentry->d_lockref.count != 0) { - printk(KERN_ERR - "BUG: Dentry %p{i=%lx,n=%s}" - " still in use (%d)" - " [unmount of %s %s]\n", - dentry, - dentry->d_inode ? - dentry->d_inode->i_ino : 0UL, - dentry->d_name.name, - dentry->d_lockref.count, - dentry->d_sb->s_type->name, - dentry->d_sb->s_id); - BUG(); - } - - if (IS_ROOT(dentry)) { - parent = NULL; - list_del(&dentry->d_u.d_child); - } else { - parent = dentry->d_parent; - parent->d_lockref.count--; - list_del(&dentry->d_u.d_child); - } - - inode = dentry->d_inode; - if (inode) { - dentry->d_inode = NULL; - hlist_del_init(&dentry->d_alias); - if (dentry->d_op && dentry->d_op->d_iput) - dentry->d_op->d_iput(dentry, inode); - else - iput(inode); - } - - d_free(dentry); - - /* finished when we fall off the top of the tree, - * otherwise we ascend to the parent and move to the - * next sibling if there is one */ - if (!parent) - return; - dentry = parent; - } while (list_empty(&dentry->d_subdirs)); - - dentry = list_entry(dentry->d_subdirs.next, - struct dentry, d_u.d_child); - } -} - -/* - * destroy the dentries attached to a superblock on unmounting - * - we don't need to use dentry->d_lock because: - * - the superblock is detached from all mountings and open files, so the - * dentry trees will not be rearranged by the VFS - * - s_umount is write-locked, so the memory pressure shrinker will ignore - * any dentries belonging to this superblock that it comes across - * - the filesystem itself is no longer permitted to rearrange the dentries - * in this superblock - */ -void shrink_dcache_for_umount(struct super_block *sb) -{ - struct dentry *dentry; - - if (down_read_trylock(&sb->s_umount)) - 
BUG(); - - dentry = sb->s_root; - sb->s_root = NULL; - dentry->d_lockref.count--; - shrink_dcache_for_umount_subtree(dentry); - - while (!hlist_bl_empty(&sb->s_anon)) { - dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash); - shrink_dcache_for_umount_subtree(dentry); - } -} - -/* - * This tries to ascend one level of parenthood, but - * we can race with renaming, so we need to re-check - * the parenthood after dropping the lock and check - * that the sequence number still matches. - */ -static struct dentry *try_to_ascend(struct dentry *old, unsigned seq) -{ - struct dentry *new = old->d_parent; - - rcu_read_lock(); - spin_unlock(&old->d_lock); - spin_lock(&new->d_lock); - - /* - * might go back up the wrong parent if we have had a rename - * or deletion - */ - if (new != old->d_parent || - (old->d_flags & DCACHE_DENTRY_KILLED) || - need_seqretry(&rename_lock, seq)) { - spin_unlock(&new->d_lock); - new = NULL; - } - rcu_read_unlock(); - return new; -} - /** * enum d_walk_ret - action to talke during tree walk * @D_WALK_CONTINUE: contrinue walk @@ -1268,7 +1111,7 @@ repeat: resume: while (next != &this_parent->d_subdirs) { struct list_head *tmp = next; - struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); + struct dentry *dentry = list_entry(tmp, struct dentry, d_child); next = tmp->next; spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); @@ -1300,18 +1143,31 @@ resume: /* * All done at this level ... ascend and resume the search. */ + rcu_read_lock(); +ascend: if (this_parent != parent) { struct dentry *child = this_parent; - this_parent = try_to_ascend(this_parent, seq); - if (!this_parent) + this_parent = child->d_parent; + + spin_unlock(&child->d_lock); + spin_lock(&this_parent->d_lock); + + /* might go back up the wrong parent if we have had a rename. 
*/ + if (need_seqretry(&rename_lock, seq)) goto rename_retry; - next = child->d_u.d_child.next; + next = child->d_child.next; + while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)) { + if (next == &this_parent->d_subdirs) + goto ascend; + child = list_entry(next, struct dentry, d_child); + next = next->next; + } + rcu_read_unlock(); goto resume; } - if (need_seqretry(&rename_lock, seq)) { - spin_unlock(&this_parent->d_lock); + if (need_seqretry(&rename_lock, seq)) goto rename_retry; - } + rcu_read_unlock(); if (finish) finish(data); @@ -1321,6 +1177,9 @@ out_unlock: return; rename_retry: + spin_unlock(&this_parent->d_lock); + rcu_read_unlock(); + BUG_ON(seq & 1); if (!retry) return; seq = 1; @@ -1422,34 +1281,23 @@ static enum d_walk_ret select_collect(void *_data, struct dentry *dentry) if (data->start == dentry) goto out; - /* - * move only zero ref count dentries to the dispose list. - * - * Those which are presently on the shrink list, being processed - * by shrink_dentry_list(), shouldn't be moved. Otherwise the - * loop in shrink_dcache_parent() might not make any progress - * and loop forever. - */ - if (dentry->d_lockref.count) { - dentry_lru_del(dentry); - } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { - /* - * We can't use d_lru_shrink_move() because we - * need to get the global LRU lock and do the - * LRU accounting. - */ - d_lru_del(dentry); - d_shrink_add(dentry, &data->dispose); + if (dentry->d_flags & DCACHE_SHRINK_LIST) { data->found++; - ret = D_WALK_NORETRY; + } else { + if (dentry->d_flags & DCACHE_LRU_LIST) + d_lru_del(dentry); + if (!dentry->d_lockref.count) { + d_shrink_add(dentry, &data->dispose); + data->found++; + } } /* * We can return to the caller if we have found some (this * ensures forward progress). We'll be coming back to find * the rest. */ - if (data->found && need_resched()) - ret = D_WALK_QUIT; + if (!list_empty(&data->dispose)) + ret = need_resched() ? 
D_WALK_QUIT : D_WALK_NORETRY; out: return ret; } @@ -1479,6 +1327,56 @@ void shrink_dcache_parent(struct dentry *parent) } EXPORT_SYMBOL(shrink_dcache_parent); +static enum d_walk_ret umount_check(void *_data, struct dentry *dentry) +{ + /* it has busy descendents; complain about those instead */ + if (!list_empty(&dentry->d_subdirs)) + return D_WALK_CONTINUE; + + /* root with refcount 1 is fine */ + if (dentry == _data && dentry->d_lockref.count == 1) + return D_WALK_CONTINUE; + + printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%pd} " + " still in use (%d) [unmount of %s %s]\n", + dentry, + dentry->d_inode ? + dentry->d_inode->i_ino : 0UL, + dentry, + dentry->d_lockref.count, + dentry->d_sb->s_type->name, + dentry->d_sb->s_id); + WARN_ON(1); + return D_WALK_CONTINUE; +} + +static void do_one_tree(struct dentry *dentry) +{ + shrink_dcache_parent(dentry); + d_walk(dentry, dentry, umount_check, NULL); + d_drop(dentry); + dput(dentry); +} + +/* + * destroy the dentries attached to a superblock on unmounting + */ +void shrink_dcache_for_umount(struct super_block *sb) +{ + struct dentry *dentry; + + WARN(down_read_trylock(&sb->s_umount), "s_umount should've been locked"); + + dentry = sb->s_root; + sb->s_root = NULL; + do_one_tree(dentry); + + while (!hlist_bl_empty(&sb->s_anon)) { + dentry = dget(hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash)); + do_one_tree(dentry); + } +} + static enum d_walk_ret check_and_collect(void *_data, struct dentry *dentry) { struct select_data *data = _data; @@ -1601,8 +1499,8 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) INIT_HLIST_BL_NODE(&dentry->d_hash); INIT_LIST_HEAD(&dentry->d_lru); INIT_LIST_HEAD(&dentry->d_subdirs); - INIT_HLIST_NODE(&dentry->d_alias); - INIT_LIST_HEAD(&dentry->d_u.d_child); + INIT_HLIST_NODE(&dentry->d_u.d_alias); + INIT_LIST_HEAD(&dentry->d_child); d_set_d_op(dentry, dentry->d_sb->s_d_op); this_cpu_inc(nr_dentry); @@ -1632,7 +1530,7 @@ struct dentry *d_alloc(struct 
dentry * parent, const struct qstr *name) */ __dget_dlock(parent); dentry->d_parent = parent; - list_add(&dentry->d_u.d_child, &parent->d_subdirs); + list_add(&dentry->d_child, &parent->d_subdirs); spin_unlock(&parent->d_lock); return dentry; @@ -1692,7 +1590,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) if (inode) { if (unlikely(IS_AUTOMOUNT(inode))) dentry->d_flags |= DCACHE_NEED_AUTOMOUNT; - hlist_add_head(&dentry->d_alias, &inode->i_dentry); + hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); } dentry->d_inode = inode; dentry_rcuwalk_barrier(dentry); @@ -1717,7 +1615,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) void d_instantiate(struct dentry *entry, struct inode * inode) { - BUG_ON(!hlist_unhashed(&entry->d_alias)); + BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); if (inode) spin_lock(&inode->i_lock); __d_instantiate(entry, inode); @@ -1756,7 +1654,7 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry, return NULL; } - hlist_for_each_entry(alias, &inode->i_dentry, d_alias) { + hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { /* * Don't need alias->d_lock here, because aliases with * d_parent == entry->d_parent are not subject to name or @@ -1782,7 +1680,7 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) { struct dentry *result; - BUG_ON(!hlist_unhashed(&entry->d_alias)); + BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); if (inode) spin_lock(&inode->i_lock); @@ -1825,7 +1723,7 @@ static struct dentry * __d_find_any_alias(struct inode *inode) if (hlist_empty(&inode->i_dentry)) return NULL; - alias = hlist_entry(inode->i_dentry.first, struct dentry, d_alias); + alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias); __dget(alias); return alias; } @@ -1899,7 +1797,7 @@ struct dentry *d_obtain_alias(struct inode *inode) spin_lock(&tmp->d_lock); tmp->d_inode = inode; tmp->d_flags |= DCACHE_DISCONNECTED; - 
hlist_add_head(&tmp->d_alias, &inode->i_dentry); + hlist_add_head(&tmp->d_u.d_alias, &inode->i_dentry); hlist_bl_lock(&tmp->d_sb->s_anon); hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); hlist_bl_unlock(&tmp->d_sb->s_anon); @@ -2342,7 +2240,7 @@ int d_validate(struct dentry *dentry, struct dentry *dparent) struct dentry *child; spin_lock(&dparent->d_lock); - list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) { + list_for_each_entry(child, &dparent->d_subdirs, d_child) { if (dentry == child) { spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); __dget_dlock(dentry); @@ -2589,8 +2487,8 @@ static void __d_move(struct dentry * dentry, struct dentry * target) /* Unhash the target: dput() will then get rid of it */ __d_drop(target); - list_del(&dentry->d_u.d_child); - list_del(&target->d_u.d_child); + list_del(&dentry->d_child); + list_del(&target->d_child); /* Switch the names.. */ switch_names(dentry, target); @@ -2600,15 +2498,15 @@ static void __d_move(struct dentry * dentry, struct dentry * target) if (IS_ROOT(dentry)) { dentry->d_parent = target->d_parent; target->d_parent = target; - INIT_LIST_HEAD(&target->d_u.d_child); + INIT_LIST_HEAD(&target->d_child); } else { swap(dentry->d_parent, target->d_parent); /* And add them back to the (new) parent lists */ - list_add(&target->d_u.d_child, &target->d_parent->d_subdirs); + list_add(&target->d_child, &target->d_parent->d_subdirs); } - list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); + list_add(&dentry->d_child, &dentry->d_parent->d_subdirs); write_seqcount_end(&target->d_seq); write_seqcount_end(&dentry->d_seq); @@ -2715,9 +2613,9 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) swap(dentry->d_name.hash, anon->d_name.hash); dentry->d_parent = dentry; - list_del_init(&dentry->d_u.d_child); + list_del_init(&dentry->d_child); anon->d_parent = dparent; - list_move(&anon->d_u.d_child, &dparent->d_subdirs); + list_move(&anon->d_child, &dparent->d_subdirs); 
write_seqcount_end(&dentry->d_seq); write_seqcount_end(&anon->d_seq); @@ -2840,6 +2738,9 @@ static int prepend(char **buffer, int *buflen, const char *str, int namelen) * the beginning of the name. The sequence number check at the caller will * retry it again when a d_move() does happen. So any garbage in the buffer * due to mismatched pointer and length will be discarded. + * + * Data dependency barrier is needed to make sure that we see that terminating + * NUL. Alpha strikes again, film at 11... */ static int prepend_name(char **buffer, int *buflen, struct qstr *name) { @@ -2847,6 +2748,8 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name) u32 dlen = ACCESS_ONCE(name->len); char *p; + smp_read_barrier_depends(); + *buflen -= dlen + 1; if (*buflen < 0) return -ENAMETOOLONG; @@ -3340,7 +3243,7 @@ void d_tmpfile(struct dentry *dentry, struct inode *inode) { inode_dec_link_count(inode); BUG_ON(dentry->d_name.name != dentry->d_iname || - !hlist_unhashed(&dentry->d_alias) || + !hlist_unhashed(&dentry->d_u.d_alias) || !d_unlinked(dentry)); spin_lock(&dentry->d_parent->d_lock); spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index c7c83ff..f3784dd 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -533,7 +533,7 @@ EXPORT_SYMBOL_GPL(debugfs_remove); */ void debugfs_remove_recursive(struct dentry *dentry) { - struct dentry *child, *next, *parent; + struct dentry *child, *parent; if (IS_ERR_OR_NULL(dentry)) return; @@ -545,31 +545,49 @@ void debugfs_remove_recursive(struct dentry *dentry) parent = dentry; down: mutex_lock(&parent->d_inode->i_mutex); - list_for_each_entry_safe(child, next, &parent->d_subdirs, d_u.d_child) { + loop: + /* + * The parent->d_subdirs is protected by the d_lock. Outside that + * lock, the child can be unlinked and set to be freed which can + * use the d_child as the rcu head and corrupt this list. 
+ */ + spin_lock(&parent->d_lock); + list_for_each_entry(child, &parent->d_subdirs, d_child) { if (!debugfs_positive(child)) continue; /* perhaps simple_empty(child) makes more sense */ if (!list_empty(&child->d_subdirs)) { + spin_unlock(&parent->d_lock); mutex_unlock(&parent->d_inode->i_mutex); parent = child; goto down; } - up: + + spin_unlock(&parent->d_lock); + if (!__debugfs_remove(child, parent)) simple_release_fs(&debugfs_mount, &debugfs_mount_count); + + /* + * The parent->d_lock protects agaist child from unlinking + * from d_subdirs. When releasing the parent->d_lock we can + * no longer trust that the next pointer is valid. + * Restart the loop. We'll skip this one with the + * debugfs_positive() check. + */ + goto loop; } + spin_unlock(&parent->d_lock); mutex_unlock(&parent->d_inode->i_mutex); child = parent; parent = parent->d_parent; mutex_lock(&parent->d_inode->i_mutex); - if (child != dentry) { - next = list_entry(child->d_u.d_child.next, struct dentry, - d_u.d_child); - goto up; - } + if (child != dentry) + /* go up */ + goto loop; if (!__debugfs_remove(child, parent)) simple_release_fs(&debugfs_mount, &debugfs_mount_count); diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 88556dc..d5abafd 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -706,9 +706,7 @@ static int lkb_idr_is_local(int id, void *p, void *data) { struct dlm_lkb *lkb = p; - if (!lkb->lkb_nodeid) - return 1; - return 0; + return lkb->lkb_nodeid == 0 && lkb->lkb_grmode != DLM_LOCK_IV; } static int lkb_idr_is_any(int id, void *p, void *data) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index a5e34dd..1381d3f 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -617,6 +617,11 @@ static void retry_failed_sctp_send(struct connection *recv_con, int nodeid = sn_send_failed->ssf_info.sinfo_ppid; log_print("Retry sending %d bytes to node id %d", len, nodeid); + + if (!nodeid) { + log_print("Shouldn't resend data via listening connection."); + return; + } con = 
nodeid2con(nodeid, 0); if (!con) { diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 000eae2..bf926f7 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1917,7 +1917,6 @@ ecryptfs_decode_from_filename(unsigned char *dst, size_t *dst_size, break; case 2: dst[dst_byte_offset++] |= (src_byte); - dst[dst_byte_offset] = 0; current_bit_offset = 0; break; } diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 992cf95..f3fd66a 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -191,23 +191,11 @@ static int ecryptfs_open(struct inode *inode, struct file *file) { int rc = 0; struct ecryptfs_crypt_stat *crypt_stat = NULL; - struct ecryptfs_mount_crypt_stat *mount_crypt_stat; struct dentry *ecryptfs_dentry = file->f_path.dentry; /* Private value of ecryptfs_dentry allocated in * ecryptfs_lookup() */ struct ecryptfs_file_info *file_info; - mount_crypt_stat = &ecryptfs_superblock_to_private( - ecryptfs_dentry->d_sb)->mount_crypt_stat; - if ((mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) - && ((file->f_flags & O_WRONLY) || (file->f_flags & O_RDWR) - || (file->f_flags & O_CREAT) || (file->f_flags & O_TRUNC) - || (file->f_flags & O_APPEND))) { - printk(KERN_WARNING "Mount has encrypted view enabled; " - "files may only be read\n"); - rc = -EPERM; - goto out; - } /* Released in ecryptfs_release or end of function if failure */ file_info = kmem_cache_zalloc(ecryptfs_file_info_cache, GFP_KERNEL); ecryptfs_set_file_private(file, file_info); diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 67e9b63..69b488c 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -1051,7 +1051,7 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, } rc = vfs_setxattr(lower_dentry, name, value, size, flags); - if (!rc) + if (!rc && dentry->d_inode) fsstack_copy_attr_all(dentry->d_inode, lower_dentry->d_inode); out: return rc; diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index eb1c597..539a399 100644 
--- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -493,6 +493,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags { struct super_block *s; struct ecryptfs_sb_info *sbi; + struct ecryptfs_mount_crypt_stat *mount_crypt_stat; struct ecryptfs_dentry_info *root_info; const char *err = "Getting sb failed"; struct inode *inode; @@ -511,6 +512,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags err = "Error parsing options"; goto out; } + mount_crypt_stat = &sbi->mount_crypt_stat; s = sget(fs_type, NULL, set_anon_super, flags, NULL); if (IS_ERR(s)) { @@ -557,11 +559,19 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags /** * Set the POSIX ACL flag based on whether they're enabled in the lower - * mount. Force a read-only eCryptfs mount if the lower mount is ro. - * Allow a ro eCryptfs mount even when the lower mount is rw. + * mount. */ s->s_flags = flags & ~MS_POSIXACL; - s->s_flags |= path.dentry->d_sb->s_flags & (MS_RDONLY | MS_POSIXACL); + s->s_flags |= path.dentry->d_sb->s_flags & MS_POSIXACL; + + /** + * Force a read-only eCryptfs mount when: + * 1) The lower mount is ro + * 2) The ecryptfs_encrypted_view mount option is specified + */ + if (path.dentry->d_sb->s_flags & MS_RDONLY || + mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) + s->s_flags |= MS_RDONLY; s->s_maxbytes = path.dentry->d_sb->s_maxbytes; s->s_blocksize = path.dentry->d_sb->s_blocksize; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 9691a6e..5ff7682 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -910,7 +910,7 @@ static const struct file_operations eventpoll_fops = { void eventpoll_release_file(struct file *file) { struct eventpoll *ep; - struct epitem *epi; + struct epitem *epi, *next; /* * We don't want to get "file->f_lock" because it is not @@ -926,7 +926,7 @@ void eventpoll_release_file(struct file *file) * Besides, ep_remove() acquires the lock, so we can't hold it here. 
*/ mutex_lock(&epmutex); - list_for_each_entry_rcu(epi, &file->f_ep_links, fllink) { + list_for_each_entry_safe(epi, next, &file->f_ep_links, fllink) { ep = epi->ep; mutex_lock_nested(&ep->mtx, 0); ep_remove(ep, epi); @@ -1852,7 +1852,8 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, goto error_tgt_fput; /* Check if EPOLLWAKEUP is allowed */ - if ((epds.events & EPOLLWAKEUP) && !capable(CAP_BLOCK_SUSPEND)) + if (ep_op_has_event(op) && (epds.events & EPOLLWAKEUP) && + !capable(CAP_BLOCK_SUSPEND)) epds.events &= ~EPOLLWAKEUP; /* @@ -26,6 +26,7 @@ #include <linux/file.h> #include <linux/fdtable.h> #include <linux/mm.h> +#include <linux/vmacache.h> #include <linux/stat.h> #include <linux/fcntl.h> #include <linux/swap.h> @@ -657,10 +658,10 @@ int setup_arg_pages(struct linux_binprm *bprm, unsigned long rlim_stack; #ifdef CONFIG_STACK_GROWSUP - /* Limit stack size to 1GB */ + /* Limit stack size */ stack_base = rlimit_max(RLIMIT_STACK); - if (stack_base > (1 << 30)) - stack_base = 1 << 30; + if (stack_base > STACK_SIZE_MAX) + stack_base = STACK_SIZE_MAX; /* Make sure we didn't let the argument array grow too large. 
*/ if (vma->vm_end - vma->vm_start > stack_base) @@ -818,7 +819,7 @@ EXPORT_SYMBOL(read_code); static int exec_mmap(struct mm_struct *mm) { struct task_struct *tsk; - struct mm_struct * old_mm, *active_mm; + struct mm_struct *old_mm, *active_mm; /* Notify parent that we're no longer interested in the old VM */ tsk = current; @@ -845,6 +846,8 @@ static int exec_mmap(struct mm_struct *mm) tsk->mm = mm; tsk->active_mm = mm; activate_mm(active_mm, mm); + tsk->mm->vmacache_seqnum = 0; + vmacache_flush(tsk); preempt_enable_rt(); task_unlock(tsk); arch_pick_mmap_layout(mm); diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index c43fe9b..b75f174 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -50,7 +50,7 @@ find_acceptable_alias(struct dentry *result, inode = result->d_inode; spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { dget(dentry); spin_unlock(&inode->i_lock); if (toput) diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index c260de6..8a33764 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -632,6 +632,8 @@ static int ext2_get_blocks(struct inode *inode, int count = 0; ext2_fsblk_t first_block = 0; + BUG_ON(maxblocks == 0); + depth = ext2_block_to_path(inode,iblock,offsets,&blocks_to_boundary); if (depth == 0) diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c index 1c33128..e98171a 100644 --- a/fs/ext2/xip.c +++ b/fs/ext2/xip.c @@ -35,6 +35,7 @@ __ext2_get_block(struct inode *inode, pgoff_t pgoff, int create, int rc; memset(&tmp, 0, sizeof(struct buffer_head)); + tmp.b_size = 1 << inode->i_blkbits; rc = ext2_get_block(inode, pgoff, &tmp, create); *result = tmp.b_blocknr; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index c50c761..03fd6bc 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -1354,13 +1354,6 @@ set_qf_format: "not specified."); return 0; } - } else { - if (sbi->s_jquota_fmt) { - ext3_msg(sb, KERN_ERR, "error: journaled quota format " 
- "specified with no journaling " - "enabled."); - return 0; - } } #endif return 1; diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c index 3285aa5..b610779 100644 --- a/fs/ext4/bitmap.c +++ b/fs/ext4/bitmap.c @@ -24,8 +24,7 @@ int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, __u32 provided, calculated; struct ext4_sb_info *sbi = EXT4_SB(sb); - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(sb)) return 1; provided = le16_to_cpu(gdp->bg_inode_bitmap_csum_lo); @@ -46,8 +45,7 @@ void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group, __u32 csum; struct ext4_sb_info *sbi = EXT4_SB(sb); - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(sb)) return; csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); @@ -65,8 +63,7 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group, struct ext4_sb_info *sbi = EXT4_SB(sb); int sz = EXT4_CLUSTERS_PER_GROUP(sb) / 8; - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(sb)) return 1; provided = le16_to_cpu(gdp->bg_block_bitmap_csum_lo); @@ -91,8 +88,7 @@ void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group, __u32 csum; struct ext4_sb_info *sbi = EXT4_SB(sb); - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(sb)) return; csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index a9d2bf9..29c4e30 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2093,6 +2093,7 @@ int do_journal_get_write_access(handle_t *handle, #define CONVERT_INLINE_DATA 2 extern struct inode *ext4_iget(struct super_block *, unsigned long); +extern struct inode *ext4_iget_normal(struct super_block *, unsigned long); extern int ext4_write_inode(struct inode *, struct 
writeback_control *); extern int ext4_setattr(struct dentry *, struct iattr *); extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, @@ -2323,10 +2324,18 @@ extern int ext4_register_li_request(struct super_block *sb, static inline int ext4_has_group_desc_csum(struct super_block *sb) { return EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_GDT_CSUM | - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM); + EXT4_FEATURE_RO_COMPAT_GDT_CSUM) || + (EXT4_SB(sb)->s_chksum_driver != NULL); } +static inline int ext4_has_metadata_csum(struct super_block *sb) +{ + WARN_ON_ONCE(EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && + !EXT4_SB(sb)->s_chksum_driver); + + return (EXT4_SB(sb)->s_chksum_driver != NULL); +} static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) { return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) | @@ -2445,23 +2454,6 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) up_write(&EXT4_I(inode)->i_data_sem); } -/* - * Update i_disksize after writeback has been started. Races with truncate - * are avoided by checking i_size under i_data_sem. 
- */ -static inline void ext4_wb_update_i_disksize(struct inode *inode, loff_t newsize) -{ - loff_t i_size; - - down_write(&EXT4_I(inode)->i_data_sem); - i_size = i_size_read(inode); - if (newsize > i_size) - newsize = i_size; - if (newsize > EXT4_I(inode)->i_disksize) - EXT4_I(inode)->i_disksize = newsize; - up_write(&EXT4_I(inode)->i_data_sem); -} - struct ext4_group_info { unsigned long bb_state; struct rb_root bb_free_root; @@ -2766,7 +2758,8 @@ extern void ext4_io_submit(struct ext4_io_submit *io); extern int ext4_bio_write_page(struct ext4_io_submit *io, struct page *page, int len, - struct writeback_control *wbc); + struct writeback_control *wbc, + bool keep_towrite); /* mmp.c */ extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 8dd9659..33a6765 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -74,8 +74,7 @@ static int ext4_extent_block_csum_verify(struct inode *inode, { struct ext4_extent_tail *et; - if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(inode->i_sb)) return 1; et = find_ext4_extent_tail(eh); @@ -89,8 +88,7 @@ static void ext4_extent_block_csum_set(struct inode *inode, { struct ext4_extent_tail *et; - if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(inode->i_sb)) return; et = find_ext4_extent_tail(eh); diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 3981ff7..171b9fa 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -962,10 +962,10 @@ retry: continue; } - if (ei->i_es_lru_nr == 0 || ei == locked_ei) + if (ei->i_es_lru_nr == 0 || ei == locked_ei || + !write_trylock(&ei->i_es_lock)) continue; - write_lock(&ei->i_es_lock); shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan); if (ei->i_es_lru_nr == 0) list_del_init(&ei->i_es_lru); diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 
3da2194..7b31601 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -82,7 +82,7 @@ ext4_unaligned_aio(struct inode *inode, const struct iovec *iov, size_t count = iov_length(iov, nr_segs); loff_t final_size = pos + count; - if (pos >= inode->i_size) + if (pos >= i_size_read(inode)) return 0; if ((pos & blockmask) || (final_size & blockmask)) @@ -152,7 +152,7 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, if (ret > 0) { ssize_t err; - err = generic_write_sync(file, pos, ret); + err = generic_write_sync(file, iocb->ki_pos - ret, ret); if (err < 0 && ret > 0) ret = err; } diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 137193f..fbc6df7 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -851,12 +851,23 @@ got: goto out; } + BUFFER_TRACE(group_desc_bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, group_desc_bh); + if (err) { + ext4_std_error(sb, err); + goto out; + } + /* We may have to initialize the block bitmap if it isn't already */ if (ext4_has_group_desc_csum(sb) && gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { struct buffer_head *block_bitmap_bh; block_bitmap_bh = ext4_read_block_bitmap(sb, group); + if (!block_bitmap_bh) { + err = -EIO; + goto out; + } BUFFER_TRACE(block_bitmap_bh, "get block bitmap access"); err = ext4_journal_get_write_access(handle, block_bitmap_bh); if (err) { @@ -887,13 +898,6 @@ got: } } - BUFFER_TRACE(group_desc_bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, group_desc_bh); - if (err) { - ext4_std_error(sb, err); - goto out; - } - /* Update the relevant bg descriptor fields */ if (ext4_has_group_desc_csum(sb)) { int free; @@ -988,8 +992,7 @@ got: spin_unlock(&sbi->s_next_gen_lock); /* Precompute checksum seed for inode metadata */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { + if (ext4_has_metadata_csum(sb)) { __u32 csum; __le32 inum = cpu_to_le32(inode->i_ino); __le32 gen = cpu_to_le32(inode->i_generation); diff --git 
a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 594009f..c30cbe2 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -389,7 +389,13 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, return 0; failed: for (; i >= 0; i--) { - if (i != indirect_blks && branch[i].bh) + /* + * We want to ext4_forget() only freshly allocated indirect + * blocks. Buffer for new_blocks[i-1] is at branch[i].bh and + * buffer at branch[0].bh is indirect block / inode already + * existing before ext4_alloc_branch() was called. + */ + if (i > 0 && i != indirect_blks && branch[i].bh) ext4_forget(handle, 1, inode, branch[i].bh, branch[i].bh->b_blocknr); ext4_free_blocks(handle, inode, NULL, new_blocks[i], @@ -1312,16 +1318,24 @@ static int free_hole_blocks(handle_t *handle, struct inode *inode, blk = *i_data; if (level > 0) { ext4_lblk_t first2; + ext4_lblk_t count2; + bh = sb_bread(inode->i_sb, le32_to_cpu(blk)); if (!bh) { EXT4_ERROR_INODE_BLOCK(inode, le32_to_cpu(blk), "Read failure"); return -EIO; } - first2 = (first > offset) ? 
first - offset : 0; + if (first > offset) { + first2 = first - offset; + count2 = count; + } else { + first2 = 0; + count2 = count - (offset - first); + } ret = free_hole_blocks(handle, inode, bh, (__le32 *)bh->b_data, level - 1, - first2, count - offset, + first2, count2, inode->i_sb->s_blocksize >> 2); if (ret) { brelse(bh); @@ -1331,8 +1345,8 @@ static int free_hole_blocks(handle_t *handle, struct inode *inode, if (level == 0 || (bh && all_zeroes((__le32 *)bh->b_data, (__le32 *)bh->b_data + addr_per_block))) { - ext4_free_data(handle, inode, parent_bh, &blk, &blk+1); - *i_data = 0; + ext4_free_data(handle, inode, parent_bh, + i_data, i_data + 1); } brelse(bh); bh = NULL; diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 46b3668..b7e4910 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1126,8 +1126,7 @@ static int ext4_finish_convert_inline_dir(handle_t *handle, memcpy((void *)de, buf + EXT4_INLINE_DOTDOT_SIZE, inline_size - EXT4_INLINE_DOTDOT_SIZE); - if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (ext4_has_metadata_csum(inode->i_sb)) csum_size = sizeof(struct ext4_dir_entry_tail); inode->i_size = inode->i_sb->s_blocksize; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index f173ef1..ba68d21 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -83,8 +83,7 @@ static int ext4_inode_csum_verify(struct inode *inode, struct ext4_inode *raw, if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != cpu_to_le32(EXT4_OS_LINUX) || - !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + !ext4_has_metadata_csum(inode->i_sb)) return 1; provided = le16_to_cpu(raw->i_checksum_lo); @@ -105,8 +104,7 @@ static void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw, if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != cpu_to_le32(EXT4_OS_LINUX) || - !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + !ext4_has_metadata_csum(inode->i_sb)) return; csum = 
ext4_inode_csum(inode, raw, ei); @@ -515,6 +513,10 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, "logical block %lu\n", inode->i_ino, flags, map->m_len, (unsigned long) map->m_lblk); + /* We can handle the block number less than EXT_MAX_BLOCKS */ + if (unlikely(map->m_lblk >= EXT_MAX_BLOCKS)) + return -EIO; + /* Lookup extent status tree firstly */ if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { ext4_es_lru_add(inode); @@ -1831,6 +1833,7 @@ static int ext4_writepage(struct page *page, struct buffer_head *page_bufs = NULL; struct inode *inode = page->mapping->host; struct ext4_io_submit io_submit; + bool keep_towrite = false; trace_ext4_writepage(page); size = i_size_read(inode); @@ -1861,6 +1864,7 @@ static int ext4_writepage(struct page *page, unlock_page(page); return 0; } + keep_towrite = true; } if (PageChecked(page) && ext4_should_journal_data(inode)) @@ -1877,7 +1881,7 @@ static int ext4_writepage(struct page *page, unlock_page(page); return -ENOMEM; } - ret = ext4_bio_write_page(&io_submit, page, len, wbc); + ret = ext4_bio_write_page(&io_submit, page, len, wbc, keep_towrite); ext4_io_submit(&io_submit); /* Drop io_end reference we got from init */ ext4_put_io_end_defer(io_submit.io_end); @@ -1896,7 +1900,7 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) else len = PAGE_CACHE_SIZE; clear_page_dirty_for_io(page); - err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc); + err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false); if (!err) mpd->wbc->nr_to_write--; mpd->first_page++; @@ -2186,6 +2190,7 @@ static int mpage_map_and_submit_extent(handle_t *handle, struct ext4_map_blocks *map = &mpd->map; int err; loff_t disksize; + int progress = 0; mpd->io_submit.io_end->offset = ((loff_t)map->m_lblk) << inode->i_blkbits; @@ -2202,8 +2207,11 @@ static int mpage_map_and_submit_extent(handle_t *handle, * is non-zero, a commit should free up blocks. 
*/ if ((err == -ENOMEM) || - (err == -ENOSPC && ext4_count_free_clusters(sb))) + (err == -ENOSPC && ext4_count_free_clusters(sb))) { + if (progress) + goto update_disksize; return err; + } ext4_msg(sb, KERN_CRIT, "Delayed block allocation failed for " "inode %lu at logical offset %llu with" @@ -2220,22 +2228,34 @@ static int mpage_map_and_submit_extent(handle_t *handle, *give_up_on_write = true; return err; } + progress = 1; /* * Update buffer state, submit mapped pages, and get us new * extent to map */ err = mpage_map_and_submit_buffers(mpd); if (err < 0) - return err; + goto update_disksize; } while (map->m_len); - /* Update on-disk size after IO is submitted */ +update_disksize: + /* + * Update on-disk size after IO is submitted. Races with + * truncate are avoided by checking i_size under i_data_sem. + */ disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT; if (disksize > EXT4_I(inode)->i_disksize) { int err2; - - ext4_wb_update_i_disksize(inode, disksize); + loff_t i_size; + + down_write(&EXT4_I(inode)->i_data_sem); + i_size = i_size_read(inode); + if (disksize > i_size) + disksize = i_size; + if (disksize > EXT4_I(inode)->i_disksize) + EXT4_I(inode)->i_disksize = disksize; err2 = ext4_mark_inode_dirty(handle, inode); + up_write(&EXT4_I(inode)->i_data_sem); if (err2) ext4_error(inode->i_sb, "Failed to mark inode %lu dirty", @@ -2607,6 +2627,20 @@ static int ext4_nonda_switch(struct super_block *sb) return 0; } +/* We always reserve for an inode update; the superblock could be there too */ +static int ext4_da_write_credits(struct inode *inode, loff_t pos, unsigned len) +{ + if (likely(EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, + EXT4_FEATURE_RO_COMPAT_LARGE_FILE))) + return 1; + + if (pos + len <= 0x7fffffffULL) + return 1; + + /* We might need to update the superblock to set LARGE_FILE */ + return 2; +} + static int ext4_da_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void 
**fsdata) @@ -2657,7 +2691,8 @@ retry_grab: * of file which has an already mapped buffer. */ retry_journal: - handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, 1); + handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, + ext4_da_write_credits(inode, pos, len)); if (IS_ERR(handle)) { page_cache_release(page); return PTR_ERR(handle); @@ -3935,8 +3970,8 @@ void ext4_set_inode_flags(struct inode *inode) new_fl |= S_NOATIME; if (flags & EXT4_DIRSYNC_FL) new_fl |= S_DIRSYNC; - set_mask_bits(&inode->i_flags, - S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl); + inode_set_flags(inode, new_fl, + S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); } /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ @@ -4040,8 +4075,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ei->i_extra_isize = 0; /* Precompute checksum seed for inode metadata */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { + if (ext4_has_metadata_csum(sb)) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); __u32 csum; __le32 inum = cpu_to_le32(inode->i_ino); @@ -4229,6 +4263,13 @@ bad_inode: return ERR_PTR(ret); } +struct inode *ext4_iget_normal(struct super_block *sb, unsigned long ino) +{ + if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) + return ERR_PTR(-EIO); + return ext4_iget(sb, ino); +} + static int ext4_inode_blocks_set(handle_t *handle, struct ext4_inode *raw_inode, struct ext4_inode_info *ei) @@ -4633,8 +4674,12 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) ext4_orphan_del(NULL, inode); goto err_out; } - } else + } else { + loff_t oldsize = inode->i_size; + i_size_write(inode, attr->ia_size); + pagecache_isize_extended(inode, oldsize, inode->i_size); + } /* * Blocks are going to be removed from the inode. 
Wait diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index d011b69..54d4911 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -347,8 +347,7 @@ flags_out: if (!inode_owner_or_capable(inode)) return -EPERM; - if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { + if (ext4_has_metadata_csum(inode->i_sb)) { ext4_warning(sb, "Setting inode version is not " "supported with metadata_csum enabled."); return -ENOTTY; @@ -548,9 +547,17 @@ group_add_out: } case EXT4_IOC_SWAP_BOOT: + { + int err; if (!(filp->f_mode & FMODE_WRITE)) return -EBADF; - return swap_inode_boot_loader(sb, inode); + err = mnt_want_write_file(filp); + if (err) + return err; + err = swap_inode_boot_loader(sb, inode); + mnt_drop_write_file(filp); + return err; + } case EXT4_IOC_RESIZE_FS: { ext4_fsblk_t n_blocks_count; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 04a5c75..7620133 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -751,8 +751,8 @@ void ext4_mb_generate_buddy(struct super_block *sb, if (free != grp->bb_free) { ext4_grp_locked_error(sb, group, 0, 0, - "%u clusters in bitmap, %u in gd; " - "block bitmap corrupt.", + "block bitmap and bg descriptor " + "inconsistent: %u vs %u free clusters", free, grp->bb_free); /* * If we intend to continue, we consider group descriptor @@ -1044,6 +1044,8 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) * allocating. If we are looking at the buddy cache we would * have taken a reference using ext4_mb_load_buddy and that * would have pinned buddy page to page cache. + * The call to ext4_mb_get_buddy_page_lock will mark the + * page accessed. 
*/ ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b); if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) { @@ -1062,7 +1064,6 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) ret = -EIO; goto err; } - mark_page_accessed(page); if (e4b.bd_buddy_page == NULL) { /* @@ -1082,7 +1083,6 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) ret = -EIO; goto err; } - mark_page_accessed(page); err: ext4_mb_put_buddy_page_lock(&e4b); return ret; @@ -1141,7 +1141,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, /* we could use find_or_create_page(), but it locks page * what we'd like to avoid in fast path ... */ - page = find_get_page(inode->i_mapping, pnum); + page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED); if (page == NULL || !PageUptodate(page)) { if (page) /* @@ -1172,15 +1172,16 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, ret = -EIO; goto err; } + + /* Pages marked accessed already */ e4b->bd_bitmap_page = page; e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize); - mark_page_accessed(page); block++; pnum = block / blocks_per_page; poff = block % blocks_per_page; - page = find_get_page(inode->i_mapping, pnum); + page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED); if (page == NULL || !PageUptodate(page)) { if (page) page_cache_release(page); @@ -1201,9 +1202,10 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, ret = -EIO; goto err; } + + /* Pages marked accessed already */ e4b->bd_buddy_page = page; e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize); - mark_page_accessed(page); BUG_ON(e4b->bd_bitmap_page == NULL); BUG_ON(e4b->bd_buddy_page == NULL); @@ -1398,6 +1400,8 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, int last = first + count - 1; struct super_block *sb = e4b->bd_sb; + if (WARN_ON(count == 0)) + return; BUG_ON(last >= (sb->s_blocksize << 3)); assert_spin_locked(ext4_group_lock_ptr(sb, 
e4b->bd_group)); /* Don't bother if the block group is corrupt. */ @@ -3135,7 +3139,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, } BUG_ON(start + size <= ac->ac_o_ex.fe_logical && start > ac->ac_o_ex.fe_logical); - BUG_ON(size <= 0 || size > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb)); + BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); /* now prepare goal request */ @@ -3196,8 +3200,30 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac) static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac) { struct ext4_prealloc_space *pa = ac->ac_pa; + struct ext4_buddy e4b; + int err; - if (pa && pa->pa_type == MB_INODE_PA) + if (pa == NULL) { + if (ac->ac_f_ex.fe_len == 0) + return; + err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b); + if (err) { + /* + * This should never happen since we pin the + * pages in the ext4_allocation_context so + * ext4_mb_load_buddy() should never fail. + */ + WARN(1, "mb_load_buddy failed (%d)", err); + return; + } + ext4_lock_group(ac->ac_sb, ac->ac_f_ex.fe_group); + mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start, + ac->ac_f_ex.fe_len); + ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group); + ext4_mb_unload_buddy(&e4b); + return; + } + if (pa->pa_type == MB_INODE_PA) pa->pa_free += ac->ac_b_ex.fe_len; } diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 214461e..b69ca47 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -20,8 +20,7 @@ static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp) int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp) { - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(sb)) return 1; return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp); @@ -29,8 +28,7 @@ int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp) void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp) { - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - 
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(sb)) return; mmp->mmp_checksum = ext4_mmp_csum(sb, mmp); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 5a0408d..7e6954c 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -123,8 +123,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, "directory leaf block found instead of index block"); return ERR_PTR(-EIO); } - if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) || + if (!ext4_has_metadata_csum(inode->i_sb) || buffer_verified(bh)) return bh; @@ -339,8 +338,7 @@ int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent) { struct ext4_dir_entry_tail *t; - if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(inode->i_sb)) return 1; t = get_dirent_tail(inode, dirent); @@ -361,8 +359,7 @@ static void ext4_dirent_csum_set(struct inode *inode, { struct ext4_dir_entry_tail *t; - if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(inode->i_sb)) return; t = get_dirent_tail(inode, dirent); @@ -437,8 +434,7 @@ static int ext4_dx_csum_verify(struct inode *inode, struct dx_tail *t; int count_offset, limit, count; - if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(inode->i_sb)) return 1; c = get_dx_countlimit(inode, dirent, &count_offset); @@ -467,8 +463,7 @@ static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent) struct dx_tail *t; int count_offset, limit, count; - if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(inode->i_sb)) return; c = get_dx_countlimit(inode, dirent, &count_offset); @@ -556,8 +551,7 @@ static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize) unsigned entry_space = dir->i_sb->s_blocksize - 
EXT4_DIR_REC_LEN(1) - EXT4_DIR_REC_LEN(2) - infosize; - if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (ext4_has_metadata_csum(dir->i_sb)) entry_space -= sizeof(struct dx_tail); return entry_space / sizeof(struct dx_entry); } @@ -566,8 +560,7 @@ static inline unsigned dx_node_limit(struct inode *dir) { unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); - if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (ext4_has_metadata_csum(dir->i_sb)) entry_space -= sizeof(struct dx_tail); return entry_space / sizeof(struct dx_entry); } @@ -1430,7 +1423,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi dentry->d_name.name); return ERR_PTR(-EIO); } - inode = ext4_iget(dir->i_sb, ino); + inode = ext4_iget_normal(dir->i_sb, ino); if (inode == ERR_PTR(-ESTALE)) { EXT4_ERROR_INODE(dir, "deleted inode referenced: %u", @@ -1461,7 +1454,7 @@ struct dentry *ext4_get_parent(struct dentry *child) return ERR_PTR(-EIO); } - return d_obtain_alias(ext4_iget(child->d_inode->i_sb, ino)); + return d_obtain_alias(ext4_iget_normal(child->d_inode->i_sb, ino)); } /* @@ -1535,8 +1528,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, int csum_size = 0; int err = 0, i; - if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (ext4_has_metadata_csum(dir->i_sb)) csum_size = sizeof(struct ext4_dir_entry_tail); bh2 = ext4_append(handle, dir, &newblock); @@ -1705,8 +1697,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, int csum_size = 0; int err; - if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (ext4_has_metadata_csum(inode->i_sb)) csum_size = sizeof(struct ext4_dir_entry_tail); if (!de) { @@ -1773,8 +1764,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, struct fake_dirent *fde; int csum_size = 0; - if 
(EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (ext4_has_metadata_csum(inode->i_sb)) csum_size = sizeof(struct ext4_dir_entry_tail); blocksize = dir->i_sb->s_blocksize; @@ -1890,8 +1880,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, ext4_lblk_t block, blocks; int csum_size = 0; - if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (ext4_has_metadata_csum(inode->i_sb)) csum_size = sizeof(struct ext4_dir_entry_tail); sb = dir->i_sb; @@ -2153,8 +2142,7 @@ static int ext4_delete_entry(handle_t *handle, return err; } - if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (ext4_has_metadata_csum(dir->i_sb)) csum_size = sizeof(struct ext4_dir_entry_tail); BUFFER_TRACE(bh, "get_write_access"); @@ -2373,8 +2361,7 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir, int csum_size = 0; int err; - if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (ext4_has_metadata_csum(dir->i_sb)) csum_size = sizeof(struct ext4_dir_entry_tail); if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index d7d0c7b..f1ecd13 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -307,13 +307,14 @@ static void ext4_end_bio(struct bio *bio, int error) if (error) { struct inode *inode = io_end->inode; - ext4_warning(inode->i_sb, "I/O error writing to inode %lu " + ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu " "(offset %llu size %ld starting block %llu)", - inode->i_ino, + error, inode->i_ino, (unsigned long long) io_end->offset, (long) io_end->size, (unsigned long long) bi_sector >> (inode->i_blkbits - 9)); + mapping_set_error(inode->i_mapping, error); } if (io_end->flag & EXT4_IO_END_UNWRITTEN) { @@ -399,7 +400,8 @@ submit_and_retry: int ext4_bio_write_page(struct ext4_io_submit *io, struct page *page, int len, - struct 
writeback_control *wbc) + struct writeback_control *wbc, + bool keep_towrite) { struct inode *inode = page->mapping->host; unsigned block_start, blocksize; @@ -412,10 +414,24 @@ int ext4_bio_write_page(struct ext4_io_submit *io, BUG_ON(!PageLocked(page)); BUG_ON(PageWriteback(page)); - set_page_writeback(page); + if (keep_towrite) + set_page_writeback_keepwrite(page); + else + set_page_writeback(page); ClearPageError(page); /* + * Comments copied from block_write_full_page_endio: + * + * The page straddles i_size. It must be zeroed out on each and every + * writepage invocation because it may be mmapped. "A file is mapped + * in multiples of the page size. For a file that is not a multiple of + * the page size, the remaining memory is zeroed when mapped, and + * writes to that region are not written out to the file." + */ + if (len < PAGE_CACHE_SIZE) + zero_user_segment(page, len, PAGE_CACHE_SIZE); + /* * In the first loop we prepare and mark buffers to submit. We have to * mark all buffers in the page before submitting so that * end_page_writeback() cannot be called from ext4_bio_end_io() when IO @@ -426,19 +442,6 @@ int ext4_bio_write_page(struct ext4_io_submit *io, do { block_start = bh_offset(bh); if (block_start >= len) { - /* - * Comments copied from block_write_full_page_endio: - * - * The page straddles i_size. It must be zeroed out on - * each and every writepage invocation because it may - * be mmapped. "A file is mapped in multiples of the - * page size. For a file that is not a multiple of - * the page size, the remaining memory is zeroed when - * mapped, and writes to that region are not written - * out to the file." 
- */ - zero_user_segment(page, block_start, - block_start + blocksize); clear_buffer_dirty(bh); set_buffer_uptodate(bh); continue; diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index f3b84cd..2400ad1 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1071,7 +1071,7 @@ static void update_backups(struct super_block *sb, int blk_off, char *data, break; if (meta_bg == 0) - backup_block = group * bpg + blk_off; + backup_block = ((ext4_fsblk_t)group) * bpg + blk_off; else backup_block = (ext4_group_first_block_no(sb, group) + ext4_bg_has_super(sb, group)); @@ -1200,8 +1200,7 @@ static int ext4_set_bitmap_checksums(struct super_block *sb, { struct buffer_head *bh; - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(sb)) return 0; bh = ext4_get_bitmap(sb, group_data->inode_bitmap); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d9711dc..6795499 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -140,8 +140,7 @@ static __le32 ext4_superblock_csum(struct super_block *sb, int ext4_superblock_csum_verify(struct super_block *sb, struct ext4_super_block *es) { - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(sb)) return 1; return es->s_checksum == ext4_superblock_csum(sb, es); @@ -151,8 +150,7 @@ void ext4_superblock_csum_set(struct super_block *sb) { struct ext4_super_block *es = EXT4_SB(sb)->s_es; - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(sb)) return; es->s_checksum = ext4_superblock_csum(sb, es); @@ -977,7 +975,7 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb, * Currently we don't know the generation for parent directory, so * a generation of 0 means "accept any" */ - inode = ext4_iget(sb, ino); + inode = ext4_iget_normal(sb, ino); if (IS_ERR(inode)) return ERR_CAST(inode); if (generation && inode->i_generation != generation) { @@ -1500,8 +1498,6 @@ static 
int handle_mount_opt(struct super_block *sb, char *opt, int token, arg = JBD2_DEFAULT_MAX_COMMIT_AGE; sbi->s_commit_interval = HZ * arg; } else if (token == Opt_max_batch_time) { - if (arg == 0) - arg = EXT4_DEF_MAX_BATCH_TIME; sbi->s_max_batch_time = arg; } else if (token == Opt_min_batch_time) { sbi->s_min_batch_time = arg; @@ -1689,13 +1685,6 @@ static int parse_options(char *options, struct super_block *sb, "not specified"); return 0; } - } else { - if (sbi->s_jquota_fmt) { - ext4_msg(sb, KERN_ERR, "journaled quota format " - "specified with no journaling " - "enabled"); - return 0; - } } #endif if (test_opt(sb, DIOREAD_NOLOCK)) { @@ -1993,8 +1982,7 @@ static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, __u16 crc = 0; __le32 le_group = cpu_to_le32(block_group); - if ((sbi->s_es->s_feature_ro_compat & - cpu_to_le32(EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))) { + if (ext4_has_metadata_csum(sbi->s_sb)) { /* Use new metadata_csum algorithm */ __le16 save_csum; __u32 csum32; @@ -2012,6 +2000,10 @@ static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, } /* old crc16 code */ + if (!(sbi->s_es->s_feature_ro_compat & + cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM))) + return 0; + offset = offsetof(struct ext4_group_desc, bg_checksum); crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); @@ -2762,10 +2754,11 @@ static void print_daily_error_info(unsigned long arg) es = sbi->s_es; if (es->s_error_count) - ext4_msg(sb, KERN_NOTICE, "error count: %u", + /* fsck newer than v1.41.13 is needed to clean this condition. 
*/ + ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u", le32_to_cpu(es->s_error_count)); if (es->s_first_error_time) { - printk(KERN_NOTICE "EXT4-fs (%s): initial error at %u: %.*s:%d", + printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %u: %.*s:%d", sb->s_id, le32_to_cpu(es->s_first_error_time), (int) sizeof(es->s_first_error_func), es->s_first_error_func, @@ -2779,7 +2772,7 @@ static void print_daily_error_info(unsigned long arg) printk("\n"); } if (es->s_last_error_time) { - printk(KERN_NOTICE "EXT4-fs (%s): last error at %u: %.*s:%d", + printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d", sb->s_id, le32_to_cpu(es->s_last_error_time), (int) sizeof(es->s_last_error_func), es->s_last_error_func, @@ -3140,11 +3133,10 @@ static int set_journal_csum_feature_set(struct super_block *sb) int compat, incompat; struct ext4_sb_info *sbi = EXT4_SB(sb); - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { - /* journal checksum v2 */ + if (ext4_has_metadata_csum(sb)) { + /* journal checksum v3 */ compat = 0; - incompat = JBD2_FEATURE_INCOMPAT_CSUM_V2; + incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3; } else { /* journal checksum v1 */ compat = JBD2_FEATURE_COMPAT_CHECKSUM; @@ -3166,6 +3158,7 @@ static int set_journal_csum_feature_set(struct super_block *sb) jbd2_journal_clear_features(sbi->s_journal, JBD2_FEATURE_COMPAT_CHECKSUM, 0, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | + JBD2_FEATURE_INCOMPAT_CSUM_V3 | JBD2_FEATURE_INCOMPAT_CSUM_V2); } @@ -3447,8 +3440,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } /* Precompute checksum seed for all metadata */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (ext4_has_metadata_csum(sb)) sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, sizeof(es->s_uuid)); @@ -3466,6 +3458,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) #ifdef CONFIG_EXT4_FS_POSIX_ACL set_opt(sb, POSIX_ACL); #endif + /* 
don't forget to enable journal_csum when metadata_csum is enabled. */ + if (ext4_has_metadata_csum(sb)) + set_opt(sb, JOURNAL_CHECKSUM); + if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) set_opt(sb, JOURNAL_DATA); else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 1423c48..a5d2f1b 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -141,8 +141,7 @@ static int ext4_xattr_block_csum_verify(struct inode *inode, sector_t block_nr, struct ext4_xattr_header *hdr) { - if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && + if (ext4_has_metadata_csum(inode->i_sb) && (hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr))) return 0; return 1; @@ -152,8 +151,7 @@ static void ext4_xattr_block_csum_set(struct inode *inode, sector_t block_nr, struct ext4_xattr_header *hdr) { - if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(inode->i_sb)) return; hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr); @@ -189,14 +187,28 @@ ext4_listxattr(struct dentry *dentry, char *buffer, size_t size) } static int -ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end) +ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end, + void *value_start) { - while (!IS_LAST_ENTRY(entry)) { - struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(entry); + struct ext4_xattr_entry *e = entry; + + while (!IS_LAST_ENTRY(e)) { + struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e); if ((void *)next >= end) return -EIO; - entry = next; + e = next; + } + + while (!IS_LAST_ENTRY(entry)) { + if (entry->e_value_size != 0 && + (value_start + le16_to_cpu(entry->e_value_offs) < + (void *)e + sizeof(__u32) || + value_start + le16_to_cpu(entry->e_value_offs) + + le32_to_cpu(entry->e_value_size) > end)) + return -EIO; + entry = EXT4_XATTR_NEXT(entry); } + return 0; } @@ -213,7 +225,8 @@ 
ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh) return -EIO; if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh))) return -EIO; - error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size); + error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size, + bh->b_data); if (!error) set_buffer_verified(bh); return error; @@ -329,7 +342,7 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name, header = IHDR(inode, raw_inode); entry = IFIRST(header); end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; - error = ext4_xattr_check_names(entry, end); + error = ext4_xattr_check_names(entry, end, entry); if (error) goto cleanup; error = ext4_xattr_find_entry(&entry, name_index, name, @@ -457,7 +470,7 @@ ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size) raw_inode = ext4_raw_inode(&iloc); header = IHDR(inode, raw_inode); end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; - error = ext4_xattr_check_names(IFIRST(header), end); + error = ext4_xattr_check_names(IFIRST(header), end, IFIRST(header)); if (error) goto cleanup; error = ext4_xattr_list_entries(dentry, IFIRST(header), @@ -517,8 +530,8 @@ static void ext4_xattr_update_super_block(handle_t *handle, } /* - * Release the xattr block BH: If the reference count is > 1, decrement - * it; otherwise free the block. + * Release the xattr block BH: If the reference count is > 1, decrement it; + * otherwise free the block. 
*/ static void ext4_xattr_release_block(handle_t *handle, struct inode *inode, @@ -538,16 +551,31 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, if (ce) mb_cache_entry_free(ce); get_bh(bh); + unlock_buffer(bh); ext4_free_blocks(handle, inode, bh, 0, 1, EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); - unlock_buffer(bh); } else { le32_add_cpu(&BHDR(bh)->h_refcount, -1); if (ce) mb_cache_entry_release(ce); + /* + * Beware of this ugliness: Releasing of xattr block references + * from different inodes can race and so we have to protect + * from a race where someone else frees the block (and releases + * its journal_head) before we are done dirtying the buffer. In + * nojournal mode this race is harmless and we actually cannot + * call ext4_handle_dirty_xattr_block() with locked buffer as + * that function can call sync_dirty_buffer() so for that case + * we handle the dirtying after unlocking the buffer. + */ + if (ext4_handle_valid(handle)) + error = ext4_handle_dirty_xattr_block(handle, inode, + bh); unlock_buffer(bh); - error = ext4_handle_dirty_xattr_block(handle, inode, bh); + if (!ext4_handle_valid(handle)) + error = ext4_handle_dirty_xattr_block(handle, inode, + bh); if (IS_SYNC(inode)) ext4_handle_sync(handle); dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1)); @@ -957,7 +985,8 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, is->s.here = is->s.first; is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { - error = ext4_xattr_check_names(IFIRST(header), is->s.end); + error = ext4_xattr_check_names(IFIRST(header), is->s.end, + IFIRST(header)); if (error) return error; /* Find the named attribute. 
*/ diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index bb31220..15a29af 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -70,7 +70,6 @@ repeat: goto repeat; } out: - mark_page_accessed(page); return page; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 51ef278..d0335bd 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -970,7 +970,6 @@ repeat: } got_it: BUG_ON(nid != nid_of_node(page)); - mark_page_accessed(page); return page; } @@ -1026,7 +1025,6 @@ page_hit: f2fs_put_page(page, 1); return ERR_PTR(-EIO); } - mark_page_accessed(page); return page; } diff --git a/fs/file_table.c b/fs/file_table.c index e900ca5..05e2ac1 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -211,10 +211,10 @@ static void drop_file_write_access(struct file *file) struct dentry *dentry = file->f_path.dentry; struct inode *inode = dentry->d_inode; - put_write_access(inode); - if (special_file(inode->i_mode)) return; + + put_write_access(inode); if (file_check_writeable(file) != 0) return; __mnt_drop_write(mnt); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 5bbec31..a1b2062 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -475,12 +475,28 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * write_inode() */ spin_lock(&inode->i_lock); - /* Clear I_DIRTY_PAGES if we've written out all dirty pages */ - if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) - inode->i_state &= ~I_DIRTY_PAGES; + dirty = inode->i_state & I_DIRTY; - inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC); + inode->i_state &= ~I_DIRTY; + + /* + * Paired with smp_mb() in __mark_inode_dirty(). This allows + * __mark_inode_dirty() to test i_state without grabbing i_lock - + * either they see the I_DIRTY bits cleared or we see the dirtied + * inode. + * + * I_DIRTY_PAGES is always cleared together above even if @mapping + * still has dirty pages. The flag is reinstated after smp_mb() if + * necessary. 
This guarantees that either __mark_inode_dirty() + * sees clear I_DIRTY_PAGES or we see PAGECACHE_TAG_DIRTY. + */ + smp_mb(); + + if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) + inode->i_state |= I_DIRTY_PAGES; + spin_unlock(&inode->i_lock); + /* Don't write the inode if only I_DIRTY_PAGES was set */ if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { int err = write_inode(inode, wbc); @@ -1144,12 +1160,11 @@ void __mark_inode_dirty(struct inode *inode, int flags) } /* - * make sure that changes are seen by all cpus before we test i_state - * -- mikulas + * Paired with smp_mb() in __writeback_single_inode() for the + * following lockless i_state test. See there for details. */ smp_mb(); - /* avoid the locking if we can */ if ((inode->i_state & flags) == flags) return; diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index fa8cb4b..fc8e499 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1613,7 +1613,7 @@ out_finish: static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req) { - release_pages(req->pages, req->num_pages, 0); + release_pages(req->pages, req->num_pages, false); } static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index b7989f2..936d404 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -188,7 +188,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) inode = ACCESS_ONCE(entry->d_inode); if (inode && is_bad_inode(inode)) goto invalid; - else if (fuse_dentry_time(entry) < get_jiffies_64()) { + else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) || + (flags & LOOKUP_REVAL)) { int err; struct fuse_entry_out outarg; struct fuse_req *req; @@ -945,7 +946,7 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat, int err; bool r; - if (fi->i_time < get_jiffies_64()) { + if (time_before64(fi->i_time, get_jiffies_64())) { r = true; err = fuse_do_getattr(inode, stat, file); } else { @@ -1131,7 +1132,7 @@ static int fuse_permission(struct 
inode *inode, int mask) ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) { struct fuse_inode *fi = get_fuse_inode(inode); - if (fi->i_time < get_jiffies_64()) { + if (time_before64(fi->i_time, get_jiffies_64())) { refreshed = true; err = fuse_perm_getattr(inode, mask); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 4598345..d08c108 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -985,13 +985,9 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, if (mapping_writably_mapped(mapping)) flush_dcache_page(page); - pagefault_disable(); tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes); - pagefault_enable(); flush_dcache_page(page); - mark_page_accessed(page); - if (!tmp) { unlock_page(page); page_cache_release(page); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index a8ce6da..4937d4b 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -461,6 +461,17 @@ static const match_table_t tokens = { {OPT_ERR, NULL} }; +static int fuse_match_uint(substring_t *s, unsigned int *res) +{ + int err = -ENOMEM; + char *buf = match_strdup(s); + if (buf) { + err = kstrtouint(buf, 10, res); + kfree(buf); + } + return err; +} + static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev) { char *p; @@ -471,6 +482,7 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev) while ((p = strsep(&opt, ",")) != NULL) { int token; int value; + unsigned uv; substring_t args[MAX_OPT_ARGS]; if (!*p) continue; @@ -494,18 +506,18 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev) break; case OPT_USER_ID: - if (match_int(&args[0], &value)) + if (fuse_match_uint(&args[0], &uv)) return 0; - d->user_id = make_kuid(current_user_ns(), value); + d->user_id = make_kuid(current_user_ns(), uv); if (!uid_valid(d->user_id)) return 0; d->user_id_present = 1; break; case OPT_GROUP_ID: - if (match_int(&args[0], &value)) + if (fuse_match_uint(&args[0], &uv)) return 0; - d->group_id = make_kgid(current_user_ns(), value); + 
d->group_id = make_kgid(current_user_ns(), uv); if (!gid_valid(d->group_id)) return 0; d->group_id_present = 1; diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 1253c20..f3aee0b 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -517,7 +517,6 @@ int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos, p = kmap_atomic(page); memcpy(buf + copied, p + offset, amt); kunmap_atomic(p); - mark_page_accessed(page); page_cache_release(page); copied += amt; index++; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 630db36..e803e3c 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -583,6 +583,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, error = PTR_ERR(inode); if (!IS_ERR(inode)) { d = d_splice_alias(inode, dentry); + error = PTR_ERR(d); + if (IS_ERR(d)) + goto fail_gunlock; error = 0; if (file) { if (S_ISREG(inode->i_mode)) { @@ -777,6 +780,11 @@ static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry, } d = d_splice_alias(inode, dentry); + if (IS_ERR(d)) { + iput(inode); + gfs2_glock_dq_uninit(&gh); + return d; + } if (file && S_ISREG(inode->i_mode)) error = finish_open(file, dentry, gfs2_open_common, opened); diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 52f177b..89afe3a 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -128,7 +128,8 @@ struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create) yield(); } } else { - page = find_lock_page(mapping, index); + page = find_get_page_flags(mapping, index, + FGP_LOCK|FGP_ACCESSED); if (!page) return NULL; } @@ -145,7 +146,6 @@ struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create) map_bh(bh, sdp->sd_vfs, blkno); unlock_page(page); - mark_page_accessed(page); page_cache_release(page); return bh; diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 4a4fea0..6411218 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -212,13 +212,31 @@ static int hfsplus_readdir(struct file *file, struct 
dir_context *ctx) be32_to_cpu(entry.folder.id), DT_DIR)) break; } else if (type == HFSPLUS_FILE) { + u16 mode; + unsigned type = DT_UNKNOWN; + if (fd.entrylength < sizeof(struct hfsplus_cat_file)) { pr_err("small file entry\n"); err = -EIO; goto out; } + + mode = be16_to_cpu(entry.file.permissions.mode); + if (S_ISREG(mode)) + type = DT_REG; + else if (S_ISLNK(mode)) + type = DT_LNK; + else if (S_ISFIFO(mode)) + type = DT_FIFO; + else if (S_ISCHR(mode)) + type = DT_CHR; + else if (S_ISBLK(mode)) + type = DT_BLK; + else if (S_ISSOCK(mode)) + type = DT_SOCK; + if (!dir_emit(ctx, strbuf, len, - be32_to_cpu(entry.file.id), DT_REG)) + be32_to_cpu(entry.file.id), type)) break; } else { pr_err("bad catalog entry type\n"); diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index fbb212f..f0f601c 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -498,11 +498,13 @@ int hfsplus_file_extend(struct inode *inode) goto insert_extent; } out: - mutex_unlock(&hip->extents_lock); if (!res) { hip->alloc_blocks += len; + mutex_unlock(&hip->extents_lock); hfsplus_mark_inode_dirty(inode, HFSPLUS_I_ALLOC_DIRTY); + return 0; } + mutex_unlock(&hip->extents_lock); return res; insert_extent: @@ -556,11 +558,13 @@ void hfsplus_file_truncate(struct inode *inode) blk_cnt = (inode->i_size + HFSPLUS_SB(sb)->alloc_blksz - 1) >> HFSPLUS_SB(sb)->alloc_blksz_shift; + + mutex_lock(&hip->extents_lock); + alloc_cnt = hip->alloc_blocks; if (blk_cnt == alloc_cnt) - goto out; + goto out_unlock; - mutex_lock(&hip->extents_lock); res = hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); if (res) { mutex_unlock(&hip->extents_lock); @@ -592,10 +596,10 @@ void hfsplus_file_truncate(struct inode *inode) hfs_brec_remove(&fd); } hfs_find_exit(&fd); - mutex_unlock(&hip->extents_lock); hip->alloc_blocks = blk_cnt; -out: +out_unlock: + mutex_unlock(&hip->extents_lock); hip->phys_size = inode->i_size; hip->fs_blocks = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; diff --git 
a/fs/hfsplus/options.c b/fs/hfsplus/options.c index 968eab5..68537e8 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c @@ -75,7 +75,7 @@ int hfsplus_parse_options_remount(char *input, int *force) int token; if (!input) - return 0; + return 1; while ((p = strsep(&input, ",")) != NULL) { if (!*p) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index d19b30a..a4a8ed5 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -1017,6 +1017,11 @@ static int __init init_hugetlbfs_fs(void) int error; int i; + if (!hugepages_supported()) { + pr_info("hugetlbfs: disabling because there are no supported hugepage sizes\n"); + return -ENOTSUPP; + } + error = bdi_init(&hugetlbfs_backing_dev_info); if (error) return error; @@ -1808,14 +1808,18 @@ EXPORT_SYMBOL(inode_init_owner); * inode_owner_or_capable - check current task permissions to inode * @inode: inode being checked * - * Return true if current either has CAP_FOWNER to the inode, or - * owns the file. + * Return true if current either has CAP_FOWNER in a namespace with the + * inode owner uid mapped, or owns the file. */ bool inode_owner_or_capable(const struct inode *inode) { + struct user_namespace *ns; + if (uid_eq(current_fsuid(), inode->i_uid)) return true; - if (inode_capable(inode, CAP_FOWNER)) + + ns = current_user_ns(); + if (ns_capable(ns, CAP_FOWNER) && kuid_has_mapping(ns, inode->i_uid)) return true; return false; } @@ -1867,3 +1871,34 @@ void inode_dio_done(struct inode *inode) wake_up_bit(&inode->i_state, __I_DIO_WAKEUP); } EXPORT_SYMBOL(inode_dio_done); + +/* + * inode_set_flags - atomically set some inode flags + * + * Note: the caller should be holding i_mutex, or else be sure that + * they have exclusive access to the inode structure (i.e., while the + * inode is being instantiated). 
The reason for the cmpxchg() loop + * --- which wouldn't be necessary if all code paths which modify + * i_flags actually followed this rule, is that there is at least one + * code path which doesn't today --- for example, + * __generic_file_aio_write() calls file_remove_suid() without holding + * i_mutex --- so we use cmpxchg() out of an abundance of caution. + * + * In the long run, i_mutex is overkill, and we should probably look + * at using the i_lock spinlock to protect i_flags, and then make sure + * it is so documented in include/linux/fs.h and that all code follows + * the locking convention!! + */ +void inode_set_flags(struct inode *inode, unsigned int flags, + unsigned int mask) +{ + unsigned int old_flags, new_flags; + + WARN_ON_ONCE(flags & ~mask); + do { + old_flags = ACCESS_ONCE(inode->i_flags); + new_flags = (old_flags & ~mask) | flags; + } while (unlikely(cmpxchg(&inode->i_flags, old_flags, + new_flags) != old_flags)); +} +EXPORT_SYMBOL(inode_set_flags); diff --git a/fs/ioprio.c b/fs/ioprio.c index e50170c..31666c9 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -157,14 +157,16 @@ out: int ioprio_best(unsigned short aprio, unsigned short bprio) { - unsigned short aclass = IOPRIO_PRIO_CLASS(aprio); - unsigned short bclass = IOPRIO_PRIO_CLASS(bprio); + unsigned short aclass; + unsigned short bclass; - if (aclass == IOPRIO_CLASS_NONE) - aclass = IOPRIO_CLASS_BE; - if (bclass == IOPRIO_CLASS_NONE) - bclass = IOPRIO_CLASS_BE; + if (!ioprio_valid(aprio)) + aprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM); + if (!ioprio_valid(bprio)) + bprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM); + aclass = IOPRIO_PRIO_CLASS(aprio); + bclass = IOPRIO_PRIO_CLASS(bprio); if (aclass == bclass) return min(aprio, bprio); if (aclass > bclass) diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index e5d408a..2e2af97 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -61,7 +61,7 @@ static void isofs_put_super(struct super_block *sb) return; } -static int 
isofs_read_inode(struct inode *); +static int isofs_read_inode(struct inode *, int relocated); static int isofs_statfs (struct dentry *, struct kstatfs *); static struct kmem_cache *isofs_inode_cachep; @@ -1258,7 +1258,7 @@ out_toomany: goto out; } -static int isofs_read_inode(struct inode *inode) +static int isofs_read_inode(struct inode *inode, int relocated) { struct super_block *sb = inode->i_sb; struct isofs_sb_info *sbi = ISOFS_SB(sb); @@ -1403,7 +1403,7 @@ static int isofs_read_inode(struct inode *inode) */ if (!high_sierra) { - parse_rock_ridge_inode(de, inode); + parse_rock_ridge_inode(de, inode, relocated); /* if we want uid/gid set, override the rock ridge setting */ if (sbi->s_uid_set) inode->i_uid = sbi->s_uid; @@ -1482,9 +1482,10 @@ static int isofs_iget5_set(struct inode *ino, void *data) * offset that point to the underlying meta-data for the inode. The * code below is otherwise similar to the iget() code in * include/linux/fs.h */ -struct inode *isofs_iget(struct super_block *sb, - unsigned long block, - unsigned long offset) +struct inode *__isofs_iget(struct super_block *sb, + unsigned long block, + unsigned long offset, + int relocated) { unsigned long hashval; struct inode *inode; @@ -1506,7 +1507,7 @@ struct inode *isofs_iget(struct super_block *sb, return ERR_PTR(-ENOMEM); if (inode->i_state & I_NEW) { - ret = isofs_read_inode(inode); + ret = isofs_read_inode(inode, relocated); if (ret < 0) { iget_failed(inode); inode = ERR_PTR(ret); diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h index 9916723..0ac4c1f 100644 --- a/fs/isofs/isofs.h +++ b/fs/isofs/isofs.h @@ -107,7 +107,7 @@ extern int iso_date(char *, int); struct inode; /* To make gcc happy */ -extern int parse_rock_ridge_inode(struct iso_directory_record *, struct inode *); +extern int parse_rock_ridge_inode(struct iso_directory_record *, struct inode *, int relocated); extern int get_rock_ridge_filename(struct iso_directory_record *, char *, struct inode *); extern int 
isofs_name_translate(struct iso_directory_record *, char *, struct inode *); @@ -118,9 +118,24 @@ extern struct dentry *isofs_lookup(struct inode *, struct dentry *, unsigned int extern struct buffer_head *isofs_bread(struct inode *, sector_t); extern int isofs_get_blocks(struct inode *, sector_t, struct buffer_head **, unsigned long); -extern struct inode *isofs_iget(struct super_block *sb, - unsigned long block, - unsigned long offset); +struct inode *__isofs_iget(struct super_block *sb, + unsigned long block, + unsigned long offset, + int relocated); + +static inline struct inode *isofs_iget(struct super_block *sb, + unsigned long block, + unsigned long offset) +{ + return __isofs_iget(sb, block, offset, 0); +} + +static inline struct inode *isofs_iget_reloc(struct super_block *sb, + unsigned long block, + unsigned long offset) +{ + return __isofs_iget(sb, block, offset, 1); +} /* Because the inode number is no longer relevant to finding the * underlying meta-data for an inode, we are free to choose a more diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c index c0bf424..735d752 100644 --- a/fs/isofs/rock.c +++ b/fs/isofs/rock.c @@ -30,6 +30,7 @@ struct rock_state { int cont_size; int cont_extent; int cont_offset; + int cont_loops; struct inode *inode; }; @@ -73,6 +74,9 @@ static void init_rock_state(struct rock_state *rs, struct inode *inode) rs->inode = inode; } +/* Maximum number of Rock Ridge continuation entries */ +#define RR_MAX_CE_ENTRIES 32 + /* * Returns 0 if the caller should continue scanning, 1 if the scan must end * and -ve on error. 
@@ -105,6 +109,8 @@ static int rock_continue(struct rock_state *rs) goto out; } ret = -EIO; + if (++rs->cont_loops >= RR_MAX_CE_ENTRIES) + goto out; bh = sb_bread(rs->inode->i_sb, rs->cont_extent); if (bh) { memcpy(rs->buffer, bh->b_data + rs->cont_offset, @@ -288,12 +294,16 @@ eio: goto out; } +#define RR_REGARD_XA 1 +#define RR_RELOC_DE 2 + static int parse_rock_ridge_inode_internal(struct iso_directory_record *de, - struct inode *inode, int regard_xa) + struct inode *inode, int flags) { int symlink_len = 0; int cnt, sig; + unsigned int reloc_block; struct inode *reloc; struct rock_ridge *rr; int rootflag; @@ -305,7 +315,7 @@ parse_rock_ridge_inode_internal(struct iso_directory_record *de, init_rock_state(&rs, inode); setup_rock_ridge(de, inode, &rs); - if (regard_xa) { + if (flags & RR_REGARD_XA) { rs.chr += 14; rs.len -= 14; if (rs.len < 0) @@ -352,6 +362,9 @@ repeat: rs.cont_size = isonum_733(rr->u.CE.size); break; case SIG('E', 'R'): + /* Invalid length of ER tag id? */ + if (rr->u.ER.len_id + offsetof(struct rock_ridge, u.ER.data) > rr->len) + goto out; ISOFS_SB(inode->i_sb)->s_rock = 1; printk(KERN_DEBUG "ISO 9660 Extensions: "); { @@ -485,12 +498,22 @@ repeat: "relocated directory\n"); goto out; case SIG('C', 'L'): - ISOFS_I(inode)->i_first_extent = - isonum_733(rr->u.CL.location); - reloc = - isofs_iget(inode->i_sb, - ISOFS_I(inode)->i_first_extent, - 0); + if (flags & RR_RELOC_DE) { + printk(KERN_ERR + "ISOFS: Recursive directory relocation " + "is not supported\n"); + goto eio; + } + reloc_block = isonum_733(rr->u.CL.location); + if (reloc_block == ISOFS_I(inode)->i_iget5_block && + ISOFS_I(inode)->i_iget5_offset == 0) { + printk(KERN_ERR + "ISOFS: Directory relocation points to " + "itself\n"); + goto eio; + } + ISOFS_I(inode)->i_first_extent = reloc_block; + reloc = isofs_iget_reloc(inode->i_sb, reloc_block, 0); if (IS_ERR(reloc)) { ret = PTR_ERR(reloc); goto out; @@ -637,9 +660,11 @@ static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, 
char *plimit) return rpnt; } -int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode) +int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode, + int relocated) { - int result = parse_rock_ridge_inode_internal(de, inode, 0); + int flags = relocated ? RR_RELOC_DE : 0; + int result = parse_rock_ridge_inode_internal(de, inode, flags); /* * if rockridge flag was reset and we didn't look for attributes @@ -647,7 +672,8 @@ int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode) */ if ((ISOFS_SB(inode->i_sb)->s_rock_offset == -1) && (ISOFS_SB(inode->i_sb)->s_rock == 2)) { - result = parse_rock_ridge_inode_internal(de, inode, 14); + result = parse_rock_ridge_inode_internal(de, inode, + flags | RR_REGARD_XA); } return result; } diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index cf2fc05..9181c2b 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -97,7 +97,7 @@ static void jbd2_commit_block_csum_set(journal_t *j, struct buffer_head *bh) struct commit_header *h; __u32 csum; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return; h = (struct commit_header *)(bh->b_data); @@ -313,11 +313,11 @@ static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh) return checksum; } -static void write_tag_block(int tag_bytes, journal_block_tag_t *tag, +static void write_tag_block(journal_t *j, journal_block_tag_t *tag, unsigned long long block) { tag->t_blocknr = cpu_to_be32(block & (u32)~0); - if (tag_bytes > JBD2_TAG_SIZE32) + if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_64BIT)) tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1); } @@ -327,7 +327,7 @@ static void jbd2_descr_block_csum_set(journal_t *j, struct jbd2_journal_block_tail *tail; __u32 csum; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return; tail = (struct jbd2_journal_block_tail *)(bh->b_data + 
j->j_blocksize - @@ -340,12 +340,13 @@ static void jbd2_descr_block_csum_set(journal_t *j, static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, struct buffer_head *bh, __u32 sequence) { + journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag; struct page *page = bh->b_page; __u8 *addr; __u32 csum32; __be32 seq; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return; seq = cpu_to_be32(sequence); @@ -355,8 +356,10 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, bh->b_size); kunmap_atomic(addr); - /* We only have space to store the lower 16 bits of the crc32c. */ - tag->t_checksum = cpu_to_be16(csum32); + if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3)) + tag3->t_checksum = cpu_to_be32(csum32); + else + tag->t_checksum = cpu_to_be16(csum32); } /* * jbd2_journal_commit_transaction @@ -396,7 +399,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) LIST_HEAD(io_bufs); LIST_HEAD(log_bufs); - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (jbd2_journal_has_csum_v2or3(journal)) csum_size = sizeof(struct jbd2_journal_block_tail); /* @@ -692,7 +695,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) tag_flag |= JBD2_FLAG_SAME_UUID; tag = (journal_block_tag_t *) tagp; - write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); + write_tag_block(journal, tag, jh2bh(jh)->b_blocknr); tag->t_flags = cpu_to_be16(tag_flag); jbd2_block_tag_csum_set(journal, tag, wbuf[bufs], commit_transaction->t_tid); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 5203264..e72faaca 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -124,7 +124,7 @@ EXPORT_SYMBOL(__jbd2_debug); /* Checksumming functions */ int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) { - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return 1; return 
sb->s_checksum_type == JBD2_CRC32C_CHKSUM; @@ -145,7 +145,7 @@ static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb) int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) { - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return 1; return sb->s_checksum == jbd2_superblock_csum(j, sb); @@ -153,7 +153,7 @@ int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb) { - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return; sb->s_checksum = jbd2_superblock_csum(j, sb); @@ -1524,21 +1524,29 @@ static int journal_get_superblock(journal_t *journal) goto out; } - if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) && - JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { + if (jbd2_journal_has_csum_v2or3(journal) && + JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) { /* Can't have checksum v1 and v2 on at the same time! */ printk(KERN_ERR "JBD: Can't enable checksumming v1 and v2 " "at the same time!\n"); goto out; } + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) && + JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) { + /* Can't have checksum v2 and v3 at the same time! 
*/ + printk(KERN_ERR "JBD: Can't enable checksumming v2 and v3 " + "at the same time!\n"); + goto out; + } + if (!jbd2_verify_csum_type(journal, sb)) { printk(KERN_ERR "JBD: Unknown checksum type\n"); goto out; } /* Load the checksum driver */ - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { + if (jbd2_journal_has_csum_v2or3(journal)) { journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); if (IS_ERR(journal->j_chksum_driver)) { printk(KERN_ERR "JBD: Cannot load crc32c driver.\n"); @@ -1555,7 +1563,7 @@ static int journal_get_superblock(journal_t *journal) } /* Precompute checksum seed for all metadata */ - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (jbd2_journal_has_csum_v2or3(journal)) journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, sizeof(sb->s_uuid)); @@ -1815,8 +1823,14 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, if (!jbd2_journal_check_available_features(journal, compat, ro, incompat)) return 0; - /* Asking for checksumming v2 and v1? Only give them v2. */ - if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2 && + /* If enabling v2 checksums, turn on v3 instead */ + if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2) { + incompat &= ~JBD2_FEATURE_INCOMPAT_CSUM_V2; + incompat |= JBD2_FEATURE_INCOMPAT_CSUM_V3; + } + + /* Asking for checksumming v3 and v1? Only give them v3. 
*/ + if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V3 && compat & JBD2_FEATURE_COMPAT_CHECKSUM) compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM; @@ -1825,8 +1839,8 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, sb = journal->j_superblock; - /* If enabling v2 checksums, update superblock */ - if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V2)) { + /* If enabling v3 checksums, update superblock */ + if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) { sb->s_checksum_type = JBD2_CRC32C_CHKSUM; sb->s_feature_compat &= ~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM); @@ -1844,8 +1858,7 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, } /* Precompute checksum seed for all metadata */ - if (JBD2_HAS_INCOMPAT_FEATURE(journal, - JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (jbd2_journal_has_csum_v2or3(journal)) journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, sizeof(sb->s_uuid)); @@ -1854,7 +1867,8 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, /* If enabling v1 checksums, downgrade superblock */ if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM)) sb->s_feature_incompat &= - ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2); + ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2 | + JBD2_FEATURE_INCOMPAT_CSUM_V3); sb->s_feature_compat |= cpu_to_be32(compat); sb->s_feature_ro_compat |= cpu_to_be32(ro); @@ -2167,16 +2181,20 @@ int jbd2_journal_blocks_per_page(struct inode *inode) */ size_t journal_tag_bytes(journal_t *journal) { - journal_block_tag_t tag; - size_t x = 0; + size_t sz; + + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) + return sizeof(journal_block_tag3_t); + + sz = sizeof(journal_block_tag_t); if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) - x += sizeof(tag.t_checksum); + sz += sizeof(__u16); if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) - return x + JBD2_TAG_SIZE64; + return sz; else - return x + JBD2_TAG_SIZE32; + return sz 
- sizeof(__u32); } /* diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 3929c50..c416647 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -181,7 +181,7 @@ static int jbd2_descr_block_csum_verify(journal_t *j, __be32 provided; __u32 calculated; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return 1; tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize - @@ -205,7 +205,7 @@ static int count_tags(journal_t *journal, struct buffer_head *bh) int nr = 0, size = journal->j_blocksize; int tag_bytes = journal_tag_bytes(journal); - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (jbd2_journal_has_csum_v2or3(journal)) size -= sizeof(struct jbd2_journal_block_tail); tagp = &bh->b_data[sizeof(journal_header_t)]; @@ -338,10 +338,11 @@ int jbd2_journal_skip_recovery(journal_t *journal) return err; } -static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag) +static inline unsigned long long read_tag_block(journal_t *journal, + journal_block_tag_t *tag) { unsigned long long block = be32_to_cpu(tag->t_blocknr); - if (tag_bytes > JBD2_TAG_SIZE32) + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; return block; } @@ -384,7 +385,7 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) __be32 provided; __u32 calculated; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return 1; h = buf; @@ -399,17 +400,21 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, void *buf, __u32 sequence) { + journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag; __u32 csum32; __be32 seq; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return 1; seq = 
cpu_to_be32(sequence); csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); - return tag->t_checksum == cpu_to_be16(csum32); + if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3)) + return tag3->t_checksum == cpu_to_be32(csum32); + else + return tag->t_checksum == cpu_to_be16(csum32); } static int do_one_pass(journal_t *journal, @@ -426,6 +431,7 @@ static int do_one_pass(journal_t *journal, int tag_bytes = journal_tag_bytes(journal); __u32 crc32_sum = ~0; /* Transactional Checksums */ int descr_csum_size = 0; + int block_error = 0; /* * First thing is to establish what we expect to find in the log @@ -512,14 +518,14 @@ static int do_one_pass(journal_t *journal, switch(blocktype) { case JBD2_DESCRIPTOR_BLOCK: /* Verify checksum first */ - if (JBD2_HAS_INCOMPAT_FEATURE(journal, - JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (jbd2_journal_has_csum_v2or3(journal)) descr_csum_size = sizeof(struct jbd2_journal_block_tail); if (descr_csum_size > 0 && !jbd2_descr_block_csum_verify(journal, bh->b_data)) { err = -EIO; + brelse(bh); goto failed; } @@ -574,7 +580,7 @@ static int do_one_pass(journal_t *journal, unsigned long long blocknr; J_ASSERT(obh != NULL); - blocknr = read_tag_block(tag_bytes, + blocknr = read_tag_block(journal, tag); /* If the block has been @@ -598,7 +604,8 @@ static int do_one_pass(journal_t *journal, "checksum recovering " "block %llu in log\n", blocknr); - continue; + block_error = 1; + goto skip_write; } /* Find a buffer for the new @@ -797,7 +804,8 @@ static int do_one_pass(journal_t *journal, success = -EIO; } } - + if (block_error && success == 0) + success = -EIO; return success; failed: @@ -811,7 +819,7 @@ static int jbd2_revoke_block_csum_verify(journal_t *j, __be32 provided; __u32 calculated; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return 1; tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize - 
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 198c9c1..d5e95a1 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -91,8 +91,8 @@ #include <linux/list.h> #include <linux/init.h> #include <linux/bio.h> -#endif #include <linux/log2.h> +#endif static struct kmem_cache *jbd2_revoke_record_cache; static struct kmem_cache *jbd2_revoke_table_cache; @@ -597,7 +597,7 @@ static void write_one_revoke_record(journal_t *journal, offset = *offsetp; /* Do we need to leave space at the end for a checksum? */ - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (jbd2_journal_has_csum_v2or3(journal)) csum_size = sizeof(struct jbd2_journal_revoke_tail); /* Make sure we have a descriptor with space left for the record */ @@ -644,7 +644,7 @@ static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh) struct jbd2_journal_revoke_tail *tail; __u32 csum; - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return; tail = (struct jbd2_journal_revoke_tail *)(bh->b_data + j->j_blocksize - diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 7272cc6..ab3815c 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1590,9 +1590,12 @@ int jbd2_journal_stop(handle_t *handle) * to perform a synchronous write. We do this to detect the * case where a single process is doing a stream of sync * writes. No point in waiting for joiners in that case. + * + * Setting max_batch_time to 0 disables this completely. 
*/ pid = current->pid; - if (handle->h_sync && journal->j_last_sync_writer != pid) { + if (handle->h_sync && journal->j_last_sync_writer != pid && + journal->j_max_batch_time) { u64 commit_time, trans_time; journal->j_last_sync_writer = pid; diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index fe3c052..91bf52d 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -682,7 +682,7 @@ unsigned char *jffs2_gc_fetch_page(struct jffs2_sb_info *c, struct inode *inode = OFNI_EDONI_2SFFJ(f); struct page *pg; - pg = read_cache_page_async(inode->i_mapping, offset >> PAGE_CACHE_SHIFT, + pg = read_cache_page(inode->i_mapping, offset >> PAGE_CACHE_SHIFT, (void *)jffs2_do_readpage_unlock, inode); if (IS_ERR(pg)) return (void *)pg; diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h index 413ef89..046fee8 100644 --- a/fs/jffs2/jffs2_fs_sb.h +++ b/fs/jffs2/jffs2_fs_sb.h @@ -134,8 +134,6 @@ struct jffs2_sb_info { struct rw_semaphore wbuf_sem; /* Protects the write buffer */ struct delayed_work wbuf_dwork; /* write-buffer write-out work */ - int wbuf_queued; /* non-zero delayed work is queued */ - spinlock_t wbuf_dwork_lock; /* protects wbuf_dwork and and wbuf_queued */ unsigned char *oobbuf; int oobavail; /* How many bytes are available for JFFS2 in OOB */ diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index a6597d6..09ed551 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c @@ -1162,10 +1162,6 @@ static void delayed_wbuf_sync(struct work_struct *work) struct jffs2_sb_info *c = work_to_sb(work); struct super_block *sb = OFNI_BS_2SFFJ(c); - spin_lock(&c->wbuf_dwork_lock); - c->wbuf_queued = 0; - spin_unlock(&c->wbuf_dwork_lock); - if (!(sb->s_flags & MS_RDONLY)) { jffs2_dbg(1, "%s()\n", __func__); jffs2_flush_wbuf_gc(c, 0); @@ -1180,14 +1176,9 @@ void jffs2_dirty_trigger(struct jffs2_sb_info *c) if (sb->s_flags & MS_RDONLY) return; - spin_lock(&c->wbuf_dwork_lock); - if (!c->wbuf_queued) { + delay = msecs_to_jiffies(dirty_writeback_interval * 10); + if 
(queue_delayed_work(system_long_wq, &c->wbuf_dwork, delay)) jffs2_dbg(1, "%s()\n", __func__); - delay = msecs_to_jiffies(dirty_writeback_interval * 10); - queue_delayed_work(system_long_wq, &c->wbuf_dwork, delay); - c->wbuf_queued = 1; - } - spin_unlock(&c->wbuf_dwork_lock); } int jffs2_nand_flash_setup(struct jffs2_sb_info *c) @@ -1211,7 +1202,6 @@ int jffs2_nand_flash_setup(struct jffs2_sb_info *c) /* Initialise write buffer */ init_rwsem(&c->wbuf_sem); - spin_lock_init(&c->wbuf_dwork_lock); INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync); c->wbuf_pagesize = c->mtd->writesize; c->wbuf_ofs = 0xFFFFFFFF; @@ -1251,7 +1241,6 @@ int jffs2_dataflash_setup(struct jffs2_sb_info *c) { /* Initialize write buffer */ init_rwsem(&c->wbuf_sem); - spin_lock_init(&c->wbuf_dwork_lock); INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync); c->wbuf_pagesize = c->mtd->erasesize; @@ -1311,7 +1300,6 @@ int jffs2_nor_wbuf_flash_setup(struct jffs2_sb_info *c) { /* Initialize write buffer */ init_rwsem(&c->wbuf_sem); - spin_lock_init(&c->wbuf_dwork_lock); INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync); c->wbuf_pagesize = c->mtd->writesize; @@ -1346,7 +1334,6 @@ int jffs2_ubivol_setup(struct jffs2_sb_info *c) { return 0; init_rwsem(&c->wbuf_sem); - spin_lock_init(&c->wbuf_dwork_lock); INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync); c->wbuf_pagesize = c->mtd->writesize; diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index 7f464c5..6b0f816 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -29,20 +29,20 @@ void jfs_set_inode_flags(struct inode *inode) { unsigned int flags = JFS_IP(inode)->mode2; - - inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | - S_NOATIME | S_DIRSYNC | S_SYNC); + unsigned int new_fl = 0; if (flags & JFS_IMMUTABLE_FL) - inode->i_flags |= S_IMMUTABLE; + new_fl |= S_IMMUTABLE; if (flags & JFS_APPEND_FL) - inode->i_flags |= S_APPEND; + new_fl |= S_APPEND; if (flags & JFS_NOATIME_FL) - inode->i_flags |= S_NOATIME; + new_fl |= S_NOATIME; if (flags 
& JFS_DIRSYNC_FL) - inode->i_flags |= S_DIRSYNC; + new_fl |= S_DIRSYNC; if (flags & JFS_SYNC_FL) - inode->i_flags |= S_SYNC; + new_fl |= S_SYNC; + inode_set_flags(inode, new_fl, S_IMMUTABLE | S_APPEND | S_NOATIME | + S_DIRSYNC | S_SYNC); } void jfs_get_inode_flags(struct jfs_inode_info *jfs_ip) @@ -105,18 +105,18 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) spin_lock(&dentry->d_lock); /* d_lock not required for cursor */ - list_del(&cursor->d_u.d_child); + list_del(&cursor->d_child); p = dentry->d_subdirs.next; while (n && p != &dentry->d_subdirs) { struct dentry *next; - next = list_entry(p, struct dentry, d_u.d_child); + next = list_entry(p, struct dentry, d_child); spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); if (simple_positive(next)) n--; spin_unlock(&next->d_lock); p = p->next; } - list_add_tail(&cursor->d_u.d_child, p); + list_add_tail(&cursor->d_child, p); spin_unlock(&dentry->d_lock); } } @@ -140,7 +140,7 @@ int dcache_readdir(struct file *file, struct dir_context *ctx) { struct dentry *dentry = file->f_path.dentry; struct dentry *cursor = file->private_data; - struct list_head *p, *q = &cursor->d_u.d_child; + struct list_head *p, *q = &cursor->d_child; if (!dir_emit_dots(file, ctx)) return 0; @@ -149,7 +149,7 @@ int dcache_readdir(struct file *file, struct dir_context *ctx) list_move(q, &dentry->d_subdirs); for (p = q->next; p != &dentry->d_subdirs; p = p->next) { - struct dentry *next = list_entry(p, struct dentry, d_u.d_child); + struct dentry *next = list_entry(p, struct dentry, d_child); spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); if (!simple_positive(next)) { spin_unlock(&next->d_lock); @@ -270,7 +270,7 @@ int simple_empty(struct dentry *dentry) int ret = 0; spin_lock(&dentry->d_lock); - list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) { + list_for_each_entry(child, &dentry->d_subdirs, d_child) { spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); if (simple_positive(child)) { 
spin_unlock(&child->d_lock); diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 1812f02..6ae664b 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -159,6 +159,12 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res, msg.rpc_proc = &clnt->cl_procinfo[proc]; status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFTCONN); + if (status == -ECONNREFUSED) { + dprintk("lockd: NSM upcall RPC failed, status=%d, forcing rebind\n", + status); + rpc_force_rebind(clnt); + status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFTCONN); + } if (status < 0) dprintk("lockd: NSM upcall RPC failed, status=%d\n", status); diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 10d6c41..59a53f6 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -137,10 +137,6 @@ lockd(void *vrqstp) dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n"); - if (!nlm_timeout) - nlm_timeout = LOCKD_DFLT_TIMEO; - nlmsvc_timeout = nlm_timeout * HZ; - /* * The main request loop. We don't terminate until the last * NFS mount or NFS daemon has gone away. 
@@ -235,6 +231,7 @@ out_err: if (warned++ == 0) printk(KERN_WARNING "lockd_up: makesock failed, error=%d\n", err); + svc_shutdown_net(serv, net); return err; } @@ -252,13 +249,11 @@ static int lockd_up_net(struct svc_serv *serv, struct net *net) error = make_socks(serv, net); if (error < 0) - goto err_socks; + goto err_bind; set_grace_period(net); dprintk("lockd_up_net: per-net data created; net=%p\n", net); return 0; -err_socks: - svc_rpcb_cleanup(serv, net); err_bind: ln->nlmsvc_users--; return error; @@ -347,6 +342,10 @@ static struct svc_serv *lockd_create_svc(void) printk(KERN_WARNING "lockd_up: no pid, %d users??\n", nlmsvc_users); + if (!nlm_timeout) + nlm_timeout = LOCKD_DFLT_TIMEO; + nlmsvc_timeout = nlm_timeout * HZ; + serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, NULL); if (!serv) { printk(KERN_WARNING "lockd_up: create service failed\n"); @@ -1359,11 +1359,10 @@ int __break_lease(struct inode *inode, unsigned int mode) restart: break_time = flock->fl_break_time; - if (break_time != 0) { + if (break_time != 0) break_time -= jiffies; - if (break_time == 0) - break_time++; - } + if (break_time == 0) + break_time++; locks_insert_block(flock, new_fl); spin_unlock(&inode->i_lock); error = wait_event_interruptible_timeout(new_fl->fl_wait, @@ -2200,16 +2199,28 @@ void locks_remove_flock(struct file *filp) while ((fl = *before) != NULL) { if (fl->fl_file == filp) { - if (IS_FLOCK(fl)) { - locks_delete_lock(before); - continue; - } if (IS_LEASE(fl)) { lease_modify(before, F_UNLCK); continue; } - /* What? */ - BUG(); + + /* + * There's a leftover lock on the list of a type that + * we didn't expect to see. Most likely a classic + * POSIX lock that ended up not getting released + * properly, or that raced onto the list somehow. Log + * some info about it and then just remove it from + * the list. 
+ */ + WARN(!IS_FLOCK(fl), + "leftover lock: dev=%u:%u ino=%lu type=%hhd flags=0x%x start=%lld end=%lld\n", + MAJOR(inode->i_sb->s_dev), + MINOR(inode->i_sb->s_dev), inode->i_ino, + fl->fl_type, fl->fl_flags, + fl->fl_start, fl->fl_end); + + locks_delete_lock(before); + continue; } before = &fl->fl_next; } diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c index 4bc50da..742942a 100644 --- a/fs/minix/bitmap.c +++ b/fs/minix/bitmap.c @@ -96,7 +96,7 @@ int minix_new_block(struct inode * inode) unsigned long minix_count_free_blocks(struct super_block *sb) { struct minix_sb_info *sbi = minix_sb(sb); - u32 bits = sbi->s_nzones - (sbi->s_firstdatazone + 1); + u32 bits = sbi->s_nzones - sbi->s_firstdatazone + 1; return (count_free(sbi->s_zmap, sb->s_blocksize, bits) << sbi->s_log_zone_size); diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 0332109..a2e7175 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -266,12 +266,12 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) block = minix_blocks_needed(sbi->s_ninodes, s->s_blocksize); if (sbi->s_imap_blocks < block) { printk("MINIX-fs: file system does not have enough " - "imap blocks allocated. Refusing to mount\n"); + "imap blocks allocated. 
Refusing to mount.\n"); goto out_no_bitmap; } block = minix_blocks_needed( - (sbi->s_nzones - (sbi->s_firstdatazone + 1)), + (sbi->s_nzones - sbi->s_firstdatazone + 1), s->s_blocksize); if (sbi->s_zmap_blocks < block) { printk("MINIX-fs: file system does not have enough " @@ -34,6 +34,7 @@ #include <linux/device_cgroup.h> #include <linux/fs_struct.h> #include <linux/posix_acl.h> +#include <linux/hash.h> #include <asm/uaccess.h> #include "internal.h" @@ -321,10 +322,11 @@ int generic_permission(struct inode *inode, int mask) if (S_ISDIR(inode->i_mode)) { /* DACs are overridable for directories */ - if (inode_capable(inode, CAP_DAC_OVERRIDE)) + if (capable_wrt_inode_uidgid(inode, CAP_DAC_OVERRIDE)) return 0; if (!(mask & MAY_WRITE)) - if (inode_capable(inode, CAP_DAC_READ_SEARCH)) + if (capable_wrt_inode_uidgid(inode, + CAP_DAC_READ_SEARCH)) return 0; return -EACCES; } @@ -334,7 +336,7 @@ int generic_permission(struct inode *inode, int mask) * at least one exec bit set. */ if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO)) - if (inode_capable(inode, CAP_DAC_OVERRIDE)) + if (capable_wrt_inode_uidgid(inode, CAP_DAC_OVERRIDE)) return 0; /* @@ -342,7 +344,7 @@ int generic_permission(struct inode *inode, int mask) */ mask &= MAY_READ | MAY_WRITE | MAY_EXEC; if (mask == MAY_READ) - if (inode_capable(inode, CAP_DAC_READ_SEARCH)) + if (capable_wrt_inode_uidgid(inode, CAP_DAC_READ_SEARCH)) return 0; return -EACCES; @@ -640,24 +642,22 @@ static int complete_walk(struct nameidata *nd) static __always_inline void set_root(struct nameidata *nd) { - if (!nd->root.mnt) - get_fs_root(current->fs, &nd->root); + get_fs_root(current->fs, &nd->root); } static int link_path_walk(const char *, struct nameidata *); -static __always_inline void set_root_rcu(struct nameidata *nd) +static __always_inline unsigned set_root_rcu(struct nameidata *nd) { - if (!nd->root.mnt) { - struct fs_struct *fs = current->fs; - unsigned seq; + struct fs_struct *fs = current->fs; + unsigned seq, res; - do { 
- seq = read_seqcount_begin(&fs->seq); - nd->root = fs->root; - nd->seq = __read_seqcount_begin(&nd->root.dentry->d_seq); - } while (read_seqcount_retry(&fs->seq, seq)); - } + do { + seq = read_seqcount_begin(&fs->seq); + nd->root = fs->root; + res = __read_seqcount_begin(&nd->root.dentry->d_seq); + } while (read_seqcount_retry(&fs->seq, seq)); + return res; } static void path_put_conditional(struct path *path, struct nameidata *nd) @@ -857,7 +857,8 @@ follow_link(struct path *link, struct nameidata *nd, void **p) return PTR_ERR(s); } if (*s == '/') { - set_root(nd); + if (!nd->root.mnt) + set_root(nd); path_put(&nd->path); nd->path = nd->root; path_get(&nd->root); @@ -1143,7 +1144,8 @@ static void follow_mount_rcu(struct nameidata *nd) static int follow_dotdot_rcu(struct nameidata *nd) { - set_root_rcu(nd); + if (!nd->root.mnt) + set_root_rcu(nd); while (1) { if (nd->path.dentry == nd->root.dentry && @@ -1245,7 +1247,8 @@ static void follow_mount(struct path *path) static void follow_dotdot(struct nameidata *nd) { - set_root(nd); + if (!nd->root.mnt) + set_root(nd); while(1) { struct dentry *old = nd->path.dentry; @@ -1660,8 +1663,7 @@ static inline int can_lookup(struct inode *inode) static inline unsigned int fold_hash(unsigned long hash) { - hash += hash >> (8*sizeof(int)); - return hash; + return hash_64(hash, 32); } #else /* 32-bit case */ @@ -1875,7 +1877,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, if (*name=='/') { if (flags & LOOKUP_RCU) { lock_rcu_walk(); - set_root_rcu(nd); + nd->seq = set_root_rcu(nd); } else { set_root(nd); path_get(&nd->root); @@ -2280,9 +2282,10 @@ done: goto out; } path->dentry = dentry; - path->mnt = mntget(nd->path.mnt); + path->mnt = nd->path.mnt; if (should_follow_link(dentry->d_inode, nd->flags & LOOKUP_FOLLOW)) return 1; + mntget(path->mnt); follow_mount(path); error = 0; out: @@ -2404,7 +2407,7 @@ static inline int check_sticky(struct inode *dir, struct inode *inode) return 0; if 
(uid_eq(dir->i_uid, fsuid)) return 0; - return !inode_capable(inode, CAP_FOWNER); + return !capable_wrt_inode_uidgid(inode, CAP_FOWNER); } /* @@ -3156,7 +3159,8 @@ static int do_tmpfile(int dfd, struct filename *pathname, if (error) goto out2; audit_inode(pathname, nd->path.dentry, 0); - error = may_open(&nd->path, op->acc_mode, op->open_flag); + /* Don't check for other permissions, the inode was just created */ + error = may_open(&nd->path, MAY_OPEN, op->open_flag); if (error) goto out2; file->f_path.mnt = nd->path.mnt; diff --git a/fs/namespace.c b/fs/namespace.c index 22cbfab..bbe7def 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -831,8 +831,21 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD; /* Don't allow unprivileged users to change mount flags */ - if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY)) - mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; + if (flag & CL_UNPRIVILEGED) { + mnt->mnt.mnt_flags |= MNT_LOCK_ATIME; + + if (mnt->mnt.mnt_flags & MNT_READONLY) + mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; + + if (mnt->mnt.mnt_flags & MNT_NODEV) + mnt->mnt.mnt_flags |= MNT_LOCK_NODEV; + + if (mnt->mnt.mnt_flags & MNT_NOSUID) + mnt->mnt.mnt_flags |= MNT_LOCK_NOSUID; + + if (mnt->mnt.mnt_flags & MNT_NOEXEC) + mnt->mnt.mnt_flags |= MNT_LOCK_NOEXEC; + } /* Don't allow unprivileged users to reveal what is under a mount */ if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire)) @@ -1268,6 +1281,8 @@ static int do_umount(struct mount *mnt, int flags) * Special case for "unmounting" root ... * we just try to remount it readonly. 
*/ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; down_write(&sb->s_umount); if (!(sb->s_flags & MS_RDONLY)) retval = do_remount_sb(sb, MS_RDONLY, NULL, 0); @@ -1336,6 +1351,9 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags) goto dput_and_out; if (mnt->mnt.mnt_flags & MNT_LOCKED) goto dput_and_out; + retval = -EPERM; + if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN)) + goto dput_and_out; retval = do_umount(mnt, flags); dput_and_out: @@ -1810,9 +1828,6 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags) if (readonly_request == __mnt_is_readonly(mnt)) return 0; - if (mnt->mnt_flags & MNT_LOCK_READONLY) - return -EPERM; - if (readonly_request) error = mnt_make_readonly(real_mount(mnt)); else @@ -1838,6 +1853,39 @@ static int do_remount(struct path *path, int flags, int mnt_flags, if (path->dentry != path->mnt->mnt_root) return -EINVAL; + /* Don't allow changing of locked mnt flags. + * + * No locks need to be held here while testing the various + * MNT_LOCK flags because those flags can never be cleared + * once they are set. + */ + if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) && + !(mnt_flags & MNT_READONLY)) { + return -EPERM; + } + if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) && + !(mnt_flags & MNT_NODEV)) { + /* Was the nodev implicitly added in mount? 
*/ + if ((mnt->mnt_ns->user_ns != &init_user_ns) && + !(sb->s_type->fs_flags & FS_USERNS_DEV_MOUNT)) { + mnt_flags |= MNT_NODEV; + } else { + return -EPERM; + } + } + if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) && + !(mnt_flags & MNT_NOSUID)) { + return -EPERM; + } + if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) && + !(mnt_flags & MNT_NOEXEC)) { + return -EPERM; + } + if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) && + ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) { + return -EPERM; + } + err = security_sb_remount(sb, data); if (err) return err; @@ -1851,7 +1899,7 @@ static int do_remount(struct path *path, int flags, int mnt_flags, err = do_remount_sb(sb, flags, data, 0); if (!err) { br_write_lock(&vfsmount_lock); - mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK; + mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK; mnt->mnt.mnt_flags = mnt_flags; br_write_unlock(&vfsmount_lock); } @@ -2040,7 +2088,7 @@ static int do_new_mount(struct path *path, const char *fstype, int flags, */ if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) { flags |= MS_NODEV; - mnt_flags |= MNT_NODEV; + mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV; } } @@ -2358,6 +2406,14 @@ long do_mount(const char *dev_name, const char *dir_name, if (flags & MS_RDONLY) mnt_flags |= MNT_READONLY; + /* The default atime for remount is preservation */ + if ((flags & MS_REMOUNT) && + ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME | + MS_STRICTATIME)) == 0)) { + mnt_flags &= ~MNT_ATIME_MASK; + mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK; + } + flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN | MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT | MS_STRICTATIME); @@ -2704,6 +2760,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, /* make sure we can reach put_old from new_root */ if (!is_path_reachable(old_mnt, old.dentry, &new)) goto out4; + /* make certain new is below the root */ + if (!is_path_reachable(new_mnt, new.dentry, &root)) + goto 
out4; root_mp->m_count++; /* pin it so it won't go away */ br_write_lock(&vfsmount_lock); detach_mnt(new_mnt, &parent_path); diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 3be0474..0686002 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -407,7 +407,7 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { - dent = list_entry(next, struct dentry, d_u.d_child); + dent = list_entry(next, struct dentry, d_child); if ((unsigned long)dent->d_fsdata == fpos) { if (dent->d_inode) dget(dent); diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index 60426cc..2f970de 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -448,7 +448,6 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg result = -EIO; } } - result = 0; } mutex_unlock(&server->root_setup_lock); diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h index 32c0658..6d5e7c5 100644 --- a/fs/ncpfs/ncplib_kernel.h +++ b/fs/ncpfs/ncplib_kernel.h @@ -194,7 +194,7 @@ ncp_renew_dentries(struct dentry *parent) spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { - dentry = list_entry(next, struct dentry, d_u.d_child); + dentry = list_entry(next, struct dentry, d_child); if (dentry->d_fsdata == NULL) ncp_age_dentry(server, dentry); @@ -216,7 +216,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent) spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { - dentry = list_entry(next, struct dentry, d_u.d_child); + dentry = list_entry(next, struct dentry, d_child); dentry->d_fsdata = NULL; ncp_age_dentry(server, dentry); next = next->next; diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index e242bbf..fdb74cb 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -1220,7 +1220,7 @@ static u64 
pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx) end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE); if (end != NFS_I(inode)->npages) { rcu_read_lock(); - end = radix_tree_next_hole(&mapping->page_tree, idx + 1, ULONG_MAX); + end = page_cache_next_hole(mapping, idx + 1, ULONG_MAX); rcu_read_unlock(); } diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 5d8ccec..3ed1be9 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -109,6 +109,8 @@ again: continue; if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) continue; + if (!nfs4_valid_open_stateid(state)) + continue; if (!nfs4_stateid_match(&state->stateid, stateid)) continue; get_nfs_open_context(ctx); @@ -177,7 +179,11 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation * { int res = 0; - res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid, issync); + if (!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) + res = nfs4_proc_delegreturn(inode, + delegation->cred, + &delegation->stateid, + issync); nfs_free_delegation(delegation); return res; } @@ -364,11 +370,13 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation { struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); - int err; + int err = 0; if (delegation == NULL) return 0; do { + if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) + break; err = nfs_delegation_claim_opens(inode, &delegation->stateid); if (!issync || err != -EAGAIN) break; @@ -589,10 +597,23 @@ static void nfs_client_mark_return_unused_delegation_types(struct nfs_client *cl rcu_read_unlock(); } +static void nfs_revoke_delegation(struct inode *inode) +{ + struct nfs_delegation *delegation; + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation != NULL) { + set_bit(NFS_DELEGATION_REVOKED, &delegation->flags); + nfs_mark_return_delegation(NFS_SERVER(inode), delegation); + } + rcu_read_unlock(); +} + 
void nfs_remove_bad_delegation(struct inode *inode) { struct nfs_delegation *delegation; + nfs_revoke_delegation(inode); delegation = nfs_inode_detach_delegation(inode); if (delegation) { nfs_inode_find_state_and_recover(inode, &delegation->stateid); diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 9a79c7a..e02b090 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -31,6 +31,7 @@ enum { NFS_DELEGATION_RETURN_IF_CLOSED, NFS_DELEGATION_REFERENCED, NFS_DELEGATION_RETURNING, + NFS_DELEGATION_REVOKED, }; int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index af5f3ff..d751a23 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -179,6 +179,7 @@ static void nfs_direct_req_free(struct kref *kref) { struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); + nfs_free_pnfs_ds_cinfo(&dreq->ds_cinfo); if (dreq->l_ctx != NULL) nfs_put_lock_context(dreq->l_ctx); if (dreq->ctx != NULL) diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 66984a9..5b8ab0e 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -58,7 +58,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i */ spin_lock(&sb->s_root->d_inode->i_lock); spin_lock(&sb->s_root->d_lock); - hlist_del_init(&sb->s_root->d_alias); + hlist_del_init(&sb->s_root->d_u.d_alias); spin_unlock(&sb->s_root->d_lock); spin_unlock(&sb->s_root->d_inode->i_lock); } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index fdeeb28..e5eb677 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -598,7 +598,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = dentry->d_inode; int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; - int err; + int err = 0; trace_nfs_getattr_enter(inode); /* Flush out writes to the server in order to update c/mtime. 
*/ @@ -1540,18 +1540,20 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_version = fattr->change_attr; } } else if (server->caps & NFS_CAP_CHANGE_ATTR) - invalid |= save_cache_validity; + nfsi->cache_validity |= save_cache_validity; if (fattr->valid & NFS_ATTR_FATTR_MTIME) { memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); } else if (server->caps & NFS_CAP_MTIME) - invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_ATTR | NFS_INO_REVAL_FORCED); if (fattr->valid & NFS_ATTR_FATTR_CTIME) { memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); } else if (server->caps & NFS_CAP_CTIME) - invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_ATTR | NFS_INO_REVAL_FORCED); /* Check if our cached file size is stale */ @@ -1574,7 +1576,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) (long long)new_isize); } } else - invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_ATTR | NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_FORCED); @@ -1582,7 +1585,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (fattr->valid & NFS_ATTR_FATTR_ATIME) memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); else if (server->caps & NFS_CAP_ATIME) - invalid |= save_cache_validity & (NFS_INO_INVALID_ATIME + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_ATIME | NFS_INO_REVAL_FORCED); if (fattr->valid & NFS_ATTR_FATTR_MODE) { @@ -1593,7 +1597,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; } } else if (server->caps & NFS_CAP_MODE) - invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + nfsi->cache_validity |= save_cache_validity & + 
(NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_REVAL_FORCED); @@ -1604,7 +1609,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_uid = fattr->uid; } } else if (server->caps & NFS_CAP_OWNER) - invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_REVAL_FORCED); @@ -1615,7 +1621,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_gid = fattr->gid; } } else if (server->caps & NFS_CAP_OWNER_GROUP) - invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_REVAL_FORCED); @@ -1628,7 +1635,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) set_nlink(inode, fattr->nlink); } } else if (server->caps & NFS_CAP_NLINK) - invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_ATTR | NFS_INO_REVAL_FORCED); if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 55ebebe..ce036f0 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -478,6 +478,16 @@ int nfs40_walk_client_list(struct nfs_client *new, spin_lock(&nn->nfs_client_lock); list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { + + if (pos->rpc_ops != new->rpc_ops) + continue; + + if (pos->cl_proto != new->cl_proto) + continue; + + if (pos->cl_minorversion != new->cl_minorversion) + continue; + /* If "pos" isn't marked ready, we can't trust the * remaining fields in "pos" */ if (pos->cl_cons_state > NFS_CS_READY) { @@ -497,15 +507,6 @@ int nfs40_walk_client_list(struct nfs_client *new, if (pos->cl_cons_state != NFS_CS_READY) continue; - if (pos->rpc_ops != new->rpc_ops) - continue; - - if (pos->cl_proto 
!= new->cl_proto) - continue; - - if (pos->cl_minorversion != new->cl_minorversion) - continue; - if (pos->cl_clientid != new->cl_clientid) continue; @@ -560,20 +561,14 @@ static bool nfs4_match_clientids(struct nfs_client *a, struct nfs_client *b) } /* - * Returns true if the server owners match + * Returns true if the server major ids match */ static bool -nfs4_match_serverowners(struct nfs_client *a, struct nfs_client *b) +nfs4_check_clientid_trunking(struct nfs_client *a, struct nfs_client *b) { struct nfs41_server_owner *o1 = a->cl_serverowner; struct nfs41_server_owner *o2 = b->cl_serverowner; - if (o1->minor_id != o2->minor_id) { - dprintk("NFS: --> %s server owner minor IDs do not match\n", - __func__); - return false; - } - if (o1->major_id_sz != o2->major_id_sz) goto out_major_mismatch; if (memcmp(o1->major_id, o2->major_id, o1->major_id_sz) != 0) @@ -611,6 +606,16 @@ int nfs41_walk_client_list(struct nfs_client *new, spin_lock(&nn->nfs_client_lock); list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { + + if (pos->rpc_ops != new->rpc_ops) + continue; + + if (pos->cl_proto != new->cl_proto) + continue; + + if (pos->cl_minorversion != new->cl_minorversion) + continue; + /* If "pos" isn't marked ready, we can't trust the * remaining fields in "pos", especially the client * ID and serverowner fields. Wait for CREATE_SESSION @@ -636,19 +641,15 @@ int nfs41_walk_client_list(struct nfs_client *new, if (pos->cl_cons_state != NFS_CS_READY) continue; - if (pos->rpc_ops != new->rpc_ops) - continue; - - if (pos->cl_proto != new->cl_proto) - continue; - - if (pos->cl_minorversion != new->cl_minorversion) - continue; - if (!nfs4_match_clientids(pos, new)) continue; - if (!nfs4_match_serverowners(pos, new)) + /* + * Note that session trunking is just a special subcase of + * client id trunking. In either case, we want to fall back + * to using the existing nfs_client. 
+ */ + if (!nfs4_check_clientid_trunking(pos, new)) continue; atomic_inc(&pos->cl_count); diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 394b0a0..3c27659 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -1330,7 +1330,7 @@ filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) struct nfs4_filelayout *flo; flo = kzalloc(sizeof(*flo), gfp_flags); - return &flo->generic_hdr; + return flo != NULL ? &flo->generic_hdr : NULL; } static void diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index bcd42fb..43c2711 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1064,6 +1064,7 @@ static void nfs4_opendata_free(struct kref *kref) dput(p->dentry); nfs_sb_deactive(sb); nfs_fattr_free_names(&p->f_attr); + kfree(p->f_attr.mdsthreshold); kfree(p); } @@ -1578,7 +1579,7 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct nfs_inode_find_state_and_recover(state->inode, stateid); nfs4_schedule_stateid_recovery(server, state); - return 0; + return -EAGAIN; case -NFS4ERR_DELAY: case -NFS4ERR_GRACE: set_bit(NFS_DELEGATED_STATE, &state->flags); @@ -2025,46 +2026,60 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta return ret; } +static void nfs_finish_clear_delegation_stateid(struct nfs4_state *state) +{ + nfs_remove_bad_delegation(state->inode); + write_seqlock(&state->seqlock); + nfs4_stateid_copy(&state->stateid, &state->open_stateid); + write_sequnlock(&state->seqlock); + clear_bit(NFS_DELEGATED_STATE, &state->flags); +} + +static void nfs40_clear_delegation_stateid(struct nfs4_state *state) +{ + if (rcu_access_pointer(NFS_I(state->inode)->delegation) != NULL) + nfs_finish_clear_delegation_stateid(state); +} + +static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) +{ + /* NFSv4.0 doesn't allow for delegation recovery on open expire */ + nfs40_clear_delegation_stateid(state); + return nfs4_open_expired(sp, state); +} + #if 
defined(CONFIG_NFS_V4_1) -static void nfs41_clear_delegation_stateid(struct nfs4_state *state) +static void nfs41_check_delegation_stateid(struct nfs4_state *state) { struct nfs_server *server = NFS_SERVER(state->inode); - nfs4_stateid *stateid = &state->stateid; + nfs4_stateid stateid; struct nfs_delegation *delegation; - struct rpc_cred *cred = NULL; - int status = -NFS4ERR_BAD_STATEID; - - /* If a state reset has been done, test_stateid is unneeded */ - if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) - return; + struct rpc_cred *cred; + int status; /* Get the delegation credential for use by test/free_stateid */ rcu_read_lock(); delegation = rcu_dereference(NFS_I(state->inode)->delegation); - if (delegation != NULL && - nfs4_stateid_match(&delegation->stateid, stateid)) { - cred = get_rpccred(delegation->cred); - rcu_read_unlock(); - status = nfs41_test_stateid(server, stateid, cred); - trace_nfs4_test_delegation_stateid(state, NULL, status); - } else + if (delegation == NULL) { rcu_read_unlock(); + return; + } + + nfs4_stateid_copy(&stateid, &delegation->stateid); + cred = get_rpccred(delegation->cred); + rcu_read_unlock(); + status = nfs41_test_stateid(server, &stateid, cred); + trace_nfs4_test_delegation_stateid(state, NULL, status); if (status != NFS_OK) { /* Free the stateid unless the server explicitly * informs us the stateid is unrecognized. 
*/ if (status != -NFS4ERR_BAD_STATEID) - nfs41_free_stateid(server, stateid, cred); - nfs_remove_bad_delegation(state->inode); - - write_seqlock(&state->seqlock); - nfs4_stateid_copy(&state->stateid, &state->open_stateid); - write_sequnlock(&state->seqlock); - clear_bit(NFS_DELEGATED_STATE, &state->flags); + nfs41_free_stateid(server, &stateid, cred); + nfs_finish_clear_delegation_stateid(state); } - if (cred != NULL) - put_rpccred(cred); + put_rpccred(cred); } /** @@ -2108,7 +2123,7 @@ static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st { int status; - nfs41_clear_delegation_stateid(state); + nfs41_check_delegation_stateid(state); status = nfs41_check_open_stateid(state); if (status != NFS_OK) status = nfs4_open_expired(sp, state); @@ -2236,10 +2251,12 @@ static int _nfs4_do_open(struct inode *dir, } } - if (ctx_th && server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) { - opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc(); - if (!opendata->f_attr.mdsthreshold) - goto err_free_label; + if (server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) { + if (!opendata->f_attr.mdsthreshold) { + opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc(); + if (!opendata->f_attr.mdsthreshold) + goto err_free_label; + } opendata->o_arg.open_bitmap = &nfs4_pnfs_open_bitmap[0]; } if (dentry->d_inode != NULL) @@ -2267,11 +2284,10 @@ static int _nfs4_do_open(struct inode *dir, if (opendata->file_created) *opened |= FILE_CREATED; - if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) + if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) { *ctx_th = opendata->f_attr.mdsthreshold; - else - kfree(opendata->f_attr.mdsthreshold); - opendata->f_attr.mdsthreshold = NULL; + opendata->f_attr.mdsthreshold = NULL; + } nfs4_label_free(olabel); @@ -2281,7 +2297,6 @@ static int _nfs4_do_open(struct inode *dir, err_free_label: nfs4_label_free(olabel); err_opendata_put: - kfree(opendata->f_attr.mdsthreshold); 
nfs4_opendata_put(opendata); err_put_state_owner: nfs4_put_state_owner(sp); @@ -2531,6 +2546,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) struct nfs4_closedata *calldata = data; struct nfs4_state *state = calldata->state; struct inode *inode = calldata->inode; + bool is_rdonly, is_wronly, is_rdwr; int call_close = 0; dprintk("%s: begin!\n", __func__); @@ -2538,21 +2554,27 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) goto out_wait; task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; - calldata->arg.fmode = FMODE_READ|FMODE_WRITE; spin_lock(&state->owner->so_lock); + is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags); + is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags); + is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags); /* Calculate the change in open mode */ + calldata->arg.fmode = 0; if (state->n_rdwr == 0) { - if (state->n_rdonly == 0) { - call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags); - call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags); - calldata->arg.fmode &= ~FMODE_READ; - } - if (state->n_wronly == 0) { - call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags); - call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags); - calldata->arg.fmode &= ~FMODE_WRITE; - } - } + if (state->n_rdonly == 0) + call_close |= is_rdonly; + else if (is_rdonly) + calldata->arg.fmode |= FMODE_READ; + if (state->n_wronly == 0) + call_close |= is_wronly; + else if (is_wronly) + calldata->arg.fmode |= FMODE_WRITE; + } else if (is_rdwr) + calldata->arg.fmode |= FMODE_READ|FMODE_WRITE; + + if (calldata->arg.fmode == 0) + call_close |= is_rdwr; + if (!nfs4_valid_open_stateid(state)) call_close = 0; spin_unlock(&state->owner->so_lock); @@ -6883,7 +6905,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr int ret = 0; if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0) - return 0; + return -EAGAIN; task = _nfs41_proc_sequence(clp, cred, false); if 
(IS_ERR(task)) ret = PTR_ERR(task); @@ -7216,6 +7238,9 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) dprintk("--> %s\n", __func__); + /* nfs4_layoutget_release calls pnfs_put_layout_hdr */ + pnfs_get_layout_hdr(NFS_I(inode)->layout); + lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); if (!lgp->args.layout.pages) { nfs4_layoutget_release(lgp); @@ -7228,9 +7253,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) lgp->res.seq_res.sr_slot = NULL; nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); - /* nfs4_layoutget_release calls pnfs_put_layout_hdr */ - pnfs_get_layout_hdr(NFS_I(inode)->layout); - task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return ERR_CAST(task); @@ -7894,7 +7916,7 @@ static const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { static const struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, - .recover_open = nfs4_open_expired, + .recover_open = nfs40_open_expired, .recover_lock = nfs4_lock_expired, .establish_clid = nfs4_init_clientid, }; diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 1720d32..e1ba58c 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -88,10 +88,18 @@ nfs4_renew_state(struct work_struct *work) } nfs_expire_all_delegations(clp); } else { + int ret; + /* Queue an asynchronous RENEW. */ - ops->sched_state_renewal(clp, cred, renew_flags); + ret = ops->sched_state_renewal(clp, cred, renew_flags); put_rpccred(cred); - goto out_exp; + switch (ret) { + default: + goto out_exp; + case -EAGAIN: + case -ENOMEM: + break; + } } } else { dprintk("%s: failed to call renewd. 
Reason: lease not expired \n", diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 26c07f9..03c5315 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1690,7 +1690,8 @@ restart: if (status < 0) { set_bit(ops->owner_flag_bit, &sp->so_flags); nfs4_put_state_owner(sp); - return nfs4_recovery_handle_error(clp, status); + status = nfs4_recovery_handle_error(clp, status); + return (status != 0) ? status : -EAGAIN; } nfs4_put_state_owner(sp); @@ -1699,7 +1700,7 @@ restart: spin_unlock(&clp->cl_lock); } rcu_read_unlock(); - return status; + return 0; } static int nfs4_check_lease(struct nfs_client *clp) @@ -1746,7 +1747,6 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status) break; case -NFS4ERR_STALE_CLIENTID: clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); - nfs4_state_clear_reclaim_reboot(clp); nfs4_state_start_reclaim_reboot(clp); break; case -NFS4ERR_CLID_INUSE: @@ -2173,14 +2173,11 @@ static void nfs4_state_manager(struct nfs_client *clp) section = "reclaim reboot"; status = nfs4_do_reclaim(clp, clp->cl_mvops->reboot_recovery_ops); - if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) || - test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) - continue; - nfs4_state_end_reclaim_reboot(clp); - if (test_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) + if (status == -EAGAIN) continue; if (status < 0) goto out_error; + nfs4_state_end_reclaim_reboot(clp); } /* Now recover expired state... 
*/ @@ -2188,9 +2185,7 @@ static void nfs4_state_manager(struct nfs_client *clp) section = "reclaim nograce"; status = nfs4_do_reclaim(clp, clp->cl_mvops->nograce_recovery_ops); - if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) || - test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) || - test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) + if (status == -EAGAIN) continue; if (status < 0) goto out_error; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 2ffebf2..27d7f27 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -113,7 +113,7 @@ __nfs_iocounter_wait(struct nfs_io_counter *c) if (atomic_read(&c->io_count) == 0) break; ret = nfs_wait_bit_killable(&c->flags); - } while (atomic_read(&c->io_count) != 0); + } while (atomic_read(&c->io_count) != 0 && !ret); finish_wait(wq, &q.wait); return ret; } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index a03b9c6..64940b5 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2197,6 +2197,7 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) data->timeo = 10U * nfss->client->cl_timeout->to_initval / HZ; data->nfs_server.port = nfss->port; data->nfs_server.addrlen = nfss->nfs_client->cl_addrlen; + data->net = current->nsproxy->net_ns; memcpy(&data->nfs_server.address, &nfss->nfs_client->cl_addr, data->nfs_server.addrlen); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index c6aa89f..3a1b1d1 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -913,12 +913,14 @@ static bool nfs_write_pageuptodate(struct page *page, struct inode *inode) if (nfs_have_delegated_attributes(inode)) goto out; - if (nfsi->cache_validity & (NFS_INO_INVALID_DATA|NFS_INO_REVAL_PAGECACHE)) + if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) return false; smp_rmb(); if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags)) return false; out: + if (nfsi->cache_validity & NFS_INO_INVALID_DATA) + return false; return PageUptodate(page) != 0; } diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 8a50b3c..e15bcbd 100644 --- 
a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -385,8 +385,10 @@ sort_pacl(struct posix_acl *pacl) * by uid/gid. */ int i, j; - if (pacl->a_count <= 4) - return; /* no users or groups */ + /* no users or groups */ + if (!pacl || pacl->a_count <= 4) + return; + i = 1; while (pacl->a_entries[i].e_tag == ACL_USER) i++; @@ -513,13 +515,12 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) /* * ACLs with no ACEs are treated differently in the inheritable - * and effective cases: when there are no inheritable ACEs, we - * set a zero-length default posix acl: + * and effective cases: when there are no inheritable ACEs, + * calls ->set_acl with a NULL ACL structure. */ - if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT)) { - pacl = posix_acl_alloc(0, GFP_KERNEL); - return pacl ? pacl : ERR_PTR(-ENOMEM); - } + if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT)) + return NULL; + /* * When there are no effective ACEs, the following will end * up setting a 3-element effective posix ACL with all diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 7f05cd1..f42bbe5 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -637,9 +637,11 @@ static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses) { + int maxtime = max_cb_time(clp->net); struct rpc_timeout timeparms = { - .to_initval = max_cb_time(clp->net), + .to_initval = maxtime, .to_retries = 0, + .to_maxval = maxtime, }; struct rpc_create_args args = { .net = clp->net, @@ -670,7 +672,8 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c clp->cl_cb_session = ses; args.bc_xprt = conn->cb_xprt; args.prognumber = clp->cl_cb_session->se_cb_prog; - args.protocol = XPRT_TRANSPORT_BC_TCP; + args.protocol = conn->cb_xprt->xpt_class->xcl_ident | + XPRT_TRANSPORT_BC; args.authflavor = ses->se_cb_sec.flavor; } /* Create RPC 
client */ @@ -781,8 +784,12 @@ static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task) { if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { rpc_sleep_on(&clp->cl_cb_waitq, task, NULL); - dprintk("%s slot is busy\n", __func__); - return false; + /* Race breaker */ + if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { + dprintk("%s slot is busy\n", __func__); + return false; + } + rpc_wake_up_queued_task(&clp->cl_cb_waitq, task); } return true; } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 419572f..25024d5 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -610,15 +610,6 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, switch (create->cr_type) { case NF4LNK: - /* ugh! we have to null-terminate the linktext, or - * vfs_symlink() will choke. it is always safe to - * null-terminate by brute force, since at worst we - * will overwrite the first byte of the create namelen - * in the XDR buffer, which has already been extracted - * during XDR decode. 
- */ - create->cr_linkname[create->cr_linklen] = 0; - status = nfsd_symlink(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, create->cr_linkname, create->cr_linklen, @@ -1242,7 +1233,8 @@ static bool need_wrongsec_check(struct svc_rqst *rqstp) */ if (argp->opcnt == resp->opcnt) return false; - + if (next->opnum == OP_ILLEGAL) + return false; nextd = OPDESC(next); /* * Rest of 2.6.3.1.1: certain operations will return WRONGSEC @@ -1349,6 +1341,12 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, /* If op is non-idempotent */ if (opdesc->op_flags & OP_MODIFIES_SOMETHING) { plen = opdesc->op_rsize_bop(rqstp, op); + /* + * If there's still another operation, make sure + * we'll have space to at least encode an error: + */ + if (resp->opcnt < args->opcnt) + plen += COMPOUND_ERR_SLACK_SPACE; op->status = nfsd4_check_resp_size(resp, plen); } @@ -1513,7 +1511,8 @@ static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *o static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { - return (op_encode_hdr_size + 2 + 1024) * sizeof(__be32); + return (op_encode_hdr_size + 2 + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) * + sizeof(__be32); } static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 0874998..0a138e4 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1071,6 +1071,18 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) return NULL; } clp->cl_name.len = name.len; + INIT_LIST_HEAD(&clp->cl_sessions); + idr_init(&clp->cl_stateids); + atomic_set(&clp->cl_refcount, 0); + clp->cl_cb_state = NFSD4_CB_UNKNOWN; + INIT_LIST_HEAD(&clp->cl_idhash); + INIT_LIST_HEAD(&clp->cl_openowners); + INIT_LIST_HEAD(&clp->cl_delegations); + INIT_LIST_HEAD(&clp->cl_lru); + INIT_LIST_HEAD(&clp->cl_callbacks); + INIT_LIST_HEAD(&clp->cl_revoked); + spin_lock_init(&clp->cl_lock); + rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel 
slot table"); return clp; } @@ -1088,6 +1100,7 @@ free_client(struct nfs4_client *clp) WARN_ON_ONCE(atomic_read(&ses->se_ref)); free_session(ses); } + rpc_destroy_wait_queue(&clp->cl_cb_waitq); free_svc_cred(&clp->cl_cred); kfree(clp->cl_name.data); idr_destroy(&clp->cl_stateids); @@ -1184,15 +1197,14 @@ static int copy_cred(struct svc_cred *target, struct svc_cred *source) return 0; } -static long long +static int compare_blob(const struct xdr_netobj *o1, const struct xdr_netobj *o2) { - long long res; - - res = o1->len - o2->len; - if (res) - return res; - return (long long)memcmp(o1->data, o2->data, o1->len); + if (o1->len < o2->len) + return -1; + if (o1->len > o2->len) + return 1; + return memcmp(o1->data, o2->data, o1->len); } static int same_name(const char *n1, const char *n2) @@ -1335,7 +1347,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name, if (clp == NULL) return NULL; - INIT_LIST_HEAD(&clp->cl_sessions); ret = copy_cred(&clp->cl_cred, &rqstp->rq_cred); if (ret) { spin_lock(&nn->client_lock); @@ -1343,20 +1354,9 @@ static struct nfs4_client *create_client(struct xdr_netobj name, spin_unlock(&nn->client_lock); return NULL; } - idr_init(&clp->cl_stateids); - atomic_set(&clp->cl_refcount, 0); - clp->cl_cb_state = NFSD4_CB_UNKNOWN; - INIT_LIST_HEAD(&clp->cl_idhash); - INIT_LIST_HEAD(&clp->cl_openowners); - INIT_LIST_HEAD(&clp->cl_delegations); - INIT_LIST_HEAD(&clp->cl_lru); - INIT_LIST_HEAD(&clp->cl_callbacks); - INIT_LIST_HEAD(&clp->cl_revoked); - spin_lock_init(&clp->cl_lock); nfsd4_init_callback(&clp->cl_cb_null); clp->cl_time = get_seconds(); clear_bit(0, &clp->cl_cb_slot_busy); - rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); copy_verf(clp, verf); rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa); gen_confirm(clp); @@ -1388,7 +1388,7 @@ add_clp_to_name_tree(struct nfs4_client *new_clp, struct rb_root *root) static struct nfs4_client * find_clp_in_name_tree(struct xdr_netobj *name, struct rb_root *root) { - long 
long cmp; + int cmp; struct rb_node *node = root->rb_node; struct nfs4_client *clp; @@ -3695,9 +3695,16 @@ out: static __be32 nfsd4_free_lock_stateid(struct nfs4_ol_stateid *stp) { - if (check_for_locks(stp->st_file, lockowner(stp->st_stateowner))) + struct nfs4_lockowner *lo = lockowner(stp->st_stateowner); + + if (check_for_locks(stp->st_file, lo)) return nfserr_locks_held; - release_lock_stateid(stp); + /* + * Currently there's a 1-1 lock stateid<->lockowner + * correspondance, and we have to delete the lockowner when we + * delete the lock stateid: + */ + release_lockowner(lo); return nfs_ok; } @@ -4141,6 +4148,10 @@ static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, c if (!same_owner_str(&lo->lo_owner, owner, clid)) return false; + if (list_empty(&lo->lo_owner.so_stateids)) { + WARN_ON_ONCE(1); + return false; + } lst = list_first_entry(&lo->lo_owner.so_stateids, struct nfs4_ol_stateid, st_perstateowner); return lst->st_file->fi_inode == inode; @@ -5048,7 +5059,6 @@ nfs4_state_destroy_net(struct net *net) int i; struct nfs4_client *clp = NULL; struct nfsd_net *nn = net_generic(net, nfsd_net_id); - struct rb_node *node, *tmp; for (i = 0; i < CLIENT_HASH_SIZE; i++) { while (!list_empty(&nn->conf_id_hashtbl[i])) { @@ -5057,13 +5067,11 @@ nfs4_state_destroy_net(struct net *net) } } - node = rb_first(&nn->unconf_name_tree); - while (node != NULL) { - tmp = node; - node = rb_next(tmp); - clp = rb_entry(tmp, struct nfs4_client, cl_namenode); - rb_erase(tmp, &nn->unconf_name_tree); - destroy_client(clp); + for (i = 0; i < CLIENT_HASH_SIZE; i++) { + while (!list_empty(&nn->unconf_id_hashtbl[i])) { + clp = list_entry(nn->unconf_id_hashtbl[i].next, struct nfs4_client, cl_idhash); + destroy_client(clp); + } } kfree(nn->sessionid_hashtbl); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index ecc735e..1c825ae 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -594,7 +594,18 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct 
nfsd4_create *create READ_BUF(4); READ32(create->cr_linklen); READ_BUF(create->cr_linklen); - SAVEMEM(create->cr_linkname, create->cr_linklen); + /* + * The VFS will want a null-terminated string, and + * null-terminating in place isn't safe since this might + * end on a page boundary: + */ + create->cr_linkname = + kmalloc(create->cr_linklen + 1, GFP_KERNEL); + if (!create->cr_linkname) + return nfserr_jukebox; + memcpy(create->cr_linkname, p, create->cr_linklen); + create->cr_linkname[create->cr_linklen] = '\0'; + defer_free(argp, kfree, create->cr_linkname); break; case NF4BLK: case NF4CHR: @@ -1775,6 +1786,9 @@ static __be32 nfsd4_encode_components_esc(char sep, char *components, } else end++; + if (found_esc) + end = next; + str = end; } *pp = p; @@ -2113,8 +2127,8 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, err = vfs_getattr(&path, &stat); if (err) goto out_nfserr; - if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | - FATTR4_WORD0_MAXNAME)) || + if ((bmval0 & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE | + FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) || (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL))) { err = vfs_statfs(&path, &statfs); @@ -2502,6 +2516,8 @@ out_acl: goto out; } if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { + if ((buflen -= 16) < 0) + goto out_resource; WRITE32(3); WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0); WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD1); @@ -3510,6 +3526,9 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_test_stateid_id *stateid, *next; __be32 *p; + if (nfserr) + return nfserr; + RESERVE_SPACE(4 + (4 * test_stateid->ts_num_ids)); *p++ = htonl(test_stateid->ts_num_ids); diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index b6af150..6040da8 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -132,13 +132,6 @@ nfsd_reply_cache_alloc(void) } static void -nfsd_reply_cache_unhash(struct 
svc_cacherep *rp) -{ - hlist_del_init(&rp->c_hash); - list_del_init(&rp->c_lru); -} - -static void nfsd_reply_cache_free_locked(struct svc_cacherep *rp) { if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) { @@ -231,13 +224,6 @@ hash_refile(struct svc_cacherep *rp) hlist_add_head(&rp->c_hash, cache_hash + hash_32(rp->c_xid, maskbits)); } -static inline bool -nfsd_cache_entry_expired(struct svc_cacherep *rp) -{ - return rp->c_state != RC_INPROG && - time_after(jiffies, rp->c_timestamp + RC_EXPIRE); -} - /* * Walk the LRU list and prune off entries that are older than RC_EXPIRE. * Also prune the oldest ones when the total exceeds the max number of entries. @@ -249,8 +235,14 @@ prune_cache_entries(void) long freed = 0; list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) { - if (!nfsd_cache_entry_expired(rp) && - num_drc_entries <= max_drc_entries) + /* + * Don't free entries attached to calls that are still + * in-progress, but do keep scanning the list. + */ + if (rp->c_state == RC_INPROG) + continue; + if (num_drc_entries <= max_drc_entries && + time_before(jiffies, rp->c_timestamp + RC_EXPIRE)) break; nfsd_reply_cache_free_locked(rp); freed++; @@ -416,22 +408,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) /* * Since the common case is a cache miss followed by an insert, - * preallocate an entry. First, try to reuse the first entry on the LRU - * if it works, then go ahead and prune the LRU list. + * preallocate an entry. */ - spin_lock(&cache_lock); - if (!list_empty(&lru_head)) { - rp = list_first_entry(&lru_head, struct svc_cacherep, c_lru); - if (nfsd_cache_entry_expired(rp) || - num_drc_entries >= max_drc_entries) { - nfsd_reply_cache_unhash(rp); - prune_cache_entries(); - goto search_cache; - } - } - - /* No expired ones available, allocate a new one. 
*/ - spin_unlock(&cache_lock); rp = nfsd_reply_cache_alloc(); spin_lock(&cache_lock); if (likely(rp)) { @@ -439,7 +417,9 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) drc_mem_usage += sizeof(*rp); } -search_cache: + /* go ahead and prune the cache */ + prune_cache_entries(); + found = nfsd_cache_search(rqstp, csum); if (found) { if (likely(rp)) @@ -453,15 +433,6 @@ search_cache: goto out; } - /* - * We're keeping the one we just allocated. Are we now over the - * limit? Prune one off the tip of the LRU in trade for the one we - * just allocated if so. - */ - if (num_drc_entries >= max_drc_entries) - nfsd_reply_cache_free_locked(list_first_entry(&lru_head, - struct svc_cacherep, c_lru)); - nfsdstats.rcmisses++; rqstp->rq_cacherep = rp; rp->c_state = RC_INPROG; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 7f55517..f34d9de 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -699,6 +699,11 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net) if (err != 0 || fd < 0) return -EINVAL; + if (svc_alien_sock(net, fd)) { + printk(KERN_ERR "%s: socket net is different to NFSd's one\n", __func__); + return -EINVAL; + } + err = nfsd_create_serv(net); if (err != 0) return err; diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 30f34ab..f417fef 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -282,7 +282,7 @@ void nfsd_lockd_shutdown(void); * reason. 
*/ #define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */ -#define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */ +#define COMPOUND_ERR_SLACK_SPACE 16 /* OP_SETATTR */ #define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */ @@ -328,12 +328,15 @@ void nfsd_lockd_shutdown(void); (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT) #ifdef CONFIG_NFSD_V4_SECURITY_LABEL -#define NFSD4_2_SUPPORTED_ATTRS_WORD2 \ - (NFSD4_1_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SECURITY_LABEL) +#define NFSD4_2_SECURITY_ATTRS FATTR4_WORD2_SECURITY_LABEL #else -#define NFSD4_2_SUPPORTED_ATTRS_WORD2 0 +#define NFSD4_2_SECURITY_ATTRS 0 #endif +#define NFSD4_2_SUPPORTED_ATTRS_WORD2 \ + (NFSD4_1_SUPPORTED_ATTRS_WORD2 | \ + NFSD4_2_SECURITY_ATTRS) + static inline u32 nfsd_suppattrs0(u32 minorversion) { return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0 diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 760c85a..4942f43 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -221,7 +221,8 @@ static int nfsd_startup_generic(int nrservs) */ ret = nfsd_racache_init(2*nrservs); if (ret) - return ret; + goto dec_users; + ret = nfs4_state_start(); if (ret) goto out_racache; @@ -229,6 +230,8 @@ static int nfsd_startup_generic(int nrservs) out_racache: nfsd_racache_shutdown(); +dec_users: + nfsd_users--; return ret; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 72cb28e..fafac65 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -407,6 +407,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, umode_t ftype = 0; __be32 err; int host_err; + bool get_write_count; int size_change = 0; if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) @@ -414,10 +415,18 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, if (iap->ia_valid & ATTR_SIZE) ftype = S_IFREG; + /* Callers that do fh_verify should do the fh_want_write: */ + get_write_count = !fhp->fh_dentry; + /* Get inode */ err = fh_verify(rqstp, fhp, ftype, accmode); if (err) goto out; + if 
(get_write_count) { + host_err = fh_want_write(fhp); + if (host_err) + return nfserrno(host_err); + } dentry = fhp->fh_dentry; inode = dentry->d_inode; @@ -500,6 +509,9 @@ set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key) char *buf = NULL; int error = 0; + if (!pacl) + return vfs_setxattr(dentry, key, NULL, 0, 0); + buflen = posix_acl_xattr_size(pacl->a_count); buf = kmalloc(buflen, GFP_KERNEL); error = -ENOMEM; diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 08fdb77..e319521 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -56,11 +56,9 @@ int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) mutex_unlock(&inode->i_mutex); nilfs = inode->i_sb->s_fs_info; - if (!err && nilfs_test_opt(nilfs, BARRIER)) { - err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); - if (err != -EIO) - err = 0; - } + if (!err) + err = nilfs_flush_device(nilfs); + return err; } diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 7e350c5..09480c53 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -24,6 +24,7 @@ #include <linux/buffer_head.h> #include <linux/gfp.h> #include <linux/mpage.h> +#include <linux/pagemap.h> #include <linux/writeback.h> #include <linux/aio.h> #include "nilfs.h" @@ -48,6 +49,8 @@ struct nilfs_iget_args { int for_gc; }; +static int nilfs_iget_test(struct inode *inode, void *opaque); + void nilfs_inode_add_blocks(struct inode *inode, int n) { struct nilfs_root *root = NILFS_I(inode)->i_root; @@ -219,10 +222,10 @@ static int nilfs_writepage(struct page *page, struct writeback_control *wbc) static int nilfs_set_page_dirty(struct page *page) { + struct inode *inode = page->mapping->host; int ret = __set_page_dirty_nobuffers(page); if (page_has_buffers(page)) { - struct inode *inode = page->mapping->host; unsigned nr_dirty = 0; struct buffer_head *bh, *head; @@ -245,6 +248,10 @@ static int nilfs_set_page_dirty(struct page *page) if (nr_dirty) nilfs_set_file_dirty(inode, nr_dirty); + } 
else if (ret) { + unsigned nr_dirty = 1 << (PAGE_CACHE_SHIFT - inode->i_blkbits); + + nilfs_set_file_dirty(inode, nr_dirty); } return ret; } @@ -342,6 +349,17 @@ const struct address_space_operations nilfs_aops = { .is_partially_uptodate = block_is_partially_uptodate, }; +static int nilfs_insert_inode_locked(struct inode *inode, + struct nilfs_root *root, + unsigned long ino) +{ + struct nilfs_iget_args args = { + .ino = ino, .root = root, .cno = 0, .for_gc = 0 + }; + + return insert_inode_locked4(inode, ino, nilfs_iget_test, &args); +} + struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) { struct super_block *sb = dir->i_sb; @@ -377,7 +395,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { err = nilfs_bmap_read(ii->i_bmap, NULL); if (err < 0) - goto failed_bmap; + goto failed_after_creation; set_bit(NILFS_I_BMAP, &ii->i_state); /* No lock is needed; iget() ensures it. */ @@ -393,21 +411,24 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) spin_lock(&nilfs->ns_next_gen_lock); inode->i_generation = nilfs->ns_next_generation++; spin_unlock(&nilfs->ns_next_gen_lock); - insert_inode_hash(inode); + if (nilfs_insert_inode_locked(inode, root, ino) < 0) { + err = -EIO; + goto failed_after_creation; + } err = nilfs_init_acl(inode, dir); if (unlikely(err)) - goto failed_acl; /* never occur. When supporting + goto failed_after_creation; /* never occur. 
When supporting nilfs_init_acl(), proper cancellation of above jobs should be considered */ return inode; - failed_acl: - failed_bmap: + failed_after_creation: clear_nlink(inode); + unlock_new_inode(inode); iput(inode); /* raw_inode will be deleted through - generic_delete_inode() */ + nilfs_evict_inode() */ goto failed; failed_ifile_create_inode: @@ -455,8 +476,8 @@ int nilfs_read_inode_common(struct inode *inode, inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); - if (inode->i_nlink == 0 && inode->i_mode == 0) - return -EINVAL; /* this inode is deleted */ + if (inode->i_nlink == 0) + return -ESTALE; /* this inode is deleted */ inode->i_blocks = le64_to_cpu(raw_inode->i_blocks); ii->i_flags = le32_to_cpu(raw_inode->i_flags); diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index b44bdb2..4915e54 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -694,11 +694,9 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp, return ret; nilfs = inode->i_sb->s_fs_info; - if (nilfs_test_opt(nilfs, BARRIER)) { - ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); - if (ret == -EIO) - return ret; - } + ret = nilfs_flush_device(nilfs); + if (ret < 0) + return ret; if (argp != NULL) { down_read(&nilfs->ns_segctor_sem); diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 9de78f0..0f84b25 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -51,9 +51,11 @@ static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode) int err = nilfs_add_link(dentry, inode); if (!err) { d_instantiate(dentry, inode); + unlock_new_inode(inode); return 0; } inode_dec_link_count(inode); + unlock_new_inode(inode); iput(inode); return err; } @@ -182,6 +184,7 @@ out: out_fail: drop_nlink(inode); nilfs_mark_inode_dirty(inode); + unlock_new_inode(inode); iput(inode); goto out; } @@ -201,11 +204,15 @@ static 
int nilfs_link(struct dentry *old_dentry, struct inode *dir, inode_inc_link_count(inode); ihold(inode); - err = nilfs_add_nondir(dentry, inode); - if (!err) + err = nilfs_add_link(dentry, inode); + if (!err) { + d_instantiate(dentry, inode); err = nilfs_transaction_commit(dir->i_sb); - else + } else { + inode_dec_link_count(inode); + iput(inode); nilfs_transaction_abort(dir->i_sb); + } return err; } @@ -243,6 +250,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) nilfs_mark_inode_dirty(inode); d_instantiate(dentry, inode); + unlock_new_inode(inode); out: if (!err) err = nilfs_transaction_commit(dir->i_sb); @@ -255,6 +263,7 @@ out_fail: drop_nlink(inode); drop_nlink(inode); nilfs_mark_inode_dirty(inode); + unlock_new_inode(inode); iput(inode); out_dir: drop_nlink(dir); diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index a1a1916..0b7d2ca 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1833,6 +1833,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) nilfs_set_next_segment(nilfs, segbuf); if (update_sr) { + nilfs->ns_flushed_device = 0; nilfs_set_last_segment(nilfs, segbuf->sb_pseg_start, segbuf->sb_sum.seg_seq, nilfs->ns_cno++); @@ -2216,6 +2217,8 @@ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode, sci->sc_dsync_end = end; err = nilfs_segctor_do_construct(sci, SC_LSEG_DSYNC); + if (!err) + nilfs->ns_flushed_device = 0; nilfs_transaction_unlock(sb); return err; diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 7ac2a12..0bdc024 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -310,6 +310,9 @@ int nilfs_commit_super(struct super_block *sb, int flag) nilfs->ns_sbsize)); } clear_nilfs_sb_dirty(nilfs); + nilfs->ns_flushed_device = 1; + /* make sure store to ns_flushed_device cannot be reordered */ + smp_wmb(); return nilfs_sync_super(sb, flag); } @@ -514,6 +517,9 @@ static int nilfs_sync_fs(struct super_block *sb, int wait) } up_write(&nilfs->ns_sem); 
+ if (!err) + err = nilfs_flush_device(nilfs); + return err; } diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index de8cc53..005e1dc 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -45,6 +45,7 @@ enum { /** * struct the_nilfs - struct to supervise multiple nilfs mount points * @ns_flags: flags + * @ns_flushed_device: flag indicating if all volatile data was flushed * @ns_bdev: block device * @ns_sem: semaphore for shared states * @ns_snapshot_mount_mutex: mutex to protect snapshot mounts @@ -98,6 +99,7 @@ enum { */ struct the_nilfs { unsigned long ns_flags; + int ns_flushed_device; struct block_device *ns_bdev; struct rw_semaphore ns_sem; @@ -353,4 +355,24 @@ static inline int nilfs_segment_is_active(struct the_nilfs *nilfs, __u64 n) return n == nilfs->ns_segnum || n == nilfs->ns_nextnum; } +static inline int nilfs_flush_device(struct the_nilfs *nilfs) +{ + int err; + + if (!nilfs_test_opt(nilfs, BARRIER) || nilfs->ns_flushed_device) + return 0; + + nilfs->ns_flushed_device = 1; + /* + * the store to ns_flushed_device must not be reordered after + * blkdev_issue_flush(). 
+ */ + smp_wmb(); + + err = blkdev_issue_flush(nilfs->ns_bdev, GFP_KERNEL, NULL); + if (err != -EIO) + err = 0; + return err; +} + #endif /* _THE_NILFS_H */ diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 6663511..cc80b0a 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -69,7 +69,7 @@ static int create_fd(struct fsnotify_group *group, pr_debug("%s: group=%p event=%p\n", __func__, group, event); - client_fd = get_unused_fd(); + client_fd = get_unused_fd_flags(group->fanotify_data.f_flags); if (client_fd < 0) return client_fd; diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index 238a593..9d7e2b9 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -42,7 +42,7 @@ static int show_mark_fhandle(struct seq_file *m, struct inode *inode) { struct { struct file_handle handle; - u8 pad[64]; + u8 pad[MAX_HANDLE_SZ]; } f; int size, ret, i; @@ -50,7 +50,7 @@ static int show_mark_fhandle(struct seq_file *m, struct inode *inode) size = f.handle.handle_bytes >> 2; ret = exportfs_encode_inode_fh(inode, (struct fid *)f.handle.f_handle, &size, 0); - if ((ret == 255) || (ret == -ENOSPC)) { + if ((ret == FILEID_INVALID) || (ret < 0)) { WARN_ONCE(1, "Can't encode file handler for inotify: %d\n", ret); return 0; } diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 4bb21d6..a3153e2 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -63,14 +63,14 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) spin_lock(&inode->i_lock); /* run all of the dentries associated with this inode. 
Since this is a * directory, there damn well better only be one item on this list */ - hlist_for_each_entry(alias, &inode->i_dentry, d_alias) { + hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { struct dentry *child; /* run all of the children of the original inode and fix their * d_flags to indicate parental interest (their parent is the * original inode) */ spin_lock(&alias->d_lock); - list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) { + list_for_each_entry(child, &alias->d_subdirs, d_child) { if (!child->d_inode) continue; diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index 74825be..fbb9dfb 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -288,20 +288,25 @@ void fsnotify_unmount_inodes(struct list_head *list) spin_unlock(&inode->i_lock); /* In case the dropping of a reference would nuke next_i. */ - if ((&next_i->i_sb_list != list) && - atomic_read(&next_i->i_count)) { + while (&next_i->i_sb_list != list) { spin_lock(&next_i->i_lock); - if (!(next_i->i_state & (I_FREEING | I_WILL_FREE))) { + if (!(next_i->i_state & (I_FREEING | I_WILL_FREE)) && + atomic_read(&next_i->i_count)) { __iget(next_i); need_iput = next_i; + spin_unlock(&next_i->i_lock); + break; } spin_unlock(&next_i->i_lock); + next_i = list_entry(next_i->i_sb_list.next, + struct inode, i_sb_list); } /* - * We can safely drop inode_sb_list_lock here because we hold - * references on both inode and next_i. Also no new inodes - * will be added since the umount has begun. + * We can safely drop inode_sb_list_lock here because either + * we actually hold references on both inode and next_i or + * end of list. Also no new inodes will be added since the + * umount has begun. 
*/ spin_unlock(&inode_sb_list_lock); diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index a27e3fe..250ed5b 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -1748,7 +1748,6 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size) if (page) { set_page_dirty(page); unlock_page(page); - mark_page_accessed(page); page_cache_release(page); } ntfs_debug("Done."); diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index ea4ba9d..86ddab9 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -2060,7 +2060,6 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, } do { unlock_page(pages[--do_pages]); - mark_page_accessed(pages[do_pages]); page_cache_release(pages[do_pages]); } while (do_pages); if (unlikely(status)) @@ -2134,7 +2133,7 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); mutex_unlock(&inode->i_mutex); if (ret > 0) { - int err = generic_write_sync(file, pos, ret); + int err = generic_write_sync(file, iocb->ki_pos - ret, ret); if (err < 0) ret = err; } diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index f37d3c0..dd2c4e4 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -912,7 +912,7 @@ void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages) } } -static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) +static void ocfs2_unlock_pages(struct ocfs2_write_ctxt *wc) { int i; @@ -933,7 +933,11 @@ static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) page_cache_release(wc->w_target_page); } ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages); +} +static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) +{ + ocfs2_unlock_pages(wc); brelse(wc->w_di_bh); kfree(wc); } @@ -2055,11 +2059,19 @@ out_write_size: di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); ocfs2_journal_dirty(handle, wc->w_di_bh); + /* unlock pages before dealloc since it needs acquiring j_trans_barrier + * lock, or 
it will cause a deadlock since journal commit threads holds + * this lock and will ask for the page lock when flushing the data. + * put it here to preserve the unlock order. + */ + ocfs2_unlock_pages(wc); + ocfs2_commit_trans(osb, handle); ocfs2_run_deallocs(osb, &wc->w_dealloc); - ocfs2_free_write_ctxt(wc); + brelse(wc->w_di_bh); + kfree(wc); return copied; } diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index 5d18ad1..4f66e00 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -90,7 +90,6 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, * information for this bh as it's not marked locally * uptodate. */ ret = -EIO; - put_bh(bh); mlog_errno(ret); } @@ -420,7 +419,6 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb, if (!buffer_uptodate(bh)) { ret = -EIO; - put_bh(bh); mlog_errno(ret); } diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index 0d3a97d..92edcfc 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -37,7 +37,6 @@ #include "dlmglue.h" #include "file.h" #include "inode.h" -#include "super.h" #include "ocfs2_trace.h" void ocfs2_dentry_attach_gen(struct dentry *dentry) @@ -173,7 +172,7 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode, struct dentry *dentry; spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { spin_lock(&dentry->d_lock); if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { trace_ocfs2_find_local_alias(dentry->d_name.len, @@ -346,52 +345,6 @@ out_attach: return ret; } -DEFINE_SPINLOCK(dentry_list_lock); - -/* We limit the number of dentry locks to drop in one go. We have - * this limit so that we don't starve other users of ocfs2_wq. 
*/ -#define DL_INODE_DROP_COUNT 64 - -/* Drop inode references from dentry locks */ -static void __ocfs2_drop_dl_inodes(struct ocfs2_super *osb, int drop_count) -{ - struct ocfs2_dentry_lock *dl; - - spin_lock(&dentry_list_lock); - while (osb->dentry_lock_list && (drop_count < 0 || drop_count--)) { - dl = osb->dentry_lock_list; - osb->dentry_lock_list = dl->dl_next; - spin_unlock(&dentry_list_lock); - iput(dl->dl_inode); - kfree(dl); - spin_lock(&dentry_list_lock); - } - spin_unlock(&dentry_list_lock); -} - -void ocfs2_drop_dl_inodes(struct work_struct *work) -{ - struct ocfs2_super *osb = container_of(work, struct ocfs2_super, - dentry_lock_work); - - __ocfs2_drop_dl_inodes(osb, DL_INODE_DROP_COUNT); - /* - * Don't queue dropping if umount is in progress. We flush the - * list in ocfs2_dismount_volume - */ - spin_lock(&dentry_list_lock); - if (osb->dentry_lock_list && - !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED)) - queue_work(ocfs2_wq, &osb->dentry_lock_work); - spin_unlock(&dentry_list_lock); -} - -/* Flush the whole work queue */ -void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb) -{ - __ocfs2_drop_dl_inodes(osb, -1); -} - /* * ocfs2_dentry_iput() and friends. 
* @@ -416,24 +369,16 @@ void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb) static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb, struct ocfs2_dentry_lock *dl) { + iput(dl->dl_inode); ocfs2_simple_drop_lockres(osb, &dl->dl_lockres); ocfs2_lock_res_free(&dl->dl_lockres); - - /* We leave dropping of inode reference to ocfs2_wq as that can - * possibly lead to inode deletion which gets tricky */ - spin_lock(&dentry_list_lock); - if (!osb->dentry_lock_list && - !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED)) - queue_work(ocfs2_wq, &osb->dentry_lock_work); - dl->dl_next = osb->dentry_lock_list; - osb->dentry_lock_list = dl; - spin_unlock(&dentry_list_lock); + kfree(dl); } void ocfs2_dentry_lock_put(struct ocfs2_super *osb, struct ocfs2_dentry_lock *dl) { - int unlock; + int unlock = 0; BUG_ON(dl->dl_count == 0); diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h index b79eff7..55f5889 100644 --- a/fs/ocfs2/dcache.h +++ b/fs/ocfs2/dcache.h @@ -29,13 +29,8 @@ extern const struct dentry_operations ocfs2_dentry_ops; struct ocfs2_dentry_lock { - /* Use count of dentry lock */ unsigned int dl_count; - union { - /* Linked list of dentry locks to release */ - struct ocfs2_dentry_lock *dl_next; - u64 dl_parent_blkno; - }; + u64 dl_parent_blkno; /* * The ocfs2_dentry_lock keeps an inode reference until @@ -49,14 +44,9 @@ struct ocfs2_dentry_lock { int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode, u64 parent_blkno); -extern spinlock_t dentry_list_lock; - void ocfs2_dentry_lock_put(struct ocfs2_super *osb, struct ocfs2_dentry_lock *dl); -void ocfs2_drop_dl_inodes(struct work_struct *work); -void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb); - struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno, int skip_unhashed); diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index cf0f103..673c9bf 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -650,12 +650,9 @@ void 
dlm_lockres_clear_refmap_bit(struct dlm_ctxt *dlm, clear_bit(bit, res->refmap); } - -void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, +static void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { - assert_spin_locked(&res->spinlock); - res->inflight_locks++; mlog(0, "%s: res %.*s, inflight++: now %u, %ps()\n", dlm->name, @@ -663,6 +660,13 @@ void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, __builtin_return_address(0)); } +void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res) +{ + assert_spin_locked(&res->spinlock); + __dlm_lockres_grab_inflight_ref(dlm, res); +} + void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { @@ -852,10 +856,8 @@ lookup: /* finally add the lockres to its hash bucket */ __dlm_insert_lockres(dlm, res); - /* Grab inflight ref to pin the resource */ - spin_lock(&res->spinlock); - dlm_lockres_grab_inflight_ref(dlm, res); - spin_unlock(&res->spinlock); + /* since this lockres is new it does not require the spinlock */ + __dlm_lockres_grab_inflight_ref(dlm, res); /* get an extra ref on the mle in case this is a BLOCK * if so, the creator of the BLOCK may try to put the last diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 0b5adca..7b4a3fa 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -537,7 +537,10 @@ master_here: /* success!
see if any other nodes need recovery */ mlog(0, "DONE mastering recovery of %s:%u here(this=%u)!\n", dlm->name, dlm->reco.dead_node, dlm->node_num); - dlm_reset_recovery(dlm); + spin_lock(&dlm->spinlock); + __dlm_reset_recovery(dlm); + dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE; + spin_unlock(&dlm->spinlock); } dlm_end_recovery(dlm); @@ -695,6 +698,14 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node) if (all_nodes_done) { int ret; + /* Set this flag on the recovery master to prevent + * a new recovery for another dead node from starting + * before the current recovery is done. That may + * cause recovery to hang. */ + spin_lock(&dlm->spinlock); + dlm->reco.state |= DLM_RECO_STATE_FINALIZE; + spin_unlock(&dlm->spinlock); + /* all nodes are now in DLM_RECO_NODE_DATA_DONE state * just send a finalize message to everyone and * clean up */ @@ -1750,13 +1761,13 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, struct dlm_migratable_lockres *mres) { struct dlm_migratable_lock *ml; - struct list_head *queue; + struct list_head *queue, *iter; struct list_head *tmpq = NULL; struct dlm_lock *newlock = NULL; struct dlm_lockstatus *lksb = NULL; int ret = 0; int i, j, bad; - struct dlm_lock *lock = NULL; + struct dlm_lock *lock; u8 from = O2NM_MAX_NODES; unsigned int added = 0; __be64 c; @@ -1791,14 +1802,16 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, /* MIGRATION ONLY!
*/ BUG_ON(!(mres->flags & DLM_MRES_MIGRATION)); + lock = NULL; spin_lock(&res->spinlock); for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) { tmpq = dlm_list_idx_to_ptr(res, j); - list_for_each_entry(lock, tmpq, list) { - if (lock->ml.cookie != ml->cookie) - lock = NULL; - else + list_for_each(iter, tmpq) { + lock = list_entry(iter, + struct dlm_lock, list); + if (lock->ml.cookie == ml->cookie) break; + lock = NULL; } if (lock) break; @@ -2875,8 +2888,8 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data, BUG(); } dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE; + __dlm_reset_recovery(dlm); spin_unlock(&dlm->spinlock); - dlm_reset_recovery(dlm); dlm_kick_recovery_thread(dlm); break; default: diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 3a44a64..3988d0a 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -3142,22 +3142,60 @@ out: return 0; } +static void ocfs2_process_blocked_lock(struct ocfs2_super *osb, + struct ocfs2_lock_res *lockres); + /* Mark the lockres as being dropped. It will no longer be * queued if blocking, but we still may have to wait on it * being dequeued from the downconvert thread before we can consider * it safe to drop. * * You can *not* attempt to call cluster_lock on this lockres anymore. */ -void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres) +void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb, + struct ocfs2_lock_res *lockres) { int status; struct ocfs2_mask_waiter mw; - unsigned long flags; + unsigned long flags, flags2; ocfs2_init_mask_waiter(&mw); spin_lock_irqsave(&lockres->l_lock, flags); lockres->l_flags |= OCFS2_LOCK_FREEING; + if (lockres->l_flags & OCFS2_LOCK_QUEUED && current == osb->dc_task) { + /* + * We know the downconvert is queued but not in progress + * because we are the downconvert thread and processing + * different lock. So we can just remove the lock from the + * queue. 
This is not only an optimization but also a way + * to avoid the following deadlock: + * ocfs2_dentry_post_unlock() + * ocfs2_dentry_lock_put() + * ocfs2_drop_dentry_lock() + * iput() + * ocfs2_evict_inode() + * ocfs2_clear_inode() + * ocfs2_mark_lockres_freeing() + * ... blocks waiting for OCFS2_LOCK_QUEUED + * since we are the downconvert thread which + * should clear the flag. + */ + spin_unlock_irqrestore(&lockres->l_lock, flags); + spin_lock_irqsave(&osb->dc_task_lock, flags2); + list_del_init(&lockres->l_blocked_list); + osb->blocked_lock_count--; + spin_unlock_irqrestore(&osb->dc_task_lock, flags2); + /* + * Warn if we recurse into another post_unlock call. Strictly + * speaking it isn't a problem but we need to be careful if + * that happens (stack overflow, deadlocks, ...) so warn if + * ocfs2 grows a path for which this can happen. + */ + WARN_ON_ONCE(lockres->l_ops->post_unlock); + /* Since the lock is freeing we don't do much in the fn below */ + ocfs2_process_blocked_lock(osb, lockres); + return; + } while (lockres->l_flags & OCFS2_LOCK_QUEUED) { lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0); spin_unlock_irqrestore(&lockres->l_lock, flags); @@ -3178,7 +3216,7 @@ void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, { int ret; - ocfs2_mark_lockres_freeing(lockres); + ocfs2_mark_lockres_freeing(osb, lockres); ret = ocfs2_drop_lock(osb, lockres); if (ret) mlog_errno(ret); diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index 1d596d8..d293a22 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h @@ -157,7 +157,8 @@ int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex); void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex); -void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres); +void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb, + struct ocfs2_lock_res *lockres); void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, struct ocfs2_lock_res *lockres); diff --git a/fs/ocfs2/inode.c 
b/fs/ocfs2/inode.c index f87f9bd..e37a59a 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -814,11 +814,13 @@ static int ocfs2_inode_is_valid_to_delete(struct inode *inode) goto bail; } - /* If we're coming from downconvert_thread we can't go into our own - * voting [hello, deadlock city!], so unforuntately we just - * have to skip deleting this guy. That's OK though because - * the node who's doing the actual deleting should handle it - * anyway. */ + /* + * If we're coming from downconvert_thread we can't go into our own + * voting [hello, deadlock city!] so we cannot delete the inode. But + * since we dropped last inode ref when downconverting dentry lock, + * we cannot have the file open and thus the node doing unlink will + * take care of deleting the inode. + */ if (current == osb->dc_task) goto bail; @@ -970,8 +972,6 @@ static void ocfs2_delete_inode(struct inode *inode) if (is_bad_inode(inode) || !OCFS2_I(inode)->ip_blkno) goto bail; - dquot_initialize(inode); - if (!ocfs2_inode_is_valid_to_delete(inode)) { /* It's probably not necessary to truncate_inode_pages * here but we do it for safety anyway (it will most @@ -980,6 +980,8 @@ static void ocfs2_delete_inode(struct inode *inode) goto bail; } + dquot_initialize(inode); + /* We want to block signals in delete_inode as the lock and * messaging paths may return us -ERESTARTSYS. Which would * cause us to exit early, resulting in inodes being orphaned @@ -1067,6 +1069,7 @@ static void ocfs2_clear_inode(struct inode *inode) { int status; struct ocfs2_inode_info *oi = OCFS2_I(inode); + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); clear_inode(inode); trace_ocfs2_clear_inode((unsigned long long)oi->ip_blkno, @@ -1083,9 +1086,9 @@ static void ocfs2_clear_inode(struct inode *inode) /* Do these before all the other work so that we don't bounce * the downconvert thread while waiting to destroy the locks. 
*/ - ocfs2_mark_lockres_freeing(&oi->ip_rw_lockres); - ocfs2_mark_lockres_freeing(&oi->ip_inode_lockres); - ocfs2_mark_lockres_freeing(&oi->ip_open_lockres); + ocfs2_mark_lockres_freeing(osb, &oi->ip_rw_lockres); + ocfs2_mark_lockres_freeing(osb, &oi->ip_inode_lockres); + ocfs2_mark_lockres_freeing(osb, &oi->ip_open_lockres); ocfs2_resv_discard(&OCFS2_SB(inode->i_sb)->osb_la_resmap, &oi->ip_la_data_resv); diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index be3f867..c19c2c5 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -430,7 +430,6 @@ leave: brelse(new_fe_bh); brelse(parent_fe_bh); - kfree(si.name); kfree(si.value); ocfs2_free_dir_lookup_result(&lookup); @@ -1818,7 +1817,6 @@ bail: brelse(new_fe_bh); brelse(parent_fe_bh); - kfree(si.name); kfree(si.value); ocfs2_free_dir_lookup_result(&lookup); if (inode_ac) diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 3a90347..f613434 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -30,6 +30,7 @@ #include <linux/sched.h> #include <linux/wait.h> #include <linux/list.h> +#include <linux/llist.h> #include <linux/rbtree.h> #include <linux/workqueue.h> #include <linux/kref.h> @@ -274,19 +275,16 @@ enum ocfs2_mount_options OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */ }; -#define OCFS2_OSB_SOFT_RO 0x0001 -#define OCFS2_OSB_HARD_RO 0x0002 -#define OCFS2_OSB_ERROR_FS 0x0004 -#define OCFS2_OSB_DROP_DENTRY_LOCK_IMMED 0x0008 - -#define OCFS2_DEFAULT_ATIME_QUANTUM 60 +#define OCFS2_OSB_SOFT_RO 0x0001 +#define OCFS2_OSB_HARD_RO 0x0002 +#define OCFS2_OSB_ERROR_FS 0x0004 +#define OCFS2_DEFAULT_ATIME_QUANTUM 60 struct ocfs2_journal; struct ocfs2_slot_info; struct ocfs2_recovery_map; struct ocfs2_replay_map; struct ocfs2_quota_recovery; -struct ocfs2_dentry_lock; struct ocfs2_super { struct task_struct *commit_task; @@ -413,10 +411,9 @@ struct ocfs2_super struct list_head blocked_lock_list; unsigned long blocked_lock_count; - /* List of dentry locks to release. 
Anyone can add locks to - * the list, ocfs2_wq processes the list */ - struct ocfs2_dentry_lock *dentry_lock_list; - struct work_struct dentry_lock_work; + /* List of dquot structures to drop last reference to */ + struct llist_head dquot_drop_list; + struct work_struct dquot_drop_work; wait_queue_head_t osb_mount_event; @@ -578,18 +575,6 @@ static inline void ocfs2_set_osb_flag(struct ocfs2_super *osb, spin_unlock(&osb->osb_lock); } - -static inline unsigned long ocfs2_test_osb_flag(struct ocfs2_super *osb, - unsigned long flag) -{ - unsigned long ret; - - spin_lock(&osb->osb_lock); - ret = osb->osb_flags & flag; - spin_unlock(&osb->osb_lock); - return ret; -} - static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb, int hard) { diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h index d5ab56c..f266d67 100644 --- a/fs/ocfs2/quota.h +++ b/fs/ocfs2/quota.h @@ -28,6 +28,7 @@ struct ocfs2_dquot { unsigned int dq_use_count; /* Number of nodes having reference to this entry in global quota file */ s64 dq_origspace; /* Last globally synced space usage */ s64 dq_originodes; /* Last globally synced inode usage */ + struct llist_node list; /* Member of list of dquots to drop */ }; /* Description of one chunk to recover in memory */ @@ -110,6 +111,7 @@ int ocfs2_read_quota_phys_block(struct inode *inode, u64 p_block, int ocfs2_create_local_dquot(struct dquot *dquot); int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot); int ocfs2_local_write_dquot(struct dquot *dquot); +void ocfs2_drop_dquot_refs(struct work_struct *work); extern const struct dquot_operations ocfs2_quota_operations; extern struct quota_format_type ocfs2_quota_format; diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index d7b5108..b990a62 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -10,6 +10,7 @@ #include <linux/jiffies.h> #include <linux/writeback.h> #include <linux/workqueue.h> +#include <linux/llist.h> #include <cluster/masklog.h> @@ -679,6 +680,27 
@@ static int ocfs2_calc_qdel_credits(struct super_block *sb, int type) OCFS2_INODE_UPDATE_CREDITS; } +void ocfs2_drop_dquot_refs(struct work_struct *work) +{ + struct ocfs2_super *osb = container_of(work, struct ocfs2_super, + dquot_drop_work); + struct llist_node *list; + struct ocfs2_dquot *odquot, *next_odquot; + + list = llist_del_all(&osb->dquot_drop_list); + llist_for_each_entry_safe(odquot, next_odquot, list, list) { + /* Drop the reference we acquired in ocfs2_dquot_release() */ + dqput(&odquot->dq_dquot); + } +} + +/* + * Called when the last reference to dquot is dropped. If we are called from + * downconvert thread, we cannot do all the handling here because grabbing + * quota lock could deadlock (the node holding the quota lock could need some + * other cluster lock to proceed but with blocked downconvert thread we cannot + * release any lock). + */ static int ocfs2_release_dquot(struct dquot *dquot) { handle_t *handle; @@ -694,6 +716,19 @@ static int ocfs2_release_dquot(struct dquot *dquot) /* Check whether we are not racing with some other dqget() */ if (atomic_read(&dquot->dq_count) > 1) goto out; + /* Running from downconvert thread? Postpone quota processing to wq */ + if (current == osb->dc_task) { + /* + * Grab our own reference to dquot and queue it for delayed + * dropping. Quota code rechecks after calling + * ->release_dquot() and won't free dquot structure. 
+ */ + dqgrab(dquot); + /* First entry on list -> queue work */ + if (llist_add(&OCFS2_DQUOT(dquot)->list, &osb->dquot_drop_list)) + queue_work(ocfs2_wq, &osb->dquot_drop_work); + goto out; + } status = ocfs2_lock_global_qf(oinfo, 1); if (status < 0) goto out; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index d4e81e4..4d13bf1 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1238,30 +1238,11 @@ static struct dentry *ocfs2_mount(struct file_system_type *fs_type, return mount_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super); } -static void ocfs2_kill_sb(struct super_block *sb) -{ - struct ocfs2_super *osb = OCFS2_SB(sb); - - /* Failed mount? */ - if (!osb || atomic_read(&osb->vol_state) == VOLUME_DISABLED) - goto out; - - /* Prevent further queueing of inode drop events */ - spin_lock(&dentry_list_lock); - ocfs2_set_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED); - spin_unlock(&dentry_list_lock); - /* Wait for work to finish and/or remove it */ - cancel_work_sync(&osb->dentry_lock_work); -out: - kill_block_super(sb); -} - static struct file_system_type ocfs2_fs_type = { .owner = THIS_MODULE, .name = "ocfs2", .mount = ocfs2_mount, - .kill_sb = ocfs2_kill_sb, - + .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, .next = NULL }; @@ -1934,17 +1915,16 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) debugfs_remove(osb->osb_ctxt); - /* - * Flush inode dropping work queue so that deletes are - * performed while the filesystem is still working - */ - ocfs2_drop_all_dl_inodes(osb); - /* Orphan scan should be stopped as early as possible */ ocfs2_orphan_scan_stop(osb); ocfs2_disable_quotas(osb); + /* All dquots should be freed by now */ + WARN_ON(!llist_empty(&osb->dquot_drop_list)); + /* Wait for worker to be done with the work structure in osb */ + cancel_work_sync(&osb->dquot_drop_work); + ocfs2_shutdown_local_alloc(osb); ocfs2_truncate_log_shutdown(osb); @@ -2272,8 +2252,8 @@ static int 
ocfs2_initialize_super(struct super_block *sb, INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery); journal->j_state = OCFS2_JOURNAL_FREE; - INIT_WORK(&osb->dentry_lock_work, ocfs2_drop_dl_inodes); - osb->dentry_lock_list = NULL; + INIT_WORK(&osb->dquot_drop_work, ocfs2_drop_dquot_refs); + init_llist_head(&osb->dquot_drop_list); /* get some pseudo constants for clustersize bits */ osb->s_clustersize_bits = @@ -627,23 +627,12 @@ out: static inline int __get_file_write_access(struct inode *inode, struct vfsmount *mnt) { - int error; - error = get_write_access(inode); + int error = get_write_access(inode); if (error) return error; - /* - * Do not take mount writer counts on - * special files since no writes to - * the mount itself will occur. - */ - if (!special_file(inode->i_mode)) { - /* - * Balanced in __fput() - */ - error = __mnt_want_write(mnt); - if (error) - put_write_access(inode); - } + error = __mnt_want_write(mnt); + if (error) + put_write_access(inode); return error; } @@ -676,12 +665,11 @@ static int do_dentry_open(struct file *f, path_get(&f->f_path); inode = f->f_inode = f->f_path.dentry->d_inode; - if (f->f_mode & FMODE_WRITE) { + if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) { error = __get_file_write_access(inode, f->f_path.mnt); if (error) goto cleanup_file; - if (!special_file(inode->i_mode)) - file_take_write(f); + file_take_write(f); } f->f_mapping = inode->i_mapping; @@ -722,7 +710,6 @@ cleanup_all: fops_put(f->f_op); file_sb_list_del(f); if (f->f_mode & FMODE_WRITE) { - put_write_access(inode); if (!special_file(inode->i_mode)) { /* * We don't consider this a real @@ -730,6 +717,7 @@ cleanup_all: * because it all happenend right * here, so just reset the state. 
*/ + put_write_access(inode); file_reset_write(f); __mnt_drop_write(f->f_path.mnt); } diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 8bd2135..3542f1f 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -158,6 +158,12 @@ posix_acl_equiv_mode(const struct posix_acl *acl, umode_t *mode_p) umode_t mode = 0; int not_equiv = 0; + /* + * A null ACL can always be presented as mode bits. + */ + if (!acl) + return 0; + FOREACH_ACL_ENTRY(pa, acl, pe) { switch (pa->e_tag) { case ACL_USER_OBJ: diff --git a/fs/proc/array.c b/fs/proc/array.c index cbd0f1b..09f0d9c 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -304,15 +304,11 @@ static void render_cap_t(struct seq_file *m, const char *header, seq_puts(m, header); CAP_FOR_EACH_U32(__capi) { seq_printf(m, "%08x", - a->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]); + a->cap[CAP_LAST_U32 - __capi]); } seq_putc(m, '\n'); } -/* Remove non-existent capabilities */ -#define NORM_CAPS(v) (v.cap[CAP_TO_INDEX(CAP_LAST_CAP)] &= \ - CAP_TO_MASK(CAP_LAST_CAP + 1) - 1) - static inline void task_cap(struct seq_file *m, struct task_struct *p) { const struct cred *cred; @@ -326,11 +322,6 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p) cap_bset = cred->cap_bset; rcu_read_unlock(); - NORM_CAPS(cap_inheritable); - NORM_CAPS(cap_permitted); - NORM_CAPS(cap_effective); - NORM_CAPS(cap_bset); - render_cap_t(m, "CapInh:\t", &cap_inheritable); render_cap_t(m, "CapPrm:\t", &cap_permitted); render_cap_t(m, "CapEff:\t", &cap_effective); diff --git a/fs/proc/base.c b/fs/proc/base.c index c35eaa4..dfce13e 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2544,6 +2544,57 @@ static const struct file_operations proc_projid_map_operations = { .llseek = seq_lseek, .release = proc_id_map_release, }; + +static int proc_setgroups_open(struct inode *inode, struct file *file) +{ + struct user_namespace *ns = NULL; + struct task_struct *task; + int ret; + + ret = -ESRCH; + task = get_proc_task(inode); + if (task) { + rcu_read_lock(); 
+ ns = get_user_ns(task_cred_xxx(task, user_ns)); + rcu_read_unlock(); + put_task_struct(task); + } + if (!ns) + goto err; + + if (file->f_mode & FMODE_WRITE) { + ret = -EACCES; + if (!ns_capable(ns, CAP_SYS_ADMIN)) + goto err_put_ns; + } + + ret = single_open(file, &proc_setgroups_show, ns); + if (ret) + goto err_put_ns; + + return 0; +err_put_ns: + put_user_ns(ns); +err: + return ret; +} + +static int proc_setgroups_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct user_namespace *ns = seq->private; + int ret = single_release(inode, file); + put_user_ns(ns); + return ret; +} + +static const struct file_operations proc_setgroups_operations = { + .open = proc_setgroups_open, + .write = proc_setgroups_write, + .read = seq_read, + .llseek = seq_lseek, + .release = proc_setgroups_release, +}; #endif /* CONFIG_USER_NS */ static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, @@ -2652,6 +2703,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), + REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), #endif #ifdef CONFIG_CHECKPOINT_RESTORE REG("timers", S_IRUGO, proc_timers_operations), @@ -2987,6 +3039,7 @@ static const struct pid_entry tid_base_stuff[] = { REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), + REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), #endif }; diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 1cf86c0..b5c72a3 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -159,7 +159,7 @@ static int show_stat(struct seq_file *p, void *v) /* sum again ? it could be updated? 
*/ for_each_irq_nr(j) - seq_put_decimal_ull(p, ' ', kstat_irqs(j)); + seq_put_decimal_ull(p, ' ', kstat_irqs_usr(j)); seq_printf(p, "\nctxt %llu\n" diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 390bdab..7724fbd 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1,4 +1,5 @@ #include <linux/mm.h> +#include <linux/vmacache.h> #include <linux/hugetlb.h> #include <linux/huge_mm.h> #include <linux/mount.h> @@ -159,7 +160,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) /* * We remember last_addr rather than next_addr to hit with - * mmap_cache most of the time. We have zero last_addr at + * vmacache most of the time. We have zero last_addr at * the beginning and also after lseek. We will have -1 last_addr * after the end of the vmas. */ @@ -1353,7 +1354,7 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, struct numa_maps *md; struct page *page; - if (pte_none(*pte)) + if (!pte_present(*pte)) return 0; page = pte_page(*pte); diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 1282384..14120a3 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -319,10 +319,10 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count, compressed ? 
".enc.z" : ""); break; case PSTORE_TYPE_CONSOLE: - sprintf(name, "console-%s", psname); + sprintf(name, "console-%s-%lld", psname, id); break; case PSTORE_TYPE_FTRACE: - sprintf(name, "ftrace-%s", psname); + sprintf(name, "ftrace-%s-%lld", psname, id); break; case PSTORE_TYPE_MCE: sprintf(name, "mce-%s-%lld", psname, id); diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index fa8cef2..e7d95f9 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -61,6 +61,11 @@ module_param(mem_size, ulong, 0400); MODULE_PARM_DESC(mem_size, "size of reserved RAM used to store oops/panic logs"); +static unsigned int mem_type; +module_param(mem_type, uint, 0600); +MODULE_PARM_DESC(mem_type, + "set to 1 to try to use unbuffered memory (default 0)"); + static int dump_oops = 1; module_param(dump_oops, int, 0600); MODULE_PARM_DESC(dump_oops, @@ -79,6 +84,7 @@ struct ramoops_context { struct persistent_ram_zone *fprz; phys_addr_t phys_addr; unsigned long size; + unsigned int memtype; size_t record_size; size_t console_size; size_t ftrace_size; @@ -353,7 +359,8 @@ static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt, size_t sz = cxt->record_size; cxt->przs[i] = persistent_ram_new(*paddr, sz, 0, - &cxt->ecc_info); + &cxt->ecc_info, + cxt->memtype); if (IS_ERR(cxt->przs[i])) { err = PTR_ERR(cxt->przs[i]); dev_err(dev, "failed to request mem region (0x%zx@0x%llx): %d\n", @@ -383,7 +390,7 @@ static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt, return -ENOMEM; } - *prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info); + *prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info, cxt->memtype); if (IS_ERR(*prz)) { int err = PTR_ERR(*prz); @@ -431,6 +438,7 @@ static int ramoops_probe(struct platform_device *pdev) cxt->dump_read_cnt = 0; cxt->size = pdata->mem_size; cxt->phys_addr = pdata->mem_address; + cxt->memtype = pdata->mem_type; cxt->record_size = pdata->record_size; cxt->console_size = pdata->console_size; cxt->ftrace_size = 
pdata->ftrace_size; @@ -561,6 +569,7 @@ static void ramoops_register_dummy(void) dummy_data->mem_size = mem_size; dummy_data->mem_address = mem_address; + dummy_data->mem_type = 0; dummy_data->record_size = record_size; dummy_data->console_size = ramoops_console_size; dummy_data->ftrace_size = ramoops_ftrace_size; diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index de272d4..bda61a7 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -380,7 +380,8 @@ void persistent_ram_zap(struct persistent_ram_zone *prz) persistent_ram_update_header_ecc(prz); } -static void *persistent_ram_vmap(phys_addr_t start, size_t size) +static void *persistent_ram_vmap(phys_addr_t start, size_t size, + unsigned int memtype) { struct page **pages; phys_addr_t page_start; @@ -392,7 +393,10 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size) page_start = start - offset_in_page(start); page_count = DIV_ROUND_UP(size + offset_in_page(start), PAGE_SIZE); - prot = pgprot_noncached(PAGE_KERNEL); + if (memtype) + prot = pgprot_noncached(PAGE_KERNEL); + else + prot = pgprot_writecombine(PAGE_KERNEL); pages = kmalloc(sizeof(struct page *) * page_count, GFP_KERNEL); if (!pages) { @@ -411,8 +415,11 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size) return vaddr; } -static void *persistent_ram_iomap(phys_addr_t start, size_t size) +static void *persistent_ram_iomap(phys_addr_t start, size_t size, + unsigned int memtype) { + void *va; + if (!request_mem_region(start, size, "persistent_ram")) { pr_err("request mem region (0x%llx@0x%llx) failed\n", (unsigned long long)size, (unsigned long long)start); @@ -422,19 +429,24 @@ static void *persistent_ram_iomap(phys_addr_t start, size_t size) buffer_start_add = buffer_start_add_locked; buffer_size_add = buffer_size_add_locked; - return ioremap(start, size); + if (memtype) + va = ioremap(start, size); + else + va = ioremap_wc(start, size); + + return va; } static int persistent_ram_buffer_map(phys_addr_t 
start, phys_addr_t size, - struct persistent_ram_zone *prz) + struct persistent_ram_zone *prz, int memtype) { prz->paddr = start; prz->size = size; if (pfn_valid(start >> PAGE_SHIFT)) - prz->vaddr = persistent_ram_vmap(start, size); + prz->vaddr = persistent_ram_vmap(start, size, memtype); else - prz->vaddr = persistent_ram_iomap(start, size); + prz->vaddr = persistent_ram_iomap(start, size, memtype); if (!prz->vaddr) { pr_err("%s: Failed to map 0x%llx pages at 0x%llx\n", __func__, @@ -502,7 +514,8 @@ void persistent_ram_free(struct persistent_ram_zone *prz) } struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, - u32 sig, struct persistent_ram_ecc_info *ecc_info) + u32 sig, struct persistent_ram_ecc_info *ecc_info, + unsigned int memtype) { struct persistent_ram_zone *prz; int ret = -ENOMEM; @@ -513,7 +526,7 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, goto err; } - ret = persistent_ram_buffer_map(start, size, prz); + ret = persistent_ram_buffer_map(start, size, prz, memtype); if (ret) goto err; diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index cfc8dcc..f56a357 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -528,7 +528,7 @@ restart: if (atomic_read(&dquot->dq_count)) { DEFINE_WAIT(wait); - atomic_inc(&dquot->dq_count); + dqgrab(dquot); prepare_to_wait(&dquot->dq_wait_unused, &wait, TASK_UNINTERRUPTIBLE); spin_unlock(&dq_list_lock); @@ -632,12 +632,12 @@ int dquot_writeback_dquots(struct super_block *sb, int type) /* Now we have active dquot from which someone is * holding reference so we can safely just increase * use count */ - atomic_inc(&dquot->dq_count); + dqgrab(dquot); spin_unlock(&dq_list_lock); dqstats_inc(DQST_LOOKUPS); err = sb->dq_op->write_dquot(dquot); if (!ret && err) - err = ret; + ret = err; dqput(dquot); spin_lock(&dq_list_lock); } @@ -702,6 +702,7 @@ dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) struct dquot *dquot; unsigned long freed = 0; + 
spin_lock(&dq_list_lock); head = free_dquots.prev; while (head != &free_dquots && sc->nr_to_scan) { dquot = list_entry(head, struct dquot, dq_free); @@ -713,6 +714,7 @@ dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) freed++; head = free_dquots.prev; } + spin_unlock(&dq_list_lock); return freed; } diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index 1fd2051..af67735 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c @@ -125,6 +125,7 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx) int d_reclen; char *d_name; ino_t d_ino; + loff_t cur_pos = deh_offset(deh); if (!de_visible(deh)) /* it is hidden entry */ @@ -196,8 +197,9 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx) if (local_buf != small_buf) { kfree(local_buf); } - // next entry should be looked for with such offset - next_pos = deh_offset(deh) + 1; + + /* deh_offset(deh) may be invalid now. */ + next_pos = cur_pos + 1; if (item_moved(&tmp_ih, &path_to_entry)) { set_cpu_key_k_offset(&pos_key, diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index ad62bdb..1e4cf9d 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -3220,8 +3220,14 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) attr->ia_size != i_size_read(inode)) { error = inode_newsize_ok(inode, attr->ia_size); if (!error) { + /* + * Could race against reiserfs_file_release + * if called from NFS, so take tailpack mutex. 
+ */ + mutex_lock(&REISERFS_I(inode)->tailpack); truncate_setsize(inode, attr->ia_size); - reiserfs_vfs_truncate_file(inode); + reiserfs_truncate_file(inode, 1); + mutex_unlock(&REISERFS_I(inode)->tailpack); } } @@ -81,6 +81,8 @@ static unsigned long super_cache_scan(struct shrinker *shrink, inodes = list_lru_count_node(&sb->s_inode_lru, sc->nid); dentries = list_lru_count_node(&sb->s_dentry_lru, sc->nid); total_objects = dentries + inodes + fs_objects + 1; + if (!total_objects) + total_objects = 1; /* proportion the scan between the caches */ dentries = mult_frac(sc->nr_to_scan, dentries, total_objects); @@ -112,9 +114,14 @@ static unsigned long super_cache_count(struct shrinker *shrink, sb = container_of(shrink, struct super_block, s_shrink); - if (!grab_super_passive(sb)) - return 0; - + /* + * Don't call grab_super_passive as it is a potential + * scalability bottleneck. The counts could get updated + * between super_cache_count and super_cache_scan anyway. + * Call to super_cache_count with shrinker_rwsem held + * ensures the safety of call to list_lru_count_node() and + * s_op->nr_cached_objects(). 
+ */ if (sb->s_op && sb->s_op->nr_cached_objects) total_objects = sb->s_op->nr_cached_objects(sb, sc->nid); @@ -125,7 +132,6 @@ static unsigned long super_cache_count(struct shrinker *shrink, sc->nid); total_objects = vfs_pressure_ratio(total_objects); - drop_super(sb); return total_objects; } @@ -321,10 +327,8 @@ void deactivate_locked_super(struct super_block *s) struct file_system_type *fs = s->s_type; if (atomic_dec_and_test(&s->s_active)) { cleancache_invalidate_fs(s); - fs->kill_sb(s); - - /* caches are now gone, we can safely kill the shrinker now */ unregister_shrinker(&s->s_shrink); + fs->kill_sb(s); put_filesystem(fs); put_super(s); @@ -845,7 +849,10 @@ void emergency_remount(void) static DEFINE_IDA(unnamed_dev_ida); static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */ -static int unnamed_dev_start = 0; /* don't bother trying below it */ +/* Many userspace utilities consider an FSID of 0 invalid. + * Always return at least 1 from get_anon_bdev. + */ +static int unnamed_dev_start = 1; int get_anon_bdev(dev_t *p) { @@ -219,23 +219,6 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd) return do_fsync(fd, 1); } -/** - * generic_write_sync - perform syncing after a write if file / inode is sync - * @file: file to which the write happened - * @pos: offset where the write started - * @count: length of the write - * - * This is just a simple wrapper about our general syncing function. - */ -int generic_write_sync(struct file *file, loff_t pos, loff_t count) -{ - if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host)) - return 0; - return vfs_fsync_range(file, pos, pos + count - 1, - (file->f_flags & __O_SYNC) ? 0 : 1); -} -EXPORT_SYMBOL(generic_write_sync); - /* * sys_sync_file_range() permits finely controlled syncing over a segment of * a file in the range offset .. (offset+nbytes-1) inclusive. 
If nbytes is diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c index ff82293..26b69b2 100644 --- a/fs/ubifs/commit.c +++ b/fs/ubifs/commit.c @@ -166,15 +166,10 @@ static int do_commit(struct ubifs_info *c) err = ubifs_orphan_end_commit(c); if (err) goto out; - old_ltail_lnum = c->ltail_lnum; - err = ubifs_log_end_commit(c, new_ltail_lnum); - if (err) - goto out; err = dbg_check_old_index(c, &zroot); if (err) goto out; - mutex_lock(&c->mst_mutex); c->mst_node->cmt_no = cpu_to_le64(c->cmt_no); c->mst_node->log_lnum = cpu_to_le32(new_ltail_lnum); c->mst_node->root_lnum = cpu_to_le32(zroot.lnum); @@ -203,8 +198,9 @@ static int do_commit(struct ubifs_info *c) c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); else c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_NO_ORPHS); - err = ubifs_write_master(c); - mutex_unlock(&c->mst_mutex); + + old_ltail_lnum = c->ltail_lnum; + err = ubifs_log_end_commit(c, new_ltail_lnum); if (err) goto out; diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 123c79b..b56eb62 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1525,8 +1525,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, } wait_for_stable_page(page); - unlock_page(page); - return 0; + return VM_FAULT_LOCKED; out_unlock: unlock_page(page); diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c index 36bd4ef..06649d2 100644 --- a/fs/ubifs/log.c +++ b/fs/ubifs/log.c @@ -106,10 +106,14 @@ static inline long long empty_log_bytes(const struct ubifs_info *c) h = (long long)c->lhead_lnum * c->leb_size + c->lhead_offs; t = (long long)c->ltail_lnum * c->leb_size; - if (h >= t) + if (h > t) return c->log_bytes - h + t; - else + else if (h != t) return t - h; + else if (c->lhead_lnum != c->ltail_lnum) + return 0; + else + return c->log_bytes; } /** @@ -447,9 +451,9 @@ out: * @ltail_lnum: new log tail LEB number * * This function is called on when the commit operation was finished. It - * moves log tail to new position and unmaps LEBs which contain obsolete data. 
- * Returns zero in case of success and a negative error code in case of - * failure. + * moves log tail to new position and updates the master node so that it stores + * the new log tail LEB number. Returns zero in case of success and a negative + * error code in case of failure. */ int ubifs_log_end_commit(struct ubifs_info *c, int ltail_lnum) { @@ -477,7 +481,12 @@ int ubifs_log_end_commit(struct ubifs_info *c, int ltail_lnum) spin_unlock(&c->buds_lock); err = dbg_check_bud_bytes(c); + if (err) + goto out; + err = ubifs_write_master(c); + +out: mutex_unlock(&c->log_mutex); return err; } diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c index ab83ace..1a4bb9e 100644 --- a/fs/ubifs/master.c +++ b/fs/ubifs/master.c @@ -352,10 +352,9 @@ int ubifs_read_master(struct ubifs_info *c) * ubifs_write_master - write master node. * @c: UBIFS file-system description object * - * This function writes the master node. The caller has to take the - * @c->mst_mutex lock before calling this function. Returns zero in case of - * success and a negative error code in case of failure. The master node is - * written twice to enable recovery. + * This function writes the master node. Returns zero in case of success and a + * negative error code in case of failure. The master node is written twice to + * enable recovery. 
*/ int ubifs_write_master(struct ubifs_info *c) { diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c index f35135e..9a9fb94 100644 --- a/fs/ubifs/shrinker.c +++ b/fs/ubifs/shrinker.c @@ -128,7 +128,6 @@ static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention) freed = ubifs_destroy_tnc_subtree(znode); atomic_long_sub(freed, &ubifs_clean_zn_cnt); atomic_long_sub(freed, &c->clean_zn_cnt); - ubifs_assert(atomic_long_read(&c->clean_zn_cnt) >= 0); total_freed += freed; znode = zprev; } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 3e4aa72..151c0b4 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1971,7 +1971,6 @@ static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi) mutex_init(&c->lp_mutex); mutex_init(&c->tnc_mutex); mutex_init(&c->log_mutex); - mutex_init(&c->mst_mutex); mutex_init(&c->umount_mutex); mutex_init(&c->bu_mutex); mutex_init(&c->write_reserve_mutex); diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index e8c8cfe..7ab9c71 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1042,7 +1042,6 @@ struct ubifs_debug_info; * * @mst_node: master node * @mst_offs: offset of valid master node - * @mst_mutex: protects the master node area, @mst_node, and @mst_offs * * @max_bu_buf_len: maximum bulk-read buffer length * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu @@ -1282,7 +1281,6 @@ struct ubifs_info { struct ubifs_mst_node *mst_node; int mst_offs; - struct mutex mst_mutex; int max_bu_buf_len; struct mutex bu_mutex; diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 062b792..47cacfd 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1270,13 +1270,22 @@ update_time: return 0; } +/* + * Maximum length of linked list formed by ICB hierarchy. The chosen number is + * arbitrary - just that we hopefully don't limit any real use of rewritten + * inode on write-once media but avoid looping for too long on corrupted media. 
+ */ +#define UDF_MAX_ICB_NESTING 1024 + static void __udf_read_inode(struct inode *inode) { struct buffer_head *bh = NULL; struct fileEntry *fe; uint16_t ident; struct udf_inode_info *iinfo = UDF_I(inode); + unsigned int indirections = 0; +reread: /* * Set defaults, but the inode is still incomplete! * Note: get_new_inode() sets the following on a new inode: @@ -1313,28 +1322,26 @@ static void __udf_read_inode(struct inode *inode) ibh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 1, &ident); if (ident == TAG_IDENT_IE && ibh) { - struct buffer_head *nbh = NULL; struct kernel_lb_addr loc; struct indirectEntry *ie; ie = (struct indirectEntry *)ibh->b_data; loc = lelb_to_cpu(ie->indirectICB.extLocation); - if (ie->indirectICB.extLength && - (nbh = udf_read_ptagged(inode->i_sb, &loc, 0, - &ident))) { - if (ident == TAG_IDENT_FE || - ident == TAG_IDENT_EFE) { - memcpy(&iinfo->i_location, - &loc, - sizeof(struct kernel_lb_addr)); - brelse(bh); - brelse(ibh); - brelse(nbh); - __udf_read_inode(inode); + if (ie->indirectICB.extLength) { + brelse(bh); + brelse(ibh); + memcpy(&iinfo->i_location, &loc, + sizeof(struct kernel_lb_addr)); + if (++indirections > UDF_MAX_ICB_NESTING) { + udf_err(inode->i_sb, + "too many ICBs in ICB hierarchy" + " (max %d supported)\n", + UDF_MAX_ICB_NESTING); + make_bad_inode(inode); return; } - brelse(nbh); + goto reread; } } brelse(ibh); diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c index d7c6dbe..d89f324 100644 --- a/fs/udf/symlink.c +++ b/fs/udf/symlink.c @@ -80,11 +80,17 @@ static int udf_symlink_filler(struct file *file, struct page *page) struct inode *inode = page->mapping->host; struct buffer_head *bh = NULL; unsigned char *symlink; - int err = -EIO; + int err; unsigned char *p = kmap(page); struct udf_inode_info *iinfo; uint32_t pos; + /* We don't support symlinks longer than one block */ + if (inode->i_size > inode->i_sb->s_blocksize) { + err = -ENAMETOOLONG; + goto out_unmap; + } + iinfo = UDF_I(inode); pos = 
udf_block_map(inode, 0); @@ -94,8 +100,10 @@ static int udf_symlink_filler(struct file *file, struct page *page) } else { bh = sb_bread(inode->i_sb, pos); - if (!bh) - goto out; + if (!bh) { + err = -EIO; + goto out_unlock_inode; + } symlink = bh->b_data; } @@ -109,9 +117,10 @@ static int udf_symlink_filler(struct file *file, struct page *page) unlock_page(page); return 0; -out: +out_unlock_inode: up_read(&iinfo->i_data_sem); SetPageError(page); +out_unmap: kunmap(page); unlock_page(page); return err; diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index e51e581..ab28ad5 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -431,10 +431,22 @@ xfs_start_page_writeback( { ASSERT(PageLocked(page)); ASSERT(!PageWriteback(page)); - if (clear_dirty) + + /* + * if the page was not fully cleaned, we need to ensure that the higher + * layers come back to it correctly. That means we need to keep the page + * dirty, and for WB_SYNC_ALL writeback we need to ensure the + * PAGECACHE_TAG_TOWRITE index mark is not removed so another attempt to + * write this page in this writeback sweep will be made. + */ + if (clear_dirty) { clear_page_dirty_for_io(page); - set_page_writeback(page); + set_page_writeback(page); + } else + set_page_writeback_keepwrite(page); + unlock_page(page); + /* If no buffers on the page are to be written, finish it here */ if (!buffers) end_page_writeback(page); @@ -1569,8 +1581,7 @@ xfs_vm_write_begin( ASSERT(len <= PAGE_CACHE_SIZE); - page = grab_cache_page_write_begin(mapping, index, - flags | AOP_FLAG_NOFS); + page = grab_cache_page_write_begin(mapping, index, flags); if (!page) return -ENOMEM; @@ -1658,11 +1669,72 @@ xfs_vm_readpages( return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); } +/* + * This is basically a copy of __set_page_dirty_buffers() with one + * small tweak: buffers beyond EOF do not get marked dirty. 
If we mark them + * dirty, we'll never be able to clean them because we don't write buffers + * beyond EOF, and that means we can't invalidate pages that span EOF + * that have been marked dirty. Further, the dirty state can leak into + * the file interior if the file is extended, resulting in all sorts of + * bad things happening as the state does not match the underlying data. + * + * XXX: this really indicates that bufferheads in XFS need to die. Warts like + * this only exist because of bufferheads and how the generic code manages them. + */ +STATIC int +xfs_vm_set_page_dirty( + struct page *page) +{ + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; + loff_t end_offset; + loff_t offset; + int newly_dirty; + + if (unlikely(!mapping)) + return !TestSetPageDirty(page); + + end_offset = i_size_read(inode); + offset = page_offset(page); + + spin_lock(&mapping->private_lock); + if (page_has_buffers(page)) { + struct buffer_head *head = page_buffers(page); + struct buffer_head *bh = head; + + do { + if (offset < end_offset) + set_buffer_dirty(bh); + bh = bh->b_this_page; + offset += 1 << inode->i_blkbits; + } while (bh != head); + } + newly_dirty = !TestSetPageDirty(page); + spin_unlock(&mapping->private_lock); + + if (newly_dirty) { + /* sigh - __set_page_dirty() is static, so copy it here, too */ + unsigned long flags; + + spin_lock_irqsave(&mapping->tree_lock, flags); + if (page->mapping) { /* Race with truncate? 
*/ + WARN_ON_ONCE(!PageUptodate(page)); + account_page_dirtied(page, mapping); + radix_tree_tag_set(&mapping->page_tree, + page_index(page), PAGECACHE_TAG_DIRTY); + } + spin_unlock_irqrestore(&mapping->tree_lock, flags); + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); + } + return newly_dirty; +} + const struct address_space_operations xfs_address_space_operations = { .readpage = xfs_vm_readpage, .readpages = xfs_vm_readpages, .writepage = xfs_vm_writepage, .writepages = xfs_vm_writepages, + .set_page_dirty = xfs_vm_set_page_dirty, .releasepage = xfs_vm_releasepage, .invalidatepage = xfs_vm_invalidatepage, .write_begin = xfs_vm_write_begin, diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 45560ee..19d9fd6 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -158,7 +158,7 @@ xfs_ioc_trim( struct xfs_mount *mp, struct fstrim_range __user *urange) { - struct request_queue *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue; + struct request_queue *q = bdev_get_queue(mp->m_ddev_targp->bt_bdev); unsigned int granularity = q->limits.discard_granularity; struct fstrim_range range; xfs_daddr_t start, end, minlen; @@ -181,7 +181,8 @@ xfs_ioc_trim( * matter as trimming blocks is an advisory interface. 
*/ if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) || - range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp))) + range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp)) || + range.len < mp->m_sb.sb_blocksize) return -XFS_ERROR(EINVAL); start = BTOBB(range.start); diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 1ee776d..895db7a 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -1121,7 +1121,8 @@ xfs_qm_dqflush( * Get the buffer containing the on-disk dquot */ error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, - mp->m_quotainfo->qi_dqchunklen, 0, &bp, NULL); + mp->m_quotainfo->qi_dqchunklen, 0, &bp, + &xfs_dquot_buf_ops); if (error) goto out_unlock; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 4c749ab..aa60645 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -299,7 +299,16 @@ xfs_file_aio_read( xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); return ret; } - truncate_pagecache_range(VFS_I(ip), pos, -1); + + /* + * Invalidate whole pages. This can return an error if + * we fail to invalidate a page, but this should never + * happen on XFS. Warn if it does fail. + */ + ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, + pos >> PAGE_CACHE_SHIFT, -1); + WARN_ON_ONCE(ret); + ret = 0; } xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); } @@ -678,7 +687,15 @@ xfs_file_dio_aio_write( pos, -1); if (ret) goto out; - truncate_pagecache_range(VFS_I(ip), pos, -1); + /* + * Invalidate whole pages. This can return an error if + * we fail to invalidate a page, but this should never + * happen on XFS. Warn if it does fail. 
+ */ + ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, + pos >> PAGE_CACHE_SHIFT, -1); + WARN_ON_ONCE(ret); + ret = 0; } /* @@ -794,7 +811,7 @@ xfs_file_aio_write( XFS_STATS_ADD(xs_write_bytes, ret); /* Handle various SYNC-type writes */ - err = generic_write_sync(file, pos, ret); + err = generic_write_sync(file, iocb->ki_pos - ret, ret); if (err < 0) ret = err; } diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index c888040..20ccca1 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -153,7 +153,7 @@ xfs_growfs_data_private( xfs_buf_t *bp; int bucket; int dpct; - int error; + int error, saved_error = 0; xfs_agnumber_t nagcount; xfs_agnumber_t nagimax = 0; xfs_rfsblock_t nb, nb_mod; @@ -500,29 +500,33 @@ xfs_growfs_data_private( error = ENOMEM; } + /* + * If we get an error reading or writing alternate superblocks, + * continue. xfs_repair chooses the "best" superblock based + * on most matches; if we break early, we'll leave more + * superblocks un-updated than updated, and xfs_repair may + * pick them over the properly-updated primary. + */ if (error) { xfs_warn(mp, "error %d reading secondary superblock for ag %d", error, agno); - break; + saved_error = error; + continue; } xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, XFS_SB_ALL_BITS); - /* - * If we get an error writing out the alternate superblocks, - * just issue a warning and continue. The real work is - * already done and committed. - */ error = xfs_bwrite(bp); xfs_buf_relse(bp); if (error) { xfs_warn(mp, "write error %d updating secondary superblock for ag %d", error, agno); - break; /* no point in continuing */ + saved_error = error; + continue; } } - return error; + return saved_error ? 
saved_error : error; error0: xfs_trans_cancel(tp, XFS_TRANS_ABORT); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index e3d7538..7a460d8 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2370,6 +2370,33 @@ xfs_iunpin_wait( __xfs_iunpin_wait(ip); } +/* + * Removing an inode from the namespace involves removing the directory entry + * and dropping the link count on the inode. Removing the directory entry can + * result in locking an AGF (directory blocks were freed) and removing a link + * count can result in placing the inode on an unlinked list which results in + * locking an AGI. + * + * The big problem here is that we have an ordering constraint on AGF and AGI + * locking - inode allocation locks the AGI, then can allocate a new extent for + * new inodes, locking the AGF after the AGI. Similarly, freeing the inode + * removes the inode from the unlinked list, requiring that we lock the AGI + * first, and then freeing the inode can result in an inode chunk being freed + * and hence freeing disk space requiring that we lock an AGF. + * + * Hence the ordering that is imposed by other parts of the code is AGI before + * AGF. This means we cannot remove the directory entry before we drop the inode + * reference count and put it on the unlinked list as this results in a lock + * order of AGF then AGI, and this can deadlock against inode allocation and + * freeing. Therefore we must drop the link counts before we remove the + * directory entry. + * + * This is still safe from a transactional point of view - it is not until we + * get to xfs_bmap_finish() that we have the possibility of multiple + * transactions in this operation. Hence as long as we remove the directory + * entry and drop the link count in the first transaction of the remove + * operation, there are no transactional constraints on the ordering here. 
+ */ int xfs_remove( xfs_inode_t *dp, @@ -2439,6 +2466,7 @@ xfs_remove( /* * If we're removing a directory perform some additional validation. */ + cancel_flags |= XFS_TRANS_ABORT; if (is_dir) { ASSERT(ip->i_d.di_nlink >= 2); if (ip->i_d.di_nlink != 2) { @@ -2449,31 +2477,16 @@ xfs_remove( error = XFS_ERROR(ENOTEMPTY); goto out_trans_cancel; } - } - xfs_bmap_init(&free_list, &first_block); - error = xfs_dir_removename(tp, dp, name, ip->i_ino, - &first_block, &free_list, resblks); - if (error) { - ASSERT(error != ENOENT); - goto out_bmap_cancel; - } - xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - - if (is_dir) { - /* - * Drop the link from ip's "..". - */ + /* Drop the link from ip's "..". */ error = xfs_droplink(tp, dp); if (error) - goto out_bmap_cancel; + goto out_trans_cancel; - /* - * Drop the "." link from ip to self. - */ + /* Drop the "." link from ip to self. */ error = xfs_droplink(tp, ip); if (error) - goto out_bmap_cancel; + goto out_trans_cancel; } else { /* * When removing a non-directory we need to log the parent @@ -2482,20 +2495,24 @@ xfs_remove( */ xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); } + xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - /* - * Drop the link from dp to ip. - */ + /* Drop the link from dp to ip. */ error = xfs_droplink(tp, ip); if (error) - goto out_bmap_cancel; + goto out_trans_cancel; - /* - * Determine if this is the last link while - * we are in the transaction. 
- */ + /* Determine if this is the last link while the inode is locked */ link_zero = (ip->i_d.di_nlink == 0); + xfs_bmap_init(&free_list, &first_block); + error = xfs_dir_removename(tp, dp, name, ip->i_ino, + &first_block, &free_list, resblks); + if (error) { + ASSERT(error != ENOENT); + goto out_bmap_cancel; + } + /* * If this is a synchronous mount, make sure that the * remove transaction goes to disk before returning to @@ -2525,7 +2542,6 @@ xfs_remove( out_bmap_cancel: xfs_bmap_cancel(&free_list); - cancel_flags |= XFS_TRANS_ABORT; out_trans_cancel: xfs_trans_cancel(tp, cancel_flags); std_return: diff --git a/fs/xfs/xfs_inode_fork.c b/fs/xfs/xfs_inode_fork.c index 02f1083..6829134 100644 --- a/fs/xfs/xfs_inode_fork.c +++ b/fs/xfs/xfs_inode_fork.c @@ -1031,15 +1031,14 @@ xfs_iext_add( * the next index needed in the indirection array. */ else { - int count = ext_diff; + uint count = ext_diff; while (count) { erp = xfs_iext_irec_new(ifp, erp_idx); - erp->er_extcount = count; - count -= MIN(count, (int)XFS_LINEAR_EXTS); - if (count) { + erp->er_extcount = min(count, XFS_LINEAR_EXTS); + count -= erp->er_extcount; + if (count) erp_idx++; - } } } } @@ -1359,7 +1358,7 @@ xfs_iext_remove_indirect( void xfs_iext_realloc_direct( xfs_ifork_t *ifp, /* inode fork pointer */ - int new_size) /* new size of extents */ + int new_size) /* new size of extents after adding */ { int rnew_size; /* real new size of extents */ @@ -1397,13 +1396,8 @@ xfs_iext_realloc_direct( rnew_size - ifp->if_real_bytes); } } - /* - * Switch from the inline extent buffer to a direct - * extent list. Be sure to include the inline extent - * bytes in new_size. 
- */ + /* Switch from the inline extent buffer to a direct extent list */ else { - new_size += ifp->if_bytes; if (!is_power_of_2(new_size)) { rnew_size = roundup_pow_of_two(new_size); } diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 8c8ef24..52b5375 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1133,7 +1133,7 @@ xfs_ioctl_setattr( * cleared upon successful return from chown() */ if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && - !inode_capable(VFS_I(ip), CAP_FSETID)) + !capable_wrt_inode_uidgid(VFS_I(ip), CAP_FSETID)) ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); /* diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 2b8952d..584996c 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -1169,6 +1169,7 @@ xfs_setup_inode( struct xfs_inode *ip) { struct inode *inode = &ip->i_vnode; + gfp_t gfp_mask; inode->i_ino = ip->i_ino; inode->i_state = I_NEW; @@ -1229,6 +1230,14 @@ xfs_setup_inode( } /* + * Ensure all page cache allocations are done from GFP_NOFS context to + * prevent direct reclaim recursion back into the filesystem and blowing + * stacks or deadlocking. + */ + gfp_mask = mapping_gfp_mask(inode->i_mapping); + mapping_set_gfp_mask(inode->i_mapping, (gfp_mask & ~(__GFP_FS))); + + /* * If there is no attribute fork no ACL can exist on this inode, * and it can't have any file capabilities attached to it either. */ diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index a2dea108..3c4ddc1 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1000,27 +1000,34 @@ xfs_log_space_wake( } /* - * Determine if we have a transaction that has gone to disk - * that needs to be covered. To begin the transition to the idle state - * firstly the log needs to be idle (no AIL and nothing in the iclogs). - * If we are then in a state where covering is needed, the caller is informed - * that dummy transactions are required to move the log into the idle state. + * Determine if we have a transaction that has gone to disk that needs to be + * covered. 
To begin the transition to the idle state firstly the log needs to + * be idle. That means the CIL, the AIL and the iclogs needs to be empty before + * we start attempting to cover the log. * - * Because this is called as part of the sync process, we should also indicate - * that dummy transactions should be issued in anything but the covered or - * idle states. This ensures that the log tail is accurately reflected in - * the log at the end of the sync, hence if a crash occurrs avoids replay - * of transactions where the metadata is already on disk. + * Only if we are then in a state where covering is needed, the caller is + * informed that dummy transactions are required to move the log into the idle + * state. + * + * If there are any items in the AIl or CIL, then we do not want to attempt to + * cover the log as we may be in a situation where there isn't log space + * available to run a dummy transaction and this can lead to deadlocks when the + * tail of the log is pinned by an item that is modified in the CIL. Hence + * there's no point in running a dummy transaction at this point because we + * can't start trying to idle the log until both the CIL and AIL are empty. 
*/ int xfs_log_need_covered(xfs_mount_t *mp) { - int needed = 0; struct xlog *log = mp->m_log; + int needed = 0; if (!xfs_fs_writable(mp)) return 0; + if (!xlog_cil_empty(log)) + return 0; + spin_lock(&log->l_icloglock); switch (log->l_covered_state) { case XLOG_STATE_COVER_DONE: @@ -1029,14 +1036,17 @@ xfs_log_need_covered(xfs_mount_t *mp) break; case XLOG_STATE_COVER_NEED: case XLOG_STATE_COVER_NEED2: - if (!xfs_ail_min_lsn(log->l_ailp) && - xlog_iclogs_empty(log)) { - if (log->l_covered_state == XLOG_STATE_COVER_NEED) - log->l_covered_state = XLOG_STATE_COVER_DONE; - else - log->l_covered_state = XLOG_STATE_COVER_DONE2; - } - /* FALLTHRU */ + if (xfs_ail_min_lsn(log->l_ailp)) + break; + if (!xlog_iclogs_empty(log)) + break; + + needed = 1; + if (log->l_covered_state == XLOG_STATE_COVER_NEED) + log->l_covered_state = XLOG_STATE_COVER_DONE; + else + log->l_covered_state = XLOG_STATE_COVER_DONE2; + break; default: needed = 1; break; @@ -3702,11 +3712,9 @@ xlog_verify_iclog( /* check validity of iclog pointers */ spin_lock(&log->l_icloglock); icptr = log->l_iclog; - for (i=0; i < log->l_iclog_bufs; i++) { - if (icptr == NULL) - xfs_emerg(log->l_mp, "%s: invalid ptr", __func__); - icptr = icptr->ic_next; - } + for (i = 0; i < log->l_iclog_bufs; i++, icptr = icptr->ic_next) + ASSERT(icptr); + if (icptr != log->l_iclog) xfs_emerg(log->l_mp, "%s: corrupt iclog ring", __func__); spin_unlock(&log->l_icloglock); diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index cfe9797..da8524e77 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -711,6 +711,20 @@ xlog_cil_push_foreground( xlog_cil_push(log); } +bool +xlog_cil_empty( + struct xlog *log) +{ + struct xfs_cil *cil = log->l_cilp; + bool empty = false; + + spin_lock(&cil->xc_push_lock); + if (list_empty(&cil->xc_cil)) + empty = true; + spin_unlock(&cil->xc_push_lock); + return empty; +} + /* * Commit a transaction with the given vector to the Committed Item List. 
* diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 136654b..f80cff2 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -514,12 +514,10 @@ xlog_assign_grant_head(atomic64_t *head, int cycle, int space) /* * Committed Item List interfaces */ -int -xlog_cil_init(struct xlog *log); -void -xlog_cil_init_post_recovery(struct xlog *log); -void -xlog_cil_destroy(struct xlog *log); +int xlog_cil_init(struct xlog *log); +void xlog_cil_init_post_recovery(struct xlog *log); +void xlog_cil_destroy(struct xlog *log); +bool xlog_cil_empty(struct xlog *log); /* * CIL force routines diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 3979749..5b166a0 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2121,6 +2121,17 @@ xlog_recover_validate_buf_type( __uint16_t magic16; __uint16_t magicda; + /* + * We can only do post recovery validation on items on CRC enabled + * fielsystems as we need to know when the buffer was written to be able + * to determine if we should have replayed the item. If we replay old + * metadata over a newer buffer, then it will enter a temporarily + * inconsistent state resulting in verification failures. Hence for now + * just avoid the verification stage for non-crc filesystems + */ + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return; + magic32 = be32_to_cpu(*(__be32 *)bp->b_addr); magic16 = be16_to_cpu(*(__be16*)bp->b_addr); magicda = be16_to_cpu(info->magic); @@ -2156,8 +2167,6 @@ xlog_recover_validate_buf_type( bp->b_ops = &xfs_agf_buf_ops; break; case XFS_BLFT_AGFL_BUF: - if (!xfs_sb_version_hascrc(&mp->m_sb)) - break; if (magic32 != XFS_AGFL_MAGIC) { xfs_warn(mp, "Bad AGFL block magic!"); ASSERT(0); @@ -2190,10 +2199,6 @@ xlog_recover_validate_buf_type( #endif break; case XFS_BLFT_DINO_BUF: - /* - * we get here with inode allocation buffers, not buffers that - * track unlinked list changes. 
- */ if (magic16 != XFS_DINODE_MAGIC) { xfs_warn(mp, "Bad INODE block magic!"); ASSERT(0); @@ -2273,8 +2278,6 @@ xlog_recover_validate_buf_type( bp->b_ops = &xfs_attr3_leaf_buf_ops; break; case XFS_BLFT_ATTR_RMT_BUF: - if (!xfs_sb_version_hascrc(&mp->m_sb)) - break; if (magic32 != XFS_ATTR3_RMT_MAGIC) { xfs_warn(mp, "Bad attr remote magic!"); ASSERT(0); @@ -2381,16 +2384,7 @@ xlog_recover_do_reg_buffer( /* Shouldn't be any more regions */ ASSERT(i == item->ri_total); - /* - * We can only do post recovery validation on items on CRC enabled - * fielsystems as we need to know when the buffer was written to be able - * to determine if we should have replayed the item. If we replay old - * metadata over a newer buffer, then it will enter a temporarily - * inconsistent state resulting in verification failures. Hence for now - * just avoid the verification stage for non-crc filesystems - */ - if (xfs_sb_version_hascrc(&mp->m_sb)) - xlog_recover_validate_buf_type(mp, bp, buf_f); + xlog_recover_validate_buf_type(mp, bp, buf_f); } /* @@ -2625,12 +2619,29 @@ xlog_recover_buffer_pass2( } /* - * recover the buffer only if we get an LSN from it and it's less than + * Recover the buffer only if we get an LSN from it and it's less than * the lsn of the transaction we are replaying. + * + * Note that we have to be extremely careful of readahead here. + * Readahead does not attach verfiers to the buffers so if we don't + * actually do any replay after readahead because of the LSN we found + * in the buffer if more recent than that current transaction then we + * need to attach the verifier directly. Failure to do so can lead to + * future recovery actions (e.g. EFI and unlinked list recovery) can + * operate on the buffers and they won't get the verifier attached. This + * can lead to blocks on disk having the correct content but a stale + * CRC. + * + * It is safe to assume these clean buffers are currently up to date. 
+ * If the buffer is dirtied by a later transaction being replayed, then + * the verifier will be reset to match whatever recover turns that + * buffer into. */ lsn = xlog_recover_get_buf_lsn(mp, bp); - if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) + if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { + xlog_recover_validate_buf_type(mp, bp, buf_f); goto out_release; + } if (buf_f->blf_flags & XFS_BLF_INODE_BUF) { error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 5dcc680..dc602b5 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -318,7 +318,6 @@ reread: * Initialize the mount structure from the superblock. */ xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); - xfs_sb_quota_from_disk(&mp->m_sb); /* * We must be able to do sector-sized and sector-aligned IO. diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 4688a62..794aa2f 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -1193,6 +1193,12 @@ xfs_qm_dqiter_bufs( if (error) break; + /* + * A corrupt buffer might not have a verifier attached, so + * make sure we have the correct one attached before writeback + * occurs. 
+ */ + bp->b_ops = &xfs_dquot_buf_ops; xfs_qm_reset_dqcounts(mp, bp, firstid, type); xfs_buf_delwri_queue(bp, buffer_list); xfs_buf_relse(bp); @@ -1276,7 +1282,7 @@ xfs_qm_dqiterate( xfs_buf_readahead(mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, rablkno), mp->m_quotainfo->qi_dqchunklen, - NULL); + &xfs_dquot_buf_ops); rablkno++; } } diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c index 0397081..1351ff0 100644 --- a/fs/xfs/xfs_sb.c +++ b/fs/xfs/xfs_sb.c @@ -406,10 +406,11 @@ xfs_sb_quota_from_disk(struct xfs_sb *sbp) } } -void -xfs_sb_from_disk( +static void +__xfs_sb_from_disk( struct xfs_sb *to, - xfs_dsb_t *from) + xfs_dsb_t *from, + bool convert_xquota) { to->sb_magicnum = be32_to_cpu(from->sb_magicnum); to->sb_blocksize = be32_to_cpu(from->sb_blocksize); @@ -465,6 +466,17 @@ xfs_sb_from_disk( to->sb_pad = 0; to->sb_pquotino = be64_to_cpu(from->sb_pquotino); to->sb_lsn = be64_to_cpu(from->sb_lsn); + /* Convert on-disk flags to in-memory flags? */ + if (convert_xquota) + xfs_sb_quota_from_disk(to); +} + +void +xfs_sb_from_disk( + struct xfs_sb *to, + xfs_dsb_t *from) +{ + __xfs_sb_from_disk(to, from, true); } static inline void @@ -580,7 +592,11 @@ xfs_sb_verify( struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_sb sb; - xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp)); + /* + * Use call variant which doesn't convert quota flags from disk + * format, because xfs_mount_validate_sb checks the on-disk flags. 
+ */ + __xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp), false); /* * Only check the in progress field for the primary superblock as @@ -633,8 +649,9 @@ xfs_sb_read_verify( out_error: if (error) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, - mp, bp->b_addr); + if (error != EWRONGFS) + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, + mp, bp->b_addr); xfs_buf_ioerror(bp, error); } } diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index b12079a..a52136c 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -669,32 +669,47 @@ static inline int pmd_numa(pmd_t pmd) #ifndef pte_mknonnuma static inline pte_t pte_mknonnuma(pte_t pte) { - pte = pte_clear_flags(pte, _PAGE_NUMA); - return pte_set_flags(pte, _PAGE_PRESENT|_PAGE_ACCESSED); + pteval_t val = pte_val(pte); + + val &= ~_PAGE_NUMA; + val |= (_PAGE_PRESENT|_PAGE_ACCESSED); + return __pte(val); } #endif #ifndef pmd_mknonnuma static inline pmd_t pmd_mknonnuma(pmd_t pmd) { - pmd = pmd_clear_flags(pmd, _PAGE_NUMA); - return pmd_set_flags(pmd, _PAGE_PRESENT|_PAGE_ACCESSED); + pmdval_t val = pmd_val(pmd); + + val &= ~_PAGE_NUMA; + val |= (_PAGE_PRESENT|_PAGE_ACCESSED); + + return __pmd(val); } #endif #ifndef pte_mknuma static inline pte_t pte_mknuma(pte_t pte) { - pte = pte_set_flags(pte, _PAGE_NUMA); - return pte_clear_flags(pte, _PAGE_PRESENT); + pteval_t val = pte_val(pte); + + val &= ~_PAGE_PRESENT; + val |= _PAGE_NUMA; + + return __pte(val); } #endif #ifndef pmd_mknuma static inline pmd_t pmd_mknuma(pmd_t pmd) { - pmd = pmd_set_flags(pmd, _PAGE_NUMA); - return pmd_clear_flags(pmd, _PAGE_PRESENT); + pmdval_t val = pmd_val(pmd); + + val &= ~_PAGE_PRESENT; + val |= _PAGE_NUMA; + + return __pmd(val); } #endif #else diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 0bd7a2e..b521d1c 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -17,6 +17,7 @@ {0x1002, 0x1315, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x1316, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x1317, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x1318, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x131B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x131C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x131D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ @@ -73,7 +74,6 @@ {0x1002, 0x4C64, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV250|RADEON_IS_MOBILITY}, \ {0x1002, 0x4C66, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV250|RADEON_IS_MOBILITY}, \ {0x1002, 0x4C67, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV250|RADEON_IS_MOBILITY}, \ - {0x1002, 0x4C6E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV280|RADEON_IS_MOBILITY}, \ {0x1002, 0x4E44, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R300}, \ {0x1002, 0x4E45, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R300}, \ {0x1002, 0x4E46, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R300}, \ @@ -164,8 +164,11 @@ {0x1002, 0x6601, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6602, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6603, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x6604, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x6605, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6606, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6607, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x6608, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_NEW_MEMMAP}, \ {0x1002, 
0x6610, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6611, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6613, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_NEW_MEMMAP}, \ @@ -175,6 +178,8 @@ {0x1002, 0x6631, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_OLAND|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6640, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6641, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x6646, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x6647, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6649, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6650, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6651, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BONAIRE|RADEON_NEW_MEMMAP}, \ @@ -285,6 +290,7 @@ {0x1002, 0x6829, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \ {0x1002, 0x682A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x682B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x682C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \ {0x1002, 0x682D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x682F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6830, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ diff --git a/include/drm/i915_powerwell.h b/include/drm/i915_powerwell.h index cfdc884..baa6f11 100644 --- a/include/drm/i915_powerwell.h +++ b/include/drm/i915_powerwell.h @@ -30,7 +30,8 @@ #define _I915_POWERWELL_H_ /* For use by hda_i915 driver */ -extern void i915_request_power_well(void); -extern void 
i915_release_power_well(void); +extern int i915_request_power_well(void); +extern int i915_release_power_well(void); +extern int i915_get_cdclk_freq(void); #endif /* _I915_POWERWELL_H_ */ diff --git a/include/linux/audit.h b/include/linux/audit.h index 4fb28b2..c25cb64d 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -46,6 +46,7 @@ struct audit_tree; struct audit_krule { int vers_ops; + u32 pflags; u32 flags; u32 listnr; u32 action; @@ -63,6 +64,9 @@ struct audit_krule { u64 prio; }; +/* Flag to indicate legacy AUDIT_LOGINUID unset usage */ +#define AUDIT_LOGINUID_LEGACY 0x1 + struct audit_field { u32 type; u32 val; diff --git a/include/linux/bio.h b/include/linux/bio.h index ec48bac..6c17ad5 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -187,6 +187,7 @@ struct bio_integrity_payload { unsigned short bip_slab; /* slab the bip came from */ unsigned short bip_vcnt; /* # of integrity bio_vecs */ unsigned short bip_idx; /* current bip_vec index */ + unsigned short bip_max_vcnt; /* integrity bio_vec slots */ unsigned bip_owns_buf:1; /* should free bip_buf */ struct work_struct bip_work; /* I/O completion */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0e6f765..b105678 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1198,10 +1198,9 @@ static inline int queue_alignment_offset(struct request_queue *q) static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector) { unsigned int granularity = max(lim->physical_block_size, lim->io_min); - unsigned int alignment = (sector << 9) & (granularity - 1); + unsigned int alignment = sector_div(sector, granularity >> 9) << 9; - return (granularity + lim->alignment_offset - alignment) - & (granularity - 1); + return (granularity + lim->alignment_offset - alignment) % granularity; } static inline int bdev_alignment_offset(struct block_device *bdev) diff --git a/include/linux/capability.h b/include/linux/capability.h index 
a6ee1f9..aa93e5e 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -78,8 +78,11 @@ extern const kernel_cap_t __cap_init_eff_set; # error Fix up hand-coded capability macro initializers #else /* HAND-CODED capability initializers */ +#define CAP_LAST_U32 ((_KERNEL_CAPABILITY_U32S) - 1) +#define CAP_LAST_U32_VALID_MASK (CAP_TO_MASK(CAP_LAST_CAP + 1) -1) + # define CAP_EMPTY_SET ((kernel_cap_t){{ 0, 0 }}) -# define CAP_FULL_SET ((kernel_cap_t){{ ~0, ~0 }}) +# define CAP_FULL_SET ((kernel_cap_t){{ ~0, CAP_LAST_U32_VALID_MASK }}) # define CAP_FS_SET ((kernel_cap_t){{ CAP_FS_MASK_B0 \ | CAP_TO_MASK(CAP_LINUX_IMMUTABLE), \ CAP_FS_MASK_B1 } }) @@ -210,7 +213,7 @@ extern bool has_ns_capability_noaudit(struct task_struct *t, struct user_namespace *ns, int cap); extern bool capable(int cap); extern bool ns_capable(struct user_namespace *ns, int cap); -extern bool inode_capable(const struct inode *inode, int cap); +extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap); extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap); /* audit system wants to get cap info from files as well */ diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index dbbf8aa..4802826 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -289,7 +289,7 @@ extern struct clocksource* clocksource_get_next(void); extern void clocksource_change_rating(struct clocksource *cs, int rating); extern void clocksource_suspend(void); extern void clocksource_resume(void); -extern struct clocksource * __init __weak clocksource_default_clock(void); +extern struct clocksource * __init clocksource_default_clock(void); extern void clocksource_mark_unstable(struct clocksource *cs); extern void diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 091d72e..01e3132 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -22,7 +22,7 @@ extern int 
sysctl_extfrag_handler(struct ctl_table *table, int write, extern int fragmentation_index(struct zone *zone, unsigned int order); extern unsigned long try_to_compact_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask, - bool sync, bool *contended); + enum migrate_mode mode, bool *contended); extern void compact_pgdat(pg_data_t *pgdat, int order); extern void reset_isolation_suitable(pg_data_t *pgdat); extern unsigned long compaction_suitable(struct zone *zone, int order); @@ -62,6 +62,22 @@ static inline bool compaction_deferred(struct zone *zone, int order) return zone->compact_considered < defer_limit; } +/* + * Update defer tracking counters after successful compaction of given order, + * which means an allocation either succeeded (alloc_success == true) or is + * expected to succeed. + */ +static inline void compaction_defer_reset(struct zone *zone, int order, + bool alloc_success) +{ + if (alloc_success) { + zone->compact_considered = 0; + zone->compact_defer_shift = 0; + } + if (order >= zone->compact_order_failed) + zone->compact_order_failed = order + 1; +} + /* Returns true if restarting compaction after many failures */ static inline bool compaction_restarting(struct zone *zone, int order) { @@ -75,7 +91,7 @@ static inline bool compaction_restarting(struct zone *zone, int order) #else static inline unsigned long try_to_compact_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *nodemask, - bool sync, bool *contended) + enum migrate_mode mode, bool *contended) { return COMPACT_CONTINUE; } diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 24545cd..02ae99e 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -37,6 +37,9 @@ __asm__ ("" : "=r"(__ptr) : "0"(ptr)); \ (typeof(ptr)) (__ptr + (off)); }) +/* Make the optimizer believe the variable can be manipulated arbitrarily. 
*/ +#define OPTIMIZER_HIDE_VAR(var) __asm__ ("" : "=r" (var) : "0" (var)) + #ifdef __CHECKER__ #define __must_be_array(arr) 0 #else diff --git a/include/linux/compiler-gcc5.h b/include/linux/compiler-gcc5.h new file mode 100644 index 0000000..cdd1cc2 --- /dev/null +++ b/include/linux/compiler-gcc5.h @@ -0,0 +1,66 @@ +#ifndef __LINUX_COMPILER_H +#error "Please don't include <linux/compiler-gcc5.h> directly, include <linux/compiler.h> instead." +#endif + +#define __used __attribute__((__used__)) +#define __must_check __attribute__((warn_unused_result)) +#define __compiler_offsetof(a, b) __builtin_offsetof(a, b) + +/* Mark functions as cold. gcc will assume any path leading to a call + to them will be unlikely. This means a lot of manual unlikely()s + are unnecessary now for any paths leading to the usual suspects + like BUG(), printk(), panic() etc. [but let's keep them for now for + older compilers] + + Early snapshots of gcc 4.3 don't support this and we can't detect this + in the preprocessor, but we can live with this because they're unreleased. + Maketime probing would be overkill here. + + gcc also has a __attribute__((__hot__)) to move hot functions into + a special section, but I don't see any sense in this right now in + the kernel context */ +#define __cold __attribute__((__cold__)) + +#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) + +#ifndef __CHECKER__ +# define __compiletime_warning(message) __attribute__((warning(message))) +# define __compiletime_error(message) __attribute__((error(message))) +#endif /* __CHECKER__ */ + +/* + * Mark a position in code as unreachable. This can be used to + * suppress control flow warnings after asm blocks that transfer + * control elsewhere. + * + * Early snapshots of gcc 4.5 don't support this and we can't detect + * this in the preprocessor, but we can live with this because they're + * unreleased. Really, we need to have autoconf for the kernel. 
+ */ +#define unreachable() __builtin_unreachable() + +/* Mark a function definition as prohibited from being cloned. */ +#define __noclone __attribute__((__noclone__)) + +/* + * Tell the optimizer that something else uses this function or variable. + */ +#define __visible __attribute__((externally_visible)) + +/* + * GCC 'asm goto' miscompiles certain code sequences: + * + * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 + * + * Work it around via a compiler barrier quirk suggested by Jakub Jelinek. + * Fixed in GCC 4.8.2 and later versions. + * + * (asm goto is automatically volatile - the naming reflects this.) + */ +#define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0) + +#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP +#define __HAVE_BUILTIN_BSWAP32__ +#define __HAVE_BUILTIN_BSWAP64__ +#define __HAVE_BUILTIN_BSWAP16__ +#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */ diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h index dc1bd3d..5529c52 100644 --- a/include/linux/compiler-intel.h +++ b/include/linux/compiler-intel.h @@ -15,6 +15,7 @@ */ #undef barrier #undef RELOC_HIDE +#undef OPTIMIZER_HIDE_VAR #define barrier() __memory_barrier() @@ -23,6 +24,12 @@ __ptr = (unsigned long) (ptr); \ (typeof(ptr)) (__ptr + (off)); }) +/* This should act as an optimization barrier on var. + * Given that this compiler does not have inline assembly, a compiler barrier + * is the best we can do. + */ +#define OPTIMIZER_HIDE_VAR(var) barrier() + /* Intel ECC compiler doesn't support __builtin_types_compatible_p() */ #define __must_be_array(a) 0 diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 92669cd..a2329c5 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -170,6 +170,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); (typeof(ptr)) (__ptr + (off)); }) #endif +#ifndef OPTIMIZER_HIDE_VAR +#define OPTIMIZER_HIDE_VAR(var) barrier() +#endif + /* Not-quite-unique ID. 
*/ #ifndef __UNIQUE_ID # define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__) diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index cc1b01c..a7ebb89 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -12,10 +12,31 @@ #include <linux/cpumask.h> #include <linux/nodemask.h> #include <linux/mm.h> +#include <linux/jump_label.h> #ifdef CONFIG_CPUSETS -extern int number_of_cpusets; /* How many cpusets are defined in system? */ +extern struct static_key cpusets_enabled_key; +static inline bool cpusets_enabled(void) +{ + return static_key_false(&cpusets_enabled_key); +} + +static inline int nr_cpusets(void) +{ + /* jump label reference count + the top-level cpuset */ + return static_key_count(&cpusets_enabled_key) + 1; +} + +static inline void cpuset_inc(void) +{ + static_key_slow_inc(&cpusets_enabled_key); +} + +static inline void cpuset_dec(void) +{ + static_key_slow_dec(&cpusets_enabled_key); +} extern int cpuset_init(void); extern void cpuset_init_smp(void); @@ -32,13 +53,13 @@ extern int __cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask); static inline int cpuset_node_allowed_softwall(int node, gfp_t gfp_mask) { - return number_of_cpusets <= 1 || + return nr_cpusets() <= 1 || __cpuset_node_allowed_softwall(node, gfp_mask); } static inline int cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask) { - return number_of_cpusets <= 1 || + return nr_cpusets() <= 1 || __cpuset_node_allowed_hardwall(node, gfp_mask); } @@ -87,25 +108,26 @@ extern void rebuild_sched_domains(void); extern void cpuset_print_task_mems_allowed(struct task_struct *p); /* - * get_mems_allowed is required when making decisions involving mems_allowed - * such as during page allocation. mems_allowed can be updated in parallel - * and depending on the new value an operation can fail potentially causing - * process failure. A retry loop with get_mems_allowed and put_mems_allowed - * prevents these artificial failures. 
+ * read_mems_allowed_begin is required when making decisions involving + * mems_allowed such as during page allocation. mems_allowed can be updated in + * parallel and depending on the new value an operation can fail potentially + * causing process failure. A retry loop with read_mems_allowed_begin and + * read_mems_allowed_retry prevents these artificial failures. */ -static inline unsigned int get_mems_allowed(void) +static inline unsigned int read_mems_allowed_begin(void) { return read_seqcount_begin(&current->mems_allowed_seq); } /* - * If this returns false, the operation that took place after get_mems_allowed - * may have failed. It is up to the caller to retry the operation if + * If this returns true, the operation that took place after + * read_mems_allowed_begin may have failed artificially due to a concurrent + * update of mems_allowed. It is up to the caller to retry the operation if * appropriate. */ -static inline bool put_mems_allowed(unsigned int seq) +static inline bool read_mems_allowed_retry(unsigned int seq) { - return !read_seqcount_retry(&current->mems_allowed_seq, seq); + return read_seqcount_retry(&current->mems_allowed_seq, seq); } static inline void set_mems_allowed(nodemask_t nodemask) @@ -119,6 +141,8 @@ static inline void set_mems_allowed(nodemask_t nodemask) #else /* !CONFIG_CPUSETS */ +static inline bool cpusets_enabled(void) { return false; } + static inline int cpuset_init(void) { return 0; } static inline void cpuset_init_smp(void) {} @@ -221,14 +245,14 @@ static inline void set_mems_allowed(nodemask_t nodemask) { } -static inline unsigned int get_mems_allowed(void) +static inline unsigned int read_mems_allowed_begin(void) { return 0; } -static inline bool put_mems_allowed(unsigned int seq) +static inline bool read_mems_allowed_retry(unsigned int seq) { - return true; + return false; } #endif /* !CONFIG_CPUSETS */ diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 7032518..60023e5 100644 --- a/include/linux/crash_dump.h +++ 
b/include/linux/crash_dump.h @@ -14,14 +14,13 @@ extern unsigned long long elfcorehdr_addr; extern unsigned long long elfcorehdr_size; -extern int __weak elfcorehdr_alloc(unsigned long long *addr, - unsigned long long *size); -extern void __weak elfcorehdr_free(unsigned long long addr); -extern ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos); -extern ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos); -extern int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma, - unsigned long from, unsigned long pfn, - unsigned long size, pgprot_t prot); +extern int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size); +extern void elfcorehdr_free(unsigned long long addr); +extern ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos); +extern ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos); +extern int remap_oldmem_pfn_range(struct vm_area_struct *vma, + unsigned long from, unsigned long pfn, + unsigned long size, pgprot_t prot); extern ssize_t copy_oldmem_page(unsigned long, char *, size_t, unsigned long, int); diff --git a/include/linux/cred.h b/include/linux/cred.h index 04421e8..6c58dd7 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -68,6 +68,7 @@ extern void groups_free(struct group_info *); extern int set_current_groups(struct group_info *); extern int set_groups(struct cred *, struct group_info *); extern int groups_search(const struct group_info *, kgid_t); +extern bool may_setgroups(void); /* access the groups "array" with this macro */ #define GROUP_AT(gi, i) \ diff --git a/include/linux/crypto.h b/include/linux/crypto.h index b81b096..df663e5 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -26,6 +26,19 @@ #include <linux/uaccess.h> /* + * Autoloaded crypto modules should only use a prefixed name to avoid allowing + * arbitrary modules to be loaded. Loading from userspace may still need the + * unprefixed names, so retains those aliases as well. 
+ * This uses __MODULE_INFO directly instead of MODULE_ALIAS because pre-4.3 + * gcc (e.g. avr32 toolchain) uses __LINE__ for uniqueness, and this macro + * expands twice on the same line. Instead, use a separate base name for the + * alias. + */ +#define MODULE_ALIAS_CRYPTO(name) \ + __MODULE_INFO(alias, alias_userspace, name); \ + __MODULE_INFO(alias, alias_crypto, "crypto-" name) + +/* * Algorithm masks and types. */ #define CRYPTO_ALG_TYPE_MASK 0x0000000f diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 59066e0..53c1b60 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -122,15 +122,15 @@ struct dentry { void *d_fsdata; /* fs-specific data */ struct list_head d_lru; /* LRU list */ + struct list_head d_child; /* child of parent list */ + struct list_head d_subdirs; /* our children */ /* - * d_child and d_rcu can share memory + * d_alias and d_rcu can share memory */ union { - struct list_head d_child; /* child of parent list */ + struct hlist_node d_alias; /* inode alias list */ struct rcu_head d_rcu; } d_u; - struct list_head d_subdirs; /* our children */ - struct hlist_node d_alias; /* inode alias list */ }; /* @@ -211,6 +211,8 @@ struct dentry_operations { #define DCACHE_LRU_LIST 0x80000 #define DCACHE_DENTRY_KILLED 0x100000 +#define DCACHE_MAY_FREE 0x00800000 + extern seqlock_t rename_lock; static inline int dname_external(const struct dentry *dentry) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 3a8d0a2..ec951f9 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -97,6 +97,20 @@ static inline int dma_set_coherent_mask(struct device *dev, u64 mask) } #endif +/* + * Set both the DMA mask and the coherent DMA mask to the same thing. + * Note that we don't check the return value from dma_set_coherent_mask() + * as the DMA API guarantees that the coherent DMA mask can be set to + * the same or smaller than the streaming DMA mask. 
+ */ +static inline int dma_set_mask_and_coherent(struct device *dev, u64 mask) +{ + int rc = dma_set_mask(dev, mask); + if (rc == 0) + dma_set_coherent_mask(dev, mask); + return rc; +} + extern u64 dma_get_required_mask(struct device *dev); static inline unsigned int dma_get_max_seg_size(struct device *dev) diff --git a/include/linux/fs.h b/include/linux/fs.h index 164d2a9..9cb726a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2217,7 +2217,13 @@ extern int filemap_fdatawrite_range(struct address_space *mapping, extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync); extern int vfs_fsync(struct file *file, int datasync); -extern int generic_write_sync(struct file *file, loff_t pos, loff_t count); +static inline int generic_write_sync(struct file *file, loff_t pos, loff_t count) +{ + if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host)) + return 0; + return vfs_fsync_range(file, pos, pos + count - 1, + (file->f_flags & __O_SYNC) ? 
0 : 1); +} extern void emergency_sync(void); extern void emergency_remount(void); #ifdef CONFIG_BLOCK @@ -2490,6 +2496,9 @@ static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, void inode_dio_wait(struct inode *inode); void inode_dio_done(struct inode *inode); +extern void inode_set_flags(struct inode *inode, unsigned int flags, + unsigned int mask); + extern const struct file_operations generic_ro_fops; #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 9f15c00..e68db4d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -524,6 +524,7 @@ static inline int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_a extern int ftrace_arch_read_dyn_info(char *buf, int size); extern int skip_trace(unsigned long ip); +extern void ftrace_module_init(struct module *mod); extern void ftrace_disable_daemon(void); extern void ftrace_enable_daemon(void); @@ -533,6 +534,7 @@ static inline int ftrace_force_update(void) { return 0; } static inline void ftrace_disable_daemon(void) { } static inline void ftrace_enable_daemon(void) { } static inline void ftrace_release_mod(struct module *mod) {} +static inline void ftrace_module_init(struct module *mod) {} static inline int register_ftrace_command(struct ftrace_func_command *cmd) { return -EINVAL; diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 9b4dd49..fa7ac98 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -364,8 +364,8 @@ void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask); extern void __free_pages(struct page *page, unsigned int order); extern void free_pages(unsigned long addr, unsigned int order); -extern void free_hot_cold_page(struct page *page, int cold); -extern void free_hot_cold_page_list(struct list_head *list, int cold); +extern void free_hot_cold_page(struct page *page, bool cold); +extern void free_hot_cold_page_list(struct list_head *list, bool 
cold); extern void __free_memcg_kmem_pages(struct page *page, unsigned int order); extern void free_memcg_kmem_pages(unsigned long addr, unsigned int order); diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h index 32ba451..c32411b 100644 --- a/include/linux/hid-sensor-hub.h +++ b/include/linux/hid-sensor-hub.h @@ -21,6 +21,8 @@ #include <linux/hid.h> #include <linux/hid-sensor-ids.h> +#include <linux/iio/iio.h> +#include <linux/iio/trigger.h> /** * struct hid_sensor_hub_attribute_info - Attribute info @@ -166,6 +168,7 @@ struct hid_sensor_common { struct platform_device *pdev; unsigned usage_id; bool data_ready; + struct iio_trigger *trigger; struct hid_sensor_hub_attribute_info poll; struct hid_sensor_hub_attribute_info report_state; struct hid_sensor_hub_attribute_info power_state; diff --git a/include/linux/hid.h b/include/linux/hid.h index 31b9d29..00c88fc 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -286,6 +286,7 @@ struct hid_item { #define HID_QUIRK_HIDINPUT_FORCE 0x00000080 #define HID_QUIRK_NO_EMPTY_INPUT 0x00000100 #define HID_QUIRK_NO_INIT_INPUT_REPORTS 0x00000200 +#define HID_QUIRK_ALWAYS_POLL 0x00000400 #define HID_QUIRK_SKIP_OUTPUT_REPORTS 0x00010000 #define HID_QUIRK_FULLSPEED_INTERVAL 0x10000000 #define HID_QUIRK_NO_INIT_REPORTS 0x20000000 diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index a291552..aac671b 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -92,10 +92,6 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma); #endif /* CONFIG_DEBUG_VM */ extern unsigned long transparent_hugepage_flags; -extern int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, - pmd_t *dst_pmd, pmd_t *src_pmd, - struct vm_area_struct *vma, - unsigned long addr, unsigned long end); extern int split_huge_page_to_list(struct page *page, struct list_head *list); static inline int split_huge_page(struct page *page) { diff --git a/include/linux/hugetlb.h 
b/include/linux/hugetlb.h index 6125579..511b1a0 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -387,15 +387,23 @@ static inline pgoff_t basepage_index(struct page *page) extern void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn); -int pmd_huge_support(void); -/* - * Currently hugepage migration is enabled only for pmd-based hugepage. - * This function will be updated when hugepage migration is more widely - * supported. - */ static inline int hugepage_migration_support(struct hstate *h) { - return pmd_huge_support() && (huge_page_shift(h) == PMD_SHIFT); +#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION + return huge_page_shift(h) == PMD_SHIFT; +#else + return 0; +#endif +} + +static inline bool hugepages_supported(void) +{ + /* + * Some platform decide whether they support huge pages at boot + * time. On these, such as powerpc, HPAGE_SHIFT is set to 0 when + * there is no such support + */ + return HPAGE_SHIFT != 0; } #else /* CONFIG_HUGETLB_PAGE */ @@ -425,7 +433,6 @@ static inline pgoff_t basepage_index(struct page *page) return page->index; } #define dissolve_free_huge_pages(s, e) do {} while (0) -#define pmd_huge_support() 0 #define hugepage_migration_support(h) 0 #endif /* CONFIG_HUGETLB_PAGE */ diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index d98503b..b6043a0 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -473,15 +473,18 @@ hv_get_ringbuffer_availbytes(struct hv_ring_buffer_info *rbi, * 0 . 13 (Windows Server 2008) * 1 . 1 (Windows 7) * 2 . 4 (Windows 8) + * 3 . 
0 (Windows 8 R2) */ #define VERSION_WS2008 ((0 << 16) | (13)) #define VERSION_WIN7 ((1 << 16) | (1)) #define VERSION_WIN8 ((2 << 16) | (4)) +#define VERSION_WIN8_1 ((3 << 16) | (0)) + #define VERSION_INVAL -1 -#define VERSION_CURRENT VERSION_WIN8 +#define VERSION_CURRENT VERSION_WIN8_1 /* Make maximum size of pipe payload of 16K */ #define MAX_PIPE_DATA_PAYLOAD (sizeof(u8) * 16384) @@ -884,7 +887,7 @@ struct vmbus_channel_relid_released { struct vmbus_channel_initiate_contact { struct vmbus_channel_message_header header; u32 vmbus_version_requested; - u32 padding2; + u32 target_vcpu; /* The VCPU the host should respond to */ u64 interrupt_page; u64 monitor_page1; u64 monitor_page2; diff --git a/include/linux/if_team.h b/include/linux/if_team.h index a899dc2..a6aa970 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -194,6 +194,7 @@ struct team { bool user_carrier_enabled; bool queue_override_enabled; struct list_head *qom_lists; /* array of queue override mapping lists */ + bool port_mtu_change_allowed; struct { unsigned int count; unsigned int interval; /* in ms */ diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 715c343..0bd3943 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -90,7 +90,6 @@ extern struct net_device *vlan_dev_real_dev(const struct net_device *dev); extern u16 vlan_dev_vlan_id(const struct net_device *dev); extern bool vlan_do_receive(struct sk_buff **skb); -extern struct sk_buff *vlan_untag(struct sk_buff *skb); extern int vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid); extern void vlan_vid_del(struct net_device *dev, __be16 proto, u16 vid); @@ -126,11 +125,6 @@ static inline bool vlan_do_receive(struct sk_buff **skb) return false; } -static inline struct sk_buff *vlan_untag(struct sk_buff *skb) -{ - return skb; -} - static inline int vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid) { return 0; diff --git a/include/linux/iio/events.h b/include/linux/iio/events.h 
index 13ce220..593ae7c 100644 --- a/include/linux/iio/events.h +++ b/include/linux/iio/events.h @@ -90,7 +90,7 @@ enum iio_event_direction { #define IIO_EVENT_CODE_EXTRACT_TYPE(mask) ((mask >> 56) & 0xFF) -#define IIO_EVENT_CODE_EXTRACT_DIR(mask) ((mask >> 48) & 0xCF) +#define IIO_EVENT_CODE_EXTRACT_DIR(mask) ((mask >> 48) & 0x7F) #define IIO_EVENT_CODE_EXTRACT_CHAN_TYPE(mask) ((mask >> 32) & 0xFF) diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h index 369cf2c..68f46cd 100644 --- a/include/linux/iio/trigger.h +++ b/include/linux/iio/trigger.h @@ -84,10 +84,12 @@ static inline void iio_trigger_put(struct iio_trigger *trig) put_device(&trig->dev); } -static inline void iio_trigger_get(struct iio_trigger *trig) +static inline struct iio_trigger *iio_trigger_get(struct iio_trigger *trig) { get_device(&trig->dev); __module_get(trig->ops->owner); + + return trig; } /** diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 79640e0..f738f92 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -234,7 +234,7 @@ static inline void in_dev_put(struct in_device *idev) static __inline__ __be32 inet_make_mask(int logmask) { if (logmask) - return htonl(~((1<<(32-logmask))-1)); + return htonl(~((1U<<(32-logmask))-1)); return 0; } diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 766558a..a85cceb 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -40,6 +40,7 @@ extern struct fs_struct init_fs; #define INIT_SIGNALS(sig) { \ .nr_threads = 1, \ + .thread_head = LIST_HEAD_INIT(init_task.thread_node), \ .wait_chldexit = __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\ .shared_pending = { \ .list = LIST_HEAD_INIT(sig.shared_pending.list), \ @@ -221,6 +222,7 @@ extern struct task_group root_task_group; [PIDTYPE_SID] = INIT_PID_LINK(PIDTYPE_SID), \ }, \ .thread_group = LIST_HEAD_INIT(tsk.thread_group), \ + .thread_node = LIST_HEAD_INIT(init_signals.thread_head), \ INIT_IDS \ 
INIT_PERF_EVENTS(tsk) \ INIT_TRACE_IRQFLAGS \ diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index a2609fb..750d1e6 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -200,7 +200,40 @@ static inline int check_wakeup_irqs(void) { return 0; } extern cpumask_var_t irq_default_affinity; -extern int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask); +/* Internal implementation. Use the helpers below */ +extern int __irq_set_affinity(unsigned int irq, const struct cpumask *cpumask, + bool force); + +/** + * irq_set_affinity - Set the irq affinity of a given irq + * @irq: Interrupt to set affinity + * @mask: cpumask + * + * Fails if cpumask does not contain an online CPU + */ +static inline int +irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) +{ + return __irq_set_affinity(irq, cpumask, false); +} + +/** + * irq_force_affinity - Force the irq affinity of a given irq + * @irq: Interrupt to set affinity + * @mask: cpumask + * + * Same as irq_set_affinity, but without checking the mask against + * online cpus. + * + * Solely for low level cpu hotplug code, where we need to make per + * cpu interrupts affine before the cpu becomes online. 
+ */ +static inline int +irq_force_affinity(unsigned int irq, const struct cpumask *cpumask) +{ + return __irq_set_affinity(irq, cpumask, true); +} + extern int irq_can_set_affinity(unsigned int irq); extern int irq_select_affinity(unsigned int irq); @@ -237,6 +270,11 @@ static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m) return -EINVAL; } +static inline int irq_force_affinity(unsigned int irq, const struct cpumask *cpumask) +{ + return 0; +} + static inline int irq_can_set_affinity(unsigned int irq) { return 0; diff --git a/include/linux/irq.h b/include/linux/irq.h index e2d8789..7a08e31 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -383,7 +383,8 @@ extern void remove_percpu_irq(unsigned int irq, struct irqaction *act); extern void irq_cpu_online(void); extern void irq_cpu_offline(void); -extern int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *cpumask); +extern int irq_set_affinity_locked(struct irq_data *data, + const struct cpumask *cpumask, bool force); #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ) void irq_move_irq(struct irq_data *data); diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index da992bc..6686311 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -27,6 +27,8 @@ struct irq_desc; * @irq_count: stats field to detect stalled irqs * @last_unhandled: aging timer for unhandled count * @irqs_unhandled: stats field for spurious unhandled interrupts + * @threads_handled: stats field for deferred spurious detection of threaded handlers + * @threads_handled_last: comparator field for deferred spurious detection of theraded handlers * @lock: locking for SMP * @affinity_hint: hint to user space for preferred irq affinity * @affinity_notify: context for notification of affinity changes @@ -52,6 +54,8 @@ struct irq_desc { unsigned int irq_count; /* For detecting broken IRQs */ unsigned long last_unhandled; /* Aging timer for unhandled count */ 
unsigned int irqs_unhandled; + atomic_t threads_handled; + int threads_handled_last; u64 random_ip; raw_spinlock_t lock; struct cpumask *percpu_enabled; diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index d5b50a1..0dae71e 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -159,7 +159,11 @@ typedef struct journal_header_s * journal_block_tag (in the descriptor). The other h_chksum* fields are * not used. * - * Checksum v1 and v2 are mutually exclusive features. + * If FEATURE_INCOMPAT_CSUM_V3 is set, the descriptor block uses + * journal_block_tag3_t to store a full 32-bit checksum. Everything else + * is the same as v2. + * + * Checksum v1, v2, and v3 are mutually exclusive features. */ struct commit_header { __be32 h_magic; @@ -179,6 +183,14 @@ struct commit_header { * raw struct shouldn't be used for pointer math or sizeof() - use * journal_tag_bytes(journal) instead to compute this. */ +typedef struct journal_block_tag3_s +{ + __be32 t_blocknr; /* The on-disk block number */ + __be32 t_flags; /* See below */ + __be32 t_blocknr_high; /* most-significant high 32bits. */ + __be32 t_checksum; /* crc32c(uuid+seq+block) */ +} journal_block_tag3_t; + typedef struct journal_block_tag_s { __be32 t_blocknr; /* The on-disk block number */ @@ -187,9 +199,6 @@ typedef struct journal_block_tag_s __be32 t_blocknr_high; /* most-significant high 32bits. 
*/ } journal_block_tag_t; -#define JBD2_TAG_SIZE32 (offsetof(journal_block_tag_t, t_blocknr_high)) -#define JBD2_TAG_SIZE64 (sizeof(journal_block_tag_t)) - /* Tail of descriptor block, for checksumming */ struct jbd2_journal_block_tail { __be32 t_checksum; /* crc32c(uuid+descr_block) */ @@ -284,6 +293,7 @@ typedef struct journal_superblock_s #define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002 #define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT 0x00000004 #define JBD2_FEATURE_INCOMPAT_CSUM_V2 0x00000008 +#define JBD2_FEATURE_INCOMPAT_CSUM_V3 0x00000010 /* Features known to this kernel version: */ #define JBD2_KNOWN_COMPAT_FEATURES JBD2_FEATURE_COMPAT_CHECKSUM @@ -291,7 +301,8 @@ typedef struct journal_superblock_s #define JBD2_KNOWN_INCOMPAT_FEATURES (JBD2_FEATURE_INCOMPAT_REVOKE | \ JBD2_FEATURE_INCOMPAT_64BIT | \ JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | \ - JBD2_FEATURE_INCOMPAT_CSUM_V2) + JBD2_FEATURE_INCOMPAT_CSUM_V2 | \ + JBD2_FEATURE_INCOMPAT_CSUM_V3) #ifdef __KERNEL__ @@ -1296,6 +1307,15 @@ static inline int tid_geq(tid_t x, tid_t y) extern int jbd2_journal_blocks_per_page(struct inode *inode); extern size_t journal_tag_bytes(journal_t *journal); +static inline int jbd2_journal_has_csum_v2or3(journal_t *journal) +{ + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) || + JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) + return 1; + + return 0; +} + /* * We reserve t_outstanding_credits >> JBD2_CONTROL_BLOCKS_SHIFT for * transaction control blocks. 
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index d235e88..8acbb7b 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -258,23 +258,11 @@ extern unsigned long preset_lpj; #define SEC_JIFFIE_SC (32 - SHIFT_HZ) #endif #define NSEC_JIFFIE_SC (SEC_JIFFIE_SC + 29) -#define USEC_JIFFIE_SC (SEC_JIFFIE_SC + 19) #define SEC_CONVERSION ((unsigned long)((((u64)NSEC_PER_SEC << SEC_JIFFIE_SC) +\ TICK_NSEC -1) / (u64)TICK_NSEC)) #define NSEC_CONVERSION ((unsigned long)((((u64)1 << NSEC_JIFFIE_SC) +\ TICK_NSEC -1) / (u64)TICK_NSEC)) -#define USEC_CONVERSION \ - ((unsigned long)((((u64)NSEC_PER_USEC << USEC_JIFFIE_SC) +\ - TICK_NSEC -1) / (u64)TICK_NSEC)) -/* - * USEC_ROUND is used in the timeval to jiffie conversion. See there - * for more details. It is the scaled resolution rounding value. Note - * that it is a 64-bit value. Since, when it is applied, we are already - * in jiffies (albit scaled), it is nothing but the bits we will shift - * off. - */ -#define USEC_ROUND (u64)(((u64)1 << USEC_JIFFIE_SC) - 1) /* * The maximum jiffie value is (MAX_INT >> 1). Here we translate that * into seconds. 
The 64-bit case will overflow if we are not careful, diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 006627b..c5f7962 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -63,6 +63,10 @@ struct static_key { # include <asm/jump_label.h> # define HAVE_JUMP_LABEL +#else +struct static_key { + atomic_t enabled; +}; #endif /* CC_HAVE_ASM_GOTO && CONFIG_JUMP_LABEL */ enum jump_label_type { @@ -73,6 +77,12 @@ enum jump_label_type { struct module; #include <linux/atomic.h> + +static inline int static_key_count(struct static_key *key) +{ + return atomic_read(&key->enabled); +} + #ifdef HAVE_JUMP_LABEL #define JUMP_LABEL_TRUE_BRANCH 1UL @@ -123,24 +133,20 @@ extern void jump_label_apply_nops(struct module *mod); #else /* !HAVE_JUMP_LABEL */ -struct static_key { - atomic_t enabled; -}; - static __always_inline void jump_label_init(void) { } static __always_inline bool static_key_false(struct static_key *key) { - if (unlikely(atomic_read(&key->enabled)) > 0) + if (unlikely(static_key_count(key) > 0)) return true; return false; } static __always_inline bool static_key_true(struct static_key *key) { - if (likely(atomic_read(&key->enabled)) > 0) + if (likely(static_key_count(key) > 0)) return true; return false; } @@ -180,7 +186,7 @@ static inline int jump_label_apply_nops(struct module *mod) static inline bool static_key_enabled(struct static_key *key) { - return (atomic_read(&key->enabled) > 0); + return static_key_count(key) > 0; } #endif /* _LINUX_JUMP_LABEL_H */ diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 51c72be..4b2053a 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -74,6 +74,7 @@ static inline unsigned int kstat_softirqs_cpu(unsigned int irq, int cpu) * Number of interrupts per specific IRQ source, since bootup */ extern unsigned int kstat_irqs(unsigned int irq); +extern unsigned int kstat_irqs_usr(unsigned int irq); /* * Number of interrupts per cpu, since 
bootup diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index c6e091b..bdfc95b 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -283,7 +283,7 @@ struct kgdb_io { extern struct kgdb_arch arch_kgdb_ops; -extern unsigned long __weak kgdb_arch_pc(int exception, struct pt_regs *regs); +extern unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs); #ifdef CONFIG_SERIAL_KGDB_NMI extern int kgdb_register_nmi_console(void); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e2af2eb..b26594b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -464,8 +464,6 @@ void kvm_exit(void); void kvm_get_kvm(struct kvm *kvm); void kvm_put_kvm(struct kvm *kvm); -void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new, - u64 last_generation); static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm) { diff --git a/include/linux/libata.h b/include/linux/libata.h index 5e17d08..47938b0 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -596,6 +596,7 @@ struct ata_host { struct device *dev; void __iomem * const *iomap; unsigned int n_ports; + unsigned int n_tags; /* nr of NCQ tags */ void *private_data; struct ata_port_operations *ops; unsigned long flags; @@ -825,6 +826,7 @@ struct ata_port { unsigned long qc_allocated; unsigned int qc_active; int nr_active_links; /* #links with active qcs */ + unsigned int last_tag; /* track next tag hw expects */ struct ata_link link; /* host default link */ struct ata_link *slave_link; /* see ata_slave_link_init() */ diff --git a/include/linux/list.h b/include/linux/list.h index 885943e..2c2cbc1 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -384,6 +384,22 @@ static inline void list_splice_tail_init(struct list_head *list, list_entry((ptr)->prev, type, member) /** + * list_next_entry - get the next element in list + * @pos: the type * to cursor + * @member: the name of the list_struct within the struct. 
+ */ +#define list_next_entry(pos, member) \ + list_entry((pos)->member.next, typeof(*(pos)), member) + +/** + * list_prev_entry - get the prev element in list + * @pos: the type * to cursor + * @member: the name of the list_struct within the struct. + */ +#define list_prev_entry(pos, member) \ + list_entry((pos)->member.prev, typeof(*(pos)), member) + +/** * list_for_each - iterate over a list * @pos: the &struct list_head to use as a loop cursor. * @head: the head for your list. diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index da6716b..ccc1b71 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -175,6 +175,12 @@ static inline int vma_migratable(struct vm_area_struct *vma) { if (vma->vm_flags & (VM_IO | VM_PFNMAP)) return 0; + +#ifndef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION + if (vma->vm_flags & VM_HUGETLB) + return 0; +#endif + /* * Migration allocates pages in the highest zone. If we cannot * do so then migration (at least from node to node) is not diff --git a/include/linux/migrate.h b/include/linux/migrate.h index ee8b14a..449905e 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -5,7 +5,9 @@ #include <linux/mempolicy.h> #include <linux/migrate_mode.h> -typedef struct page *new_page_t(struct page *, unsigned long private, int **); +typedef struct page *new_page_t(struct page *page, unsigned long private, + int **reason); +typedef void free_page_t(struct page *page, unsigned long private); /* * Return values from addresss_space_operations.migratepage(): @@ -39,7 +41,7 @@ extern void putback_lru_pages(struct list_head *l); extern void putback_movable_pages(struct list_head *l); extern int migrate_page(struct address_space *, struct page *, struct page *, enum migrate_mode); -extern int migrate_pages(struct list_head *l, new_page_t x, +extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, unsigned long private, enum migrate_mode mode, int reason); extern int 
fail_migrate_page(struct address_space *, @@ -61,8 +63,9 @@ extern int migrate_page_move_mapping(struct address_space *mapping, static inline void putback_lru_pages(struct list_head *l) {} static inline void putback_movable_pages(struct list_head *l) {} -static inline int migrate_pages(struct list_head *l, new_page_t x, - unsigned long private, enum migrate_mode mode, int reason) +static inline int migrate_pages(struct list_head *l, new_page_t new, + free_page_t free, unsigned long private, enum migrate_mode mode, + int reason) { return -ENOSYS; } static inline int migrate_prep(void) { return -ENOSYS; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 2acbab4..2e577bd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -919,6 +919,14 @@ extern void show_free_areas(unsigned int flags); extern bool skip_free_areas_node(unsigned int flags, int nid); int shmem_zero_setup(struct vm_area_struct *); +#ifdef CONFIG_SHMEM +bool shmem_mapping(struct address_space *mapping); +#else +static inline bool shmem_mapping(struct address_space *mapping) +{ + return false; +} +#endif extern int can_do_mlock(void); extern int user_shm_lock(size_t, struct user_struct *); @@ -1001,6 +1009,7 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping, extern void truncate_pagecache(struct inode *inode, loff_t new); extern void truncate_setsize(struct inode *inode, loff_t newsize); +void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to); void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end); int truncate_inode_page(struct address_space *mapping, struct page *page); int generic_error_remove_page(struct address_space *mapping, struct page *page); @@ -1655,9 +1664,6 @@ void page_cache_async_readahead(struct address_space *mapping, unsigned long size); unsigned long max_sane_readahead(unsigned long nr); -unsigned long ra_submit(struct file_ra_state *ra, - struct address_space *mapping, - struct file *filp); /* 
Generic expand stack which grows the stack according to GROWS{UP,DOWN} */ extern int expand_stack(struct vm_area_struct *vma, unsigned long address); @@ -1668,7 +1674,7 @@ extern int expand_downwards(struct vm_area_struct *vma, #if VM_GROWSUP extern int expand_upwards(struct vm_area_struct *vma, unsigned long address); #else - #define expand_upwards(vma, address) do { } while (0) + #define expand_upwards(vma, address) (0) #endif /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index d87823c..a9c83ee 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -329,9 +329,9 @@ struct mm_rss_stat { struct kioctx_table; struct mm_struct { - struct vm_area_struct * mmap; /* list of VMAs */ + struct vm_area_struct *mmap; /* list of VMAs */ struct rb_root mm_rb; - struct vm_area_struct * mmap_cache; /* last find_vma result */ + u32 vmacache_seqnum; /* per-thread vmacache */ #ifdef CONFIG_MMU unsigned long (*get_unmapped_area) (struct file *filp, unsigned long addr, unsigned long len, diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index bd791e4..450f19c 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -75,9 +75,18 @@ enum { extern int page_group_by_mobility_disabled; -static inline int get_pageblock_migratetype(struct page *page) +#define NR_MIGRATETYPE_BITS (PB_migrate_end - PB_migrate + 1) +#define MIGRATETYPE_MASK ((1UL << NR_MIGRATETYPE_BITS) - 1) + +#define get_pageblock_migratetype(page) \ + get_pfnblock_flags_mask(page, page_to_pfn(page), \ + PB_migrate_end, MIGRATETYPE_MASK) + +static inline int get_pfnblock_migratetype(struct page *page, unsigned long pfn) { - return get_pageblock_flags_group(page, PB_migrate, PB_migrate_end); + BUILD_BUG_ON(PB_migrate_end - PB_migrate != 2); + return get_pfnblock_flags_mask(page, pfn, PB_migrate_end, + MIGRATETYPE_MASK); } struct free_area { @@ -134,6 +143,7 @@ enum zone_stat_item { NR_SHMEM, /* 
shmem pages (included tmpfs/GEM pages) */ NR_DIRTIED, /* page dirtyings since bootup */ NR_WRITTEN, /* page writings since bootup */ + NR_PAGES_SCANNED, /* pages scanned since last reclaim */ #ifdef CONFIG_NUMA NUMA_HIT, /* allocated in intended node */ NUMA_MISS, /* allocated in non intended node */ @@ -312,19 +322,12 @@ enum zone_type { #ifndef __GENERATING_BOUNDS_H struct zone { - /* Fields commonly accessed by the page allocator */ + /* Read-mostly fields */ /* zone watermarks, access with *_wmark_pages(zone) macros */ unsigned long watermark[NR_WMARK]; /* - * When free pages are below this point, additional steps are taken - * when reading the number of free pages to avoid per-cpu counter - * drift allowing watermarks to be breached - */ - unsigned long percpu_drift_mark; - - /* * We don't know if the memory that we're going to allocate will be freeable * or/and it will be released eventually, so to avoid totally wasting several * GB of ram we must reserve some of the lower zone memory (otherwise we risk @@ -332,40 +335,26 @@ struct zone { * on the higher zones). This array is recalculated at runtime if the * sysctl_lowmem_reserve_ratio sysctl changes. */ - unsigned long lowmem_reserve[MAX_NR_ZONES]; - - /* - * This is a per-zone reserve of pages that should not be - * considered dirtyable memory. - */ - unsigned long dirty_balance_reserve; + long lowmem_reserve[MAX_NR_ZONES]; #ifdef CONFIG_NUMA int node; +#endif + /* - * zone reclaim becomes active if more unmapped pages exist. + * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on + * this zone's LRU. Maintained by the pageout code. */ - unsigned long min_unmapped_pages; - unsigned long min_slab_pages; -#endif + unsigned int inactive_ratio; + + struct pglist_data *zone_pgdat; struct per_cpu_pageset __percpu *pageset; + /* - * free areas of different sizes + * This is a per-zone reserve of pages that should not be + * considered dirtyable memory. 
*/ - spinlock_t lock; -#if defined CONFIG_COMPACTION || defined CONFIG_CMA - /* Set to true when the PG_migrate_skip bits should be cleared */ - bool compact_blockskip_flush; - - /* pfns where compaction scanners should start */ - unsigned long compact_cached_free_pfn; - unsigned long compact_cached_migrate_pfn; -#endif -#ifdef CONFIG_MEMORY_HOTPLUG - /* see spanned/present_pages for more description */ - seqlock_t span_seqlock; -#endif - struct free_area free_area[MAX_ORDER]; + unsigned long dirty_balance_reserve; #ifndef CONFIG_SPARSEMEM /* @@ -375,71 +364,14 @@ struct zone { unsigned long *pageblock_flags; #endif /* CONFIG_SPARSEMEM */ -#ifdef CONFIG_COMPACTION - /* - * On compaction failure, 1<<compact_defer_shift compactions - * are skipped before trying again. The number attempted since - * last failure is tracked with compact_considered. - */ - unsigned int compact_considered; - unsigned int compact_defer_shift; - int compact_order_failed; -#endif - - ZONE_PADDING(_pad1_) - - /* Fields commonly accessed by the page reclaim scanner */ - spinlock_t lru_lock; - struct lruvec lruvec; - - unsigned long pages_scanned; /* since last reclaim */ - unsigned long flags; /* zone flags, see below */ - - /* Zone statistics */ - atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; - - /* - * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on - * this zone's LRU. Maintained by the pageout code. - */ - unsigned int inactive_ratio; - - - ZONE_PADDING(_pad2_) - /* Rarely used or read-mostly fields */ - +#ifdef CONFIG_NUMA /* - * wait_table -- the array holding the hash table - * wait_table_hash_nr_entries -- the size of the hash table array - * wait_table_bits -- wait_table_size == (1 << wait_table_bits) - * - * The purpose of all these is to keep track of the people - * waiting for a page to become available and make them - * runnable again when possible. The trouble is that this - * consumes a lot of space, especially when so few things - * wait on pages at a given time. 
So instead of using - * per-page waitqueues, we use a waitqueue hash table. - * - * The bucket discipline is to sleep on the same queue when - * colliding and wake all in that wait queue when removing. - * When something wakes, it must check to be sure its page is - * truly available, a la thundering herd. The cost of a - * collision is great, but given the expected load of the - * table, they should be so rare as to be outweighed by the - * benefits from the saved space. - * - * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the - * primary users of these fields, and in mm/page_alloc.c - * free_area_init_core() performs the initialization of them. + * zone reclaim becomes active if more unmapped pages exist. */ - wait_queue_head_t * wait_table; - unsigned long wait_table_hash_nr_entries; - unsigned long wait_table_bits; + unsigned long min_unmapped_pages; + unsigned long min_slab_pages; +#endif /* CONFIG_NUMA */ - /* - * Discontig memory support fields. - */ - struct pglist_data *zone_pgdat; /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */ unsigned long zone_start_pfn; @@ -485,14 +417,103 @@ struct zone { * adjust_managed_page_count() should be used instead of directly * touching zone->managed_pages and totalram_pages. */ + unsigned long managed_pages; unsigned long spanned_pages; unsigned long present_pages; - unsigned long managed_pages; + + const char *name; /* - * rarely used fields: + * Number of MIGRATE_RESEVE page block. To maintain for just + * optimization. Protected by zone->lock. 
*/ - const char *name; + int nr_migrate_reserve_block; + +#ifdef CONFIG_MEMORY_HOTPLUG + /* see spanned/present_pages for more description */ + seqlock_t span_seqlock; +#endif + + /* + * wait_table -- the array holding the hash table + * wait_table_hash_nr_entries -- the size of the hash table array + * wait_table_bits -- wait_table_size == (1 << wait_table_bits) + * + * The purpose of all these is to keep track of the people + * waiting for a page to become available and make them + * runnable again when possible. The trouble is that this + * consumes a lot of space, especially when so few things + * wait on pages at a given time. So instead of using + * per-page waitqueues, we use a waitqueue hash table. + * + * The bucket discipline is to sleep on the same queue when + * colliding and wake all in that wait queue when removing. + * When something wakes, it must check to be sure its page is + * truly available, a la thundering herd. The cost of a + * collision is great, but given the expected load of the + * table, they should be so rare as to be outweighed by the + * benefits from the saved space. + * + * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the + * primary users of these fields, and in mm/page_alloc.c + * free_area_init_core() performs the initialization of them. 
+ */ + wait_queue_head_t *wait_table; + unsigned long wait_table_hash_nr_entries; + unsigned long wait_table_bits; + + ZONE_PADDING(_pad1_) + + /* Write-intensive fields used from the page allocator */ + spinlock_t lock; + + /* free areas of different sizes */ + struct free_area free_area[MAX_ORDER]; + + /* zone flags, see below */ + unsigned long flags; + + ZONE_PADDING(_pad2_) + + /* Write-intensive fields used by page reclaim */ + + /* Fields commonly accessed by the page reclaim scanner */ + spinlock_t lru_lock; + struct lruvec lruvec; + + /* + * When free pages are below this point, additional steps are taken + * when reading the number of free pages to avoid per-cpu counter + * drift allowing watermarks to be breached + */ + unsigned long percpu_drift_mark; + +#if defined CONFIG_COMPACTION || defined CONFIG_CMA + /* pfn where compaction free scanner should start */ + unsigned long compact_cached_free_pfn; + /* pfn where async and sync compaction migration scanner should start */ + unsigned long compact_cached_migrate_pfn[2]; +#endif + +#ifdef CONFIG_COMPACTION + /* + * On compaction failure, 1<<compact_defer_shift compactions + * are skipped before trying again. The number attempted since + * last failure is tracked with compact_considered. 
+ */ + unsigned int compact_considered; + unsigned int compact_defer_shift; + int compact_order_failed; +#endif + +#if defined CONFIG_COMPACTION || defined CONFIG_CMA + /* Set to true when the PG_migrate_skip bits should be cleared */ + bool compact_blockskip_flush; +#endif + + ZONE_PADDING(_pad3_) + /* Zone statistics */ + atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; } ____cacheline_internodealigned_in_smp; typedef enum { @@ -508,6 +529,7 @@ typedef enum { ZONE_WRITEBACK, /* reclaim scanning has recently found * many pages under writeback */ + ZONE_FAIR_DEPLETED, /* fair zone policy batch depleted */ } zone_flags_t; static inline void zone_set_flag(struct zone *zone, zone_flags_t flag) @@ -545,6 +567,11 @@ static inline int zone_is_reclaim_locked(const struct zone *zone) return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags); } +static inline int zone_is_fair_depleted(const struct zone *zone) +{ + return test_bit(ZONE_FAIR_DEPLETED, &zone->flags); +} + static inline int zone_is_oom_locked(const struct zone *zone) { return test_bit(ZONE_OOM_LOCKED, &zone->flags); @@ -799,10 +826,10 @@ static inline bool pgdat_is_empty(pg_data_t *pgdat) extern struct mutex zonelists_mutex; void build_all_zonelists(pg_data_t *pgdat, struct zone *zone); void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx); -bool zone_watermark_ok(struct zone *z, int order, unsigned long mark, - int classzone_idx, int alloc_flags); -bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark, - int classzone_idx, int alloc_flags); +bool zone_watermark_ok(struct zone *z, unsigned int order, + unsigned long mark, int classzone_idx, int alloc_flags); +bool zone_watermark_ok_safe(struct zone *z, unsigned int order, + unsigned long mark, int classzone_idx, int alloc_flags); enum memmap_context { MEMMAP_EARLY, MEMMAP_HOTPLUG, diff --git a/include/linux/module.h b/include/linux/module.h index 05f2447..54aef1b 100644 --- a/include/linux/module.h +++ b/include/linux/module.h 
@@ -143,7 +143,7 @@ extern const struct gtype##_id __mod_##gtype##_table \ #define MODULE_DESCRIPTION(_description) MODULE_INFO(description, _description) #define MODULE_DEVICE_TABLE(type,name) \ - MODULE_GENERIC_TABLE(type##_device,name) + MODULE_GENERIC_TABLE(type##__##name##_device, name) /* Version of form [<epoch>:]<version>[-<extra-version>]. Or for CVS/RCS ID version, everything but the number is stripped. diff --git a/include/linux/mount.h b/include/linux/mount.h index 38cd98f..22e5b96 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -42,11 +42,18 @@ struct mnt_namespace; * flag, consider how it interacts with shared mounts. */ #define MNT_SHARED_MASK (MNT_UNBINDABLE) -#define MNT_PROPAGATION_MASK (MNT_SHARED | MNT_UNBINDABLE) +#define MNT_USER_SETTABLE_MASK (MNT_NOSUID | MNT_NODEV | MNT_NOEXEC \ + | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \ + | MNT_READONLY) +#define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME ) #define MNT_INTERNAL 0x4000 +#define MNT_LOCK_ATIME 0x040000 +#define MNT_LOCK_NOEXEC 0x080000 +#define MNT_LOCK_NOSUID 0x100000 +#define MNT_LOCK_NODEV 0x200000 #define MNT_LOCK_READONLY 0x400000 #define MNT_LOCKED 0x800000 diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9963e18..30e0620 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2898,6 +2898,20 @@ extern const char *netdev_drivername(const struct net_device *dev); extern void linkwatch_run_queue(void); +static inline netdev_features_t netdev_intersect_features(netdev_features_t f1, + netdev_features_t f2) +{ + if (f1 & NETIF_F_GEN_CSUM) + f1 |= (NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); + if (f2 & NETIF_F_GEN_CSUM) + f2 |= (NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); + f1 &= f2; + if (f1 & NETIF_F_GEN_CSUM) + f1 &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); + + return f1; +} + static inline netdev_features_t netdev_get_wanted_features( struct net_device *dev) { diff --git a/include/linux/netlink.h 
b/include/linux/netlink.h index 7a6c396..8b50a62 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -16,9 +16,10 @@ static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb) } enum netlink_skb_flags { - NETLINK_SKB_MMAPED = 0x1, /* Packet data is mmaped */ - NETLINK_SKB_TX = 0x2, /* Packet was sent by userspace */ - NETLINK_SKB_DELIVERED = 0x4, /* Packet was delivered */ + NETLINK_SKB_MMAPED = 0x1, /* Packet data is mmaped */ + NETLINK_SKB_TX = 0x2, /* Packet was sent by userspace */ + NETLINK_SKB_DELIVERED = 0x4, /* Packet was delivered */ + NETLINK_SKB_DST = 0x8, /* Dst set in sendto or sendmsg */ }; struct netlink_skb_parms { @@ -171,4 +172,11 @@ extern int netlink_add_tap(struct netlink_tap *nt); extern int __netlink_remove_tap(struct netlink_tap *nt); extern int netlink_remove_tap(struct netlink_tap *nt); +bool __netlink_ns_capable(const struct netlink_skb_parms *nsp, + struct user_namespace *ns, int cap); +bool netlink_ns_capable(const struct sk_buff *skb, + struct user_namespace *ns, int cap); +bool netlink_capable(const struct sk_buff *skb, int cap); +bool netlink_net_capable(const struct sk_buff *skb, int cap); + #endif /* __LINUX_NETLINK_H */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 2b30701..715671e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1223,11 +1223,22 @@ struct nfs41_free_stateid_res { unsigned int status; }; +static inline void +nfs_free_pnfs_ds_cinfo(struct pnfs_ds_commit_info *cinfo) +{ + kfree(cinfo->buckets); +} + #else struct pnfs_ds_commit_info { }; +static inline void +nfs_free_pnfs_ds_cinfo(struct pnfs_ds_commit_info *cinfo) +{ +} + #endif /* CONFIG_NFS_V4_1 */ struct nfs_page; diff --git a/include/linux/of.h b/include/linux/of.h index d27359a..da95cf0 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -263,14 +263,12 @@ extern int of_property_read_u64(const struct device_node *np, extern int of_property_read_string(struct device_node *np, const 
char *propname, const char **out_string); -extern int of_property_read_string_index(struct device_node *np, - const char *propname, - int index, const char **output); extern int of_property_match_string(struct device_node *np, const char *propname, const char *string); -extern int of_property_count_strings(struct device_node *np, - const char *propname); +extern int of_property_read_string_helper(struct device_node *np, + const char *propname, + const char **out_strs, size_t sz, int index); extern int of_device_is_compatible(const struct device_node *device, const char *); extern int of_device_is_available(const struct device_node *device); @@ -450,15 +448,9 @@ static inline int of_property_read_string(struct device_node *np, return -ENOSYS; } -static inline int of_property_read_string_index(struct device_node *np, - const char *propname, int index, - const char **out_string) -{ - return -ENOSYS; -} - -static inline int of_property_count_strings(struct device_node *np, - const char *propname) +static inline int of_property_read_string_helper(struct device_node *np, + const char *propname, + const char **out_strs, size_t sz, int index) { return -ENOSYS; } @@ -560,6 +552,70 @@ static inline int of_node_to_nid(struct device_node *np) #endif /** + * of_property_read_string_array() - Read an array of strings from a multiple + * strings property. + * @np: device node from which the property value is to be read. + * @propname: name of the property to be searched. + * @out_strs: output array of string pointers. + * @sz: number of array elements to read. + * + * Search for a property in a device tree node and retrieve a list of + * terminated string values (pointer to data, not a copy) in that property. + * + * If @out_strs is NULL, the number of strings in the property is returned. 
+ */ +static inline int of_property_read_string_array(struct device_node *np, + const char *propname, const char **out_strs, + size_t sz) +{ + return of_property_read_string_helper(np, propname, out_strs, sz, 0); +} + +/** + * of_property_count_strings() - Find and return the number of strings from a + * multiple strings property. + * @np: device node from which the property value is to be read. + * @propname: name of the property to be searched. + * + * Search for a property in a device tree node and retrieve the number of null + * terminated string contain in it. Returns the number of strings on + * success, -EINVAL if the property does not exist, -ENODATA if property + * does not have a value, and -EILSEQ if the string is not null-terminated + * within the length of the property data. + */ +static inline int of_property_count_strings(struct device_node *np, + const char *propname) +{ + return of_property_read_string_helper(np, propname, NULL, 0, 0); +} + +/** + * of_property_read_string_index() - Find and read a string from a multiple + * strings property. + * @np: device node from which the property value is to be read. + * @propname: name of the property to be searched. + * @index: index of the string in the list of strings + * @out_string: pointer to null terminated return string, modified only if + * return value is 0. + * + * Search for a property in a device tree node and retrieve a null + * terminated string value (pointer to data, not a copy) in the list of strings + * contained in that property. + * Returns 0 on success, -EINVAL if the property does not exist, -ENODATA if + * property does not have a value, and -EILSEQ if the string is not + * null-terminated within the length of the property data. + * + * The out_string pointer is modified only if a valid string can be decoded. 
+ */ +static inline int of_property_read_string_index(struct device_node *np, + const char *propname, + int index, const char **output) +{ + int rc = of_property_read_string_helper(np, propname, output, 1, index); + return rc < 0 ? rc : 0; +} + +/** * of_property_read_bool - Findfrom a property * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. diff --git a/include/linux/oom.h b/include/linux/oom.h index da60007..297cda5 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -50,6 +50,9 @@ static inline bool oom_task_origin(const struct task_struct *p) extern unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, const nodemask_t *nodemask, unsigned long totalpages); + +extern int oom_kills_count(void); +extern void note_oom_kill(void); extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, unsigned int points, unsigned long totalpages, struct mem_cgroup *memcg, nodemask_t *nodemask, diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 6d53675..2284ea6 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -198,6 +198,7 @@ struct page; /* forward declaration */ TESTPAGEFLAG(Locked, locked) PAGEFLAG(Error, error) TESTCLEARFLAG(Error, error) PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced) + __SETPAGEFLAG(Referenced, referenced) PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty) PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU, lru) PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active) @@ -208,6 +209,7 @@ PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned) /* Xen */ PAGEFLAG(SavePinned, savepinned); /* Xen */ PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved) PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked) + __SETPAGEFLAG(SwapBacked, swapbacked) __PAGEFLAG(SlobFree, slob_free) @@ -228,9 +230,9 @@ PAGEFLAG(OwnerPriv1, owner_priv_1) 
TESTCLEARFLAG(OwnerPriv1, owner_priv_1) TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback) PAGEFLAG(MappedToDisk, mappedtodisk) -/* PG_readahead is only used for file reads; PG_reclaim is only for writes */ +/* PG_readahead is only used for reads; PG_reclaim is only for writes */ PAGEFLAG(Reclaim, reclaim) TESTCLEARFLAG(Reclaim, reclaim) -PAGEFLAG(Readahead, reclaim) /* Reminder to do async read-ahead */ +PAGEFLAG(Readahead, reclaim) TESTCLEARFLAG(Readahead, reclaim) #ifdef CONFIG_HIGHMEM /* @@ -317,13 +319,23 @@ CLEARPAGEFLAG(Uptodate, uptodate) extern void cancel_dirty_page(struct page *page, unsigned int account_size); int test_clear_page_writeback(struct page *page); -int test_set_page_writeback(struct page *page); +int __test_set_page_writeback(struct page *page, bool keep_write); + +#define test_set_page_writeback(page) \ + __test_set_page_writeback(page, false) +#define test_set_page_writeback_keepwrite(page) \ + __test_set_page_writeback(page, true) static inline void set_page_writeback(struct page *page) { test_set_page_writeback(page); } +static inline void set_page_writeback_keepwrite(struct page *page) +{ + test_set_page_writeback_keepwrite(page); +} + #ifdef CONFIG_PAGEFLAGS_EXTENDED /* * System with lots of page flags available. This allows separate diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index 2ee8cd2..2baeee1 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -30,9 +30,12 @@ enum pageblock_bits { PB_migrate, PB_migrate_end = PB_migrate + 3 - 1, /* 3 bits required for migrate types */ -#ifdef CONFIG_COMPACTION PB_migrate_skip,/* If set the block is skipped by compaction */ -#endif /* CONFIG_COMPACTION */ + + /* + * Assume the bits will always align on a word. If this assumption + * changes then get/set pageblock needs updating. 
+ */ NR_PAGEBLOCK_BITS }; @@ -62,11 +65,26 @@ extern int pageblock_order; /* Forward declaration */ struct page; +unsigned long get_pfnblock_flags_mask(struct page *page, + unsigned long pfn, + unsigned long end_bitidx, + unsigned long mask); + +void set_pfnblock_flags_mask(struct page *page, + unsigned long flags, + unsigned long pfn, + unsigned long end_bitidx, + unsigned long mask); + /* Declarations for getting and setting flags. See mm/page_alloc.c */ -unsigned long get_pageblock_flags_group(struct page *page, - int start_bitidx, int end_bitidx); -void set_pageblock_flags_group(struct page *page, unsigned long flags, - int start_bitidx, int end_bitidx); +#define get_pageblock_flags_group(page, start_bitidx, end_bitidx) \ + get_pfnblock_flags_mask(page, page_to_pfn(page), \ + end_bitidx, \ + (1 << (end_bitidx - start_bitidx + 1)) - 1) +#define set_pageblock_flags_group(page, flags, start_bitidx, end_bitidx) \ + set_pfnblock_flags_mask(page, flags, page_to_pfn(page), \ + end_bitidx, \ + (1 << (end_bitidx - start_bitidx + 1)) - 1) #ifdef CONFIG_COMPACTION #define get_pageblock_skip(page) \ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index e3dea75..bf944e8 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -99,7 +99,7 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) #define page_cache_get(page) get_page(page) #define page_cache_release(page) put_page(page) -void release_pages(struct page **pages, int nr, int cold); +void release_pages(struct page **pages, int nr, bool cold); /* * speculatively take a reference to a page. 
@@ -243,12 +243,116 @@ static inline struct page *page_cache_alloc_readahead(struct address_space *x) typedef int filler_t(void *, struct page *); -extern struct page * find_get_page(struct address_space *mapping, - pgoff_t index); -extern struct page * find_lock_page(struct address_space *mapping, - pgoff_t index); -extern struct page * find_or_create_page(struct address_space *mapping, - pgoff_t index, gfp_t gfp_mask); +pgoff_t page_cache_next_hole(struct address_space *mapping, + pgoff_t index, unsigned long max_scan); +pgoff_t page_cache_prev_hole(struct address_space *mapping, + pgoff_t index, unsigned long max_scan); + +#define FGP_ACCESSED 0x00000001 +#define FGP_LOCK 0x00000002 +#define FGP_CREAT 0x00000004 +#define FGP_WRITE 0x00000008 +#define FGP_NOFS 0x00000010 +#define FGP_NOWAIT 0x00000020 + +struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset, + int fgp_flags, gfp_t cache_gfp_mask); + +/** + * find_get_page - find and get a page reference + * @mapping: the address_space to search + * @offset: the page index + * + * Looks up the page cache slot at @mapping & @offset. If there is a + * page cache page, it is returned with an increased refcount. + * + * Otherwise, %NULL is returned. + */ +static inline struct page *find_get_page(struct address_space *mapping, + pgoff_t offset) +{ + return pagecache_get_page(mapping, offset, 0, 0); +} + +static inline struct page *find_get_page_flags(struct address_space *mapping, + pgoff_t offset, int fgp_flags) +{ + return pagecache_get_page(mapping, offset, fgp_flags, 0); +} + +/** + * find_lock_page - locate, pin and lock a pagecache page + * pagecache_get_page - find and get a page reference + * @mapping: the address_space to search + * @offset: the page index + * + * Looks up the page cache slot at @mapping & @offset. If there is a + * page cache page, it is returned locked and with an increased + * refcount. + * + * Otherwise, %NULL is returned. + * + * find_lock_page() may sleep. 
+ */ +static inline struct page *find_lock_page(struct address_space *mapping, + pgoff_t offset) +{ + return pagecache_get_page(mapping, offset, FGP_LOCK, 0); +} + +/** + * find_or_create_page - locate or add a pagecache page + * @mapping: the page's address_space + * @index: the page's index into the mapping + * @gfp_mask: page allocation mode + * + * Looks up the page cache slot at @mapping & @offset. If there is a + * page cache page, it is returned locked and with an increased + * refcount. + * + * If the page is not present, a new page is allocated using @gfp_mask + * and added to the page cache and the VM's LRU list. The page is + * returned locked and with an increased refcount. + * + * On memory exhaustion, %NULL is returned. + * + * find_or_create_page() may sleep, even if @gfp_flags specifies an + * atomic allocation! + */ +static inline struct page *find_or_create_page(struct address_space *mapping, + pgoff_t offset, gfp_t gfp_mask) +{ + return pagecache_get_page(mapping, offset, + FGP_LOCK|FGP_ACCESSED|FGP_CREAT, + gfp_mask); +} + +/** + * grab_cache_page_nowait - returns locked page at given index in given cache + * @mapping: target address_space + * @index: the page index + * + * Same as grab_cache_page(), but do not wait if the page is unavailable. + * This is intended for speculative data generators, where the data can + * be regenerated if the page couldn't be grabbed. This routine should + * be safe to call while holding the lock for another page. + * + * Clear __GFP_FS when allocating the page to avoid recursion into the fs + * and deadlock against the caller's locked page. 
+ */ +static inline struct page *grab_cache_page_nowait(struct address_space *mapping, + pgoff_t index) +{ + return pagecache_get_page(mapping, index, + FGP_LOCK|FGP_CREAT|FGP_NOFS|FGP_NOWAIT, + mapping_gfp_mask(mapping)); +} + +struct page *find_get_entry(struct address_space *mapping, pgoff_t offset); +struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset); +unsigned find_get_entries(struct address_space *mapping, pgoff_t start, + unsigned int nr_entries, struct page **entries, + pgoff_t *indices); unsigned find_get_pages(struct address_space *mapping, pgoff_t start, unsigned int nr_pages, struct page **pages); unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start, @@ -268,10 +372,6 @@ static inline struct page *grab_cache_page(struct address_space *mapping, return find_or_create_page(mapping, index, mapping_gfp_mask(mapping)); } -extern struct page * grab_cache_page_nowait(struct address_space *mapping, - pgoff_t index); -extern struct page * read_cache_page_async(struct address_space *mapping, - pgoff_t index, filler_t *filler, void *data); extern struct page * read_cache_page(struct address_space *mapping, pgoff_t index, filler_t *filler, void *data); extern struct page * read_cache_page_gfp(struct address_space *mapping, @@ -279,14 +379,6 @@ extern struct page * read_cache_page_gfp(struct address_space *mapping, extern int read_cache_pages(struct address_space *mapping, struct list_head *pages, filler_t *filler, void *data); -static inline struct page *read_mapping_page_async( - struct address_space *mapping, - pgoff_t index, void *data) -{ - filler_t *filler = (filler_t *)mapping->a_ops->readpage; - return read_cache_page_async(mapping, index, filler, data); -} - static inline struct page *read_mapping_page(struct address_space *mapping, pgoff_t index, void *data) { diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index e4dbfab..b45d391 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h 
@@ -22,6 +22,11 @@ struct pagevec { void __pagevec_release(struct pagevec *pvec); void __pagevec_lru_add(struct pagevec *pvec); +unsigned pagevec_lookup_entries(struct pagevec *pvec, + struct address_space *mapping, + pgoff_t start, unsigned nr_entries, + pgoff_t *indices); +void pagevec_remove_exceptionals(struct pagevec *pvec); unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping, pgoff_t start, unsigned nr_pages); unsigned pagevec_lookup_tag(struct pagevec *pvec, diff --git a/include/linux/pci.h b/include/linux/pci.h index c587034..825d604 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -323,6 +323,7 @@ struct pci_dev { unsigned int is_added:1; unsigned int is_busmaster:1; /* device is busmaster */ unsigned int no_msi:1; /* device may not use msi */ + unsigned int no_64bit_msi:1; /* device may only use 32-bit MSIs */ unsigned int block_cfg_access:1; /* config space access is blocked */ unsigned int broken_parity_status:1; /* Device generates false positive parity */ unsigned int irq_reroute_variant:2; /* device needs IRQ rerouting variant */ @@ -478,6 +479,15 @@ static inline bool pci_is_root_bus(struct pci_bus *pbus) return !(pbus->parent); } +static inline struct pci_dev *pci_upstream_bridge(struct pci_dev *dev) +{ + dev = pci_physfn(dev); + if (pci_is_root_bus(dev->bus)) + return NULL; + + return dev->bus->self; +} + #ifdef CONFIG_PCI_MSI static inline bool pci_dev_msi_enabled(struct pci_dev *pci_dev) { diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 97fbecd..057c1d8 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2551,6 +2551,7 @@ #define PCI_DEVICE_ID_INTEL_MFD_EMMC0 0x0823 #define PCI_DEVICE_ID_INTEL_MFD_EMMC1 0x0824 #define PCI_DEVICE_ID_INTEL_MRST_SD2 0x084F +#define PCI_DEVICE_ID_INTEL_QUARK_X1000_ILB 0x095E #define PCI_DEVICE_ID_INTEL_I960 0x0960 #define PCI_DEVICE_ID_INTEL_I960RM 0x0962 #define PCI_DEVICE_ID_INTEL_CENTERTON_ILB 0x0c60 diff --git 
a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 95961f0..0afb48f 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -110,7 +110,7 @@ static inline void percpu_ref_get(struct percpu_ref *ref) pcpu_count = ACCESS_ONCE(ref->pcpu_count); if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR)) - __this_cpu_inc(*pcpu_count); + this_cpu_inc(*pcpu_count); else atomic_inc(&ref->count); @@ -139,7 +139,7 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref) pcpu_count = ACCESS_ONCE(ref->pcpu_count); if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR)) { - __this_cpu_inc(*pcpu_count); + this_cpu_inc(*pcpu_count); ret = true; } @@ -164,7 +164,7 @@ static inline void percpu_ref_put(struct percpu_ref *ref) pcpu_count = ACCESS_ONCE(ref->pcpu_count); if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR)) - __this_cpu_dec(*pcpu_count); + this_cpu_dec(*pcpu_count); else if (unlikely(atomic_dec_and_test(&ref->count))) ref->release(ref); diff --git a/include/linux/plist.h b/include/linux/plist.h index aa0fb39..8b6c970 100644 --- a/include/linux/plist.h +++ b/include/linux/plist.h @@ -98,6 +98,13 @@ struct plist_node { } /** + * PLIST_HEAD - declare and init plist_head + * @head: name for struct plist_head variable + */ +#define PLIST_HEAD(head) \ + struct plist_head head = PLIST_HEAD_INIT(head) + +/** * PLIST_NODE_INIT - static struct plist_node initializer * @node: struct plist_node variable name * @__prio: initial node priority @@ -134,6 +141,8 @@ static inline void plist_node_init(struct plist_node *node, int prio) extern void plist_add(struct plist_node *node, struct plist_head *head); extern void plist_del(struct plist_node *node, struct plist_head *head); +extern void plist_requeue(struct plist_node *node, struct plist_head *head); + /** * plist_for_each - iterate over the plist * @pos: the type * to use as a loop counter @@ -143,6 +152,16 @@ extern void plist_del(struct plist_node *node, struct plist_head *head); 
list_for_each_entry(pos, &(head)->node_list, node_list) /** + * plist_for_each_continue - continue iteration over the plist + * @pos: the type * to use as a loop cursor + * @head: the head for your list + * + * Continue to iterate over plist, continuing after the current position. + */ +#define plist_for_each_continue(pos, head) \ + list_for_each_entry_continue(pos, &(head)->node_list, node_list) + +/** * plist_for_each_safe - iterate safely over a plist of given type * @pos: the type * to use as a loop counter * @n: another type * to use as temporary storage @@ -163,6 +182,18 @@ extern void plist_del(struct plist_node *node, struct plist_head *head); list_for_each_entry(pos, &(head)->node_list, mem.node_list) /** + * plist_for_each_entry_continue - continue iteration over list of given type + * @pos: the type * to use as a loop cursor + * @head: the head for your list + * @m: the name of the list_struct within the struct + * + * Continue to iterate over list of given type, continuing after + * the current position. 
+ */ +#define plist_for_each_entry_continue(pos, head, m) \ + list_for_each_entry_continue(pos, &(head)->node_list, m.node_list) + +/** * plist_for_each_entry_safe - iterate safely over list of given type * @pos: the type * to use as a loop counter * @n: another type * to use as temporary storage @@ -229,6 +260,20 @@ static inline int plist_node_empty(const struct plist_node *node) #endif /** + * plist_next - get the next entry in list + * @pos: the type * to cursor + */ +#define plist_next(pos) \ + list_next_entry(pos, node_list) + +/** + * plist_prev - get the prev entry in list + * @pos: the type * to cursor + */ +#define plist_prev(pos) \ + list_prev_entry(pos, node_list) + +/** * plist_first - return the first node (and thus, highest priority) * @head: the &struct plist_head pointer * diff --git a/include/linux/printk.h b/include/linux/printk.h index c262485..a2d18be 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -126,9 +126,9 @@ asmlinkage __printf(1, 2) __cold int printk(const char *fmt, ...); /* - * Special printk facility for scheduler use only, _DO_NOT_USE_ ! + * Special printk facility for scheduler/timekeeping use only, _DO_NOT_USE_ ! */ -__printf(1, 2) __cold int printk_sched(const char *fmt, ...); +__printf(1, 2) __cold int printk_deferred(const char *fmt, ...); /* * Please don't use printk_ratelimit(), because it shares ratelimiting state @@ -162,7 +162,7 @@ int printk(const char *s, ...) return 0; } static inline __printf(1, 2) __cold -int printk_sched(const char *s, ...) +int printk_deferred(const char *s, ...) 
{ return 0; } diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 9974975..4af3fdc 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -53,7 +53,8 @@ struct persistent_ram_zone { }; struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, - u32 sig, struct persistent_ram_ecc_info *ecc_info); + u32 sig, struct persistent_ram_ecc_info *ecc_info, + unsigned int memtype); void persistent_ram_free(struct persistent_ram_zone *prz); void persistent_ram_zap(struct persistent_ram_zone *prz); @@ -76,6 +77,7 @@ ssize_t persistent_ram_ecc_string(struct persistent_ram_zone *prz, struct ramoops_platform_data { unsigned long mem_size; unsigned long mem_address; + unsigned int mem_type; unsigned long record_size; unsigned long console_size; unsigned long ftrace_size; diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 07d0df6..cc79eff 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -5,6 +5,7 @@ #include <linux/sched.h> /* For struct task_struct. */ #include <linux/err.h> /* for IS_ERR_VALUE */ #include <linux/bug.h> /* For BUG_ON. */ +#include <linux/pid_namespace.h> /* For task_active_pid_ns. */ #include <uapi/linux/ptrace.h> /* @@ -129,6 +130,37 @@ static inline void ptrace_event(int event, unsigned long message) } /** + * ptrace_event_pid - possibly stop for a ptrace event notification + * @event: %PTRACE_EVENT_* value to report + * @pid: process identifier for %PTRACE_GETEVENTMSG to return + * + * Check whether @event is enabled and, if so, report @event and @pid + * to the ptrace parent. @pid is reported as the pid_t seen from the + * the ptrace parent's pid namespace. + * + * Called without locks. + */ +static inline void ptrace_event_pid(int event, struct pid *pid) +{ + /* + * FIXME: There's a potential race if a ptracer in a different pid + * namespace than parent attaches between computing message below and + * when we acquire tasklist_lock in ptrace_stop(). 
If this happens, + * the ptracer will get a bogus pid from PTRACE_GETEVENTMSG. + */ + unsigned long message = 0; + struct pid_namespace *ns; + + rcu_read_lock(); + ns = task_active_pid_ns(rcu_dereference(current->parent)); + if (ns) + message = pid_nr_ns(pid, ns); + rcu_read_unlock(); + + ptrace_event(event, message); +} + +/** * ptrace_init_task - initialize ptrace state for a new child * @child: new child task * @ptrace: true if child should be ptrace'd by parent's tracer @@ -302,6 +334,9 @@ static inline void user_single_step_siginfo(struct task_struct *tsk, * calling arch_ptrace_stop() when it would be superfluous. For example, * if the thread has not been back to user mode since the last stop, the * thread state might indicate that nothing needs to be done. + * + * This is guaranteed to be invoked once before a task stops for ptrace and + * may include arch-specific operations necessary prior to a ptrace stop. */ #define arch_ptrace_stop_needed(code, info) (0) #endif diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 6965fe3..1d3eee5 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -46,6 +46,14 @@ void inode_reclaim_rsv_space(struct inode *inode, qsize_t number); void dquot_initialize(struct inode *inode); void dquot_drop(struct inode *inode); struct dquot *dqget(struct super_block *sb, struct kqid qid); +static inline struct dquot *dqgrab(struct dquot *dquot) +{ + /* Make sure someone else has active reference to dquot */ + WARN_ON_ONCE(!atomic_read(&dquot->dq_count)); + WARN_ON_ONCE(!test_bit(DQ_ACTIVE_B, &dquot->dq_flags)); + atomic_inc(&dquot->dq_count); + return dquot; +} void dqput(struct dquot *dquot); int dquot_scan_active(struct super_block *sb, int (*fn)(struct dquot *dquot, unsigned long priv), diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 5b6d5b2..ab8263e 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -219,6 +219,7 @@ static inline void 
radix_tree_replace_slot(void **pslot, void *item) int radix_tree_insert(struct radix_tree_root *, unsigned long, void *); void *radix_tree_lookup(struct radix_tree_root *, unsigned long); void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long); +void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *); void *radix_tree_delete(struct radix_tree_root *, unsigned long); unsigned int radix_tree_gang_lookup(struct radix_tree_root *root, void **results, @@ -226,10 +227,6 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results, unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, unsigned long *indices, unsigned long first_index, unsigned int max_items); -unsigned long radix_tree_next_hole(struct radix_tree_root *root, - unsigned long index, unsigned long max_scan); -unsigned long radix_tree_prev_hole(struct radix_tree_root *root, - unsigned long index, unsigned long max_scan); #ifndef CONFIG_PREEMPT_RT_FULL int radix_tree_preload(gfp_t gfp_mask); int radix_tree_maybe_preload(gfp_t gfp_mask); diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index d69cf63..49a4d6f 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -97,7 +97,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k __ring_buffer_alloc((size), (flags), &__key); \ }) -void ring_buffer_wait(struct ring_buffer *buffer, int cpu); +int ring_buffer_wait(struct ring_buffer *buffer, int cpu); int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, struct file *filp, poll_table *poll_table); diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index f28544b..321f4ec 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -4,6 +4,7 @@ #include <linux/mutex.h> #include <linux/netdevice.h> +#include <linux/wait.h> #include <uapi/linux/rtnetlink.h> extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, 
u32 group, int echo); @@ -22,6 +23,10 @@ extern void rtnl_lock(void); extern void rtnl_unlock(void); extern int rtnl_trylock(void); extern int rtnl_is_locked(void); + +extern wait_queue_head_t netdev_unregistering_wq; +extern struct mutex net_mutex; + #ifdef CONFIG_PROVE_LOCKING extern int lockdep_rtnl_is_held(void); #endif /* #ifdef CONFIG_PROVE_LOCKING */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 625a41f..3ce814c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -65,6 +65,10 @@ struct fs_struct; struct perf_event_context; struct blk_plug; +#define VMACACHE_BITS 2 +#define VMACACHE_SIZE (1U << VMACACHE_BITS) +#define VMACACHE_MASK (VMACACHE_SIZE - 1) + /* * List of flags we want to share for kernel threads, * if only because they are not used by them anyway. @@ -469,6 +473,7 @@ struct signal_struct { atomic_t sigcnt; atomic_t live; int nr_threads; + struct list_head thread_head; wait_queue_head_t wait_chldexit; /* for wait4() */ @@ -1098,6 +1103,9 @@ struct task_struct { #ifdef CONFIG_COMPAT_BRK unsigned brk_randomized:1; #endif + /* per-thread vma caching */ + u32 vmacache_seqnum; + struct vm_area_struct *vmacache[VMACACHE_SIZE]; #if defined(SPLIT_RSS_COUNTING) struct task_rss_stat rss_stat; #endif @@ -1154,6 +1162,7 @@ struct task_struct { /* PID/PID hash table linkage. 
*/ struct pid_link pids[PIDTYPE_MAX]; struct list_head thread_group; + struct list_head thread_node; struct completion *vfork_done; /* for vfork() */ int __user *set_child_tid; /* CLONE_CHILD_SETTID */ @@ -1547,6 +1556,24 @@ static inline pid_t task_tgid_vnr(struct task_struct *tsk) } +static inline int pid_alive(const struct task_struct *p); +static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns) +{ + pid_t pid = 0; + + rcu_read_lock(); + if (pid_alive(tsk)) + pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns); + rcu_read_unlock(); + + return pid; +} + +static inline pid_t task_ppid_nr(const struct task_struct *tsk) +{ + return task_ppid_nr_ns(tsk, &init_pid_ns); +} + static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { @@ -1586,7 +1613,7 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk) * * Return: 1 if the process is alive. 0 otherwise. */ -static inline int pid_alive(struct task_struct *p) +static inline int pid_alive(const struct task_struct *p) { return p->pids[PIDTYPE_PID].pid != NULL; } @@ -1721,11 +1748,13 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) #define used_math() tsk_used_math(current) -/* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags */ +/* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags + * __GFP_FS is also cleared as it implies __GFP_IO. 
+ */ static inline gfp_t memalloc_noio_flags(gfp_t flags) { if (unlikely(current->flags & PF_MEMALLOC_NOIO)) - flags &= ~__GFP_IO; + flags &= ~(__GFP_IO | __GFP_FS); return flags; } @@ -2240,6 +2269,16 @@ extern bool current_is_single_threaded(void); #define while_each_thread(g, t) \ while ((t = next_thread(t)) != g) +#define __for_each_thread(signal, t) \ + list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node) + +#define for_each_thread(p, t) \ + __for_each_thread((p)->signal, t) + +/* Careful: this is a double loop, 'break' won't work as expected. */ +#define for_each_process_thread(p, t) \ + for_each_process(p) for_each_thread(p, t) + static inline int get_nr_threads(struct task_struct *tsk) { return tsk->signal->nr_threads; diff --git a/include/linux/serio.h b/include/linux/serio.h index 36aac73..9f779c7 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -23,6 +23,7 @@ struct serio { char name[32]; char phys[32]; + char firmware_id[128]; bool manual_bind; diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 30aa0dc..deb4960 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -49,6 +49,7 @@ extern struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags); extern int shmem_zero_setup(struct vm_area_struct *); extern int shmem_lock(struct file *file, int lock, struct user_struct *user); +extern bool shmem_mapping(struct address_space *mapping); extern void shmem_unlock_mapping(struct address_space *mapping); extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7406b93..e13de0b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2403,6 +2403,7 @@ extern struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); unsigned int skb_gso_transport_seglen(const struct sk_buff *skb); +struct sk_buff *skb_vlan_untag(struct sk_buff 
*skb); static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index 54f91d3..46cca4c 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -23,7 +23,7 @@ int sock_diag_check_cookie(void *sk, __u32 *cookie); void sock_diag_save_cookie(void *sk, __u32 *cookie); int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr); -int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk, +int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk, struct sk_buff *skb, int attrtype); #endif diff --git a/include/linux/string.h b/include/linux/string.h index ac889c5..0ed878d 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -129,7 +129,7 @@ int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...) __printf(3, 4); #endif extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, - const void *from, size_t available); + const void *from, size_t available); /** * strstarts - does @str start with @prefix? @@ -141,7 +141,8 @@ static inline bool strstarts(const char *str, const char *prefix) return strncmp(str, prefix, strlen(prefix)) == 0; } -extern size_t memweight(const void *ptr, size_t bytes); +size_t memweight(const void *ptr, size_t bytes); +void memzero_explicit(void *s, size_t count); /** * kbasename - return the last part of a pathname. 
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index b05963f..f5bfb1a 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -32,6 +32,7 @@ struct svc_xprt_class { struct svc_xprt_ops *xcl_ops; struct list_head xcl_list; u32 xcl_max_payload; + int xcl_ident; }; /* diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 62fd1b7..947009e 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -56,6 +56,7 @@ int svc_recv(struct svc_rqst *, long); int svc_send(struct svc_rqst *); void svc_drop(struct svc_rqst *); void svc_sock_update_bufs(struct svc_serv *serv); +bool svc_alien_sock(struct net *net, int fd); int svc_addsock(struct svc_serv *serv, const int fd, char *name_return, const size_t len); void svc_init_xprt_sock(void); diff --git a/include/linux/swap.h b/include/linux/swap.h index 46ba0c6..241bf09 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -214,8 +214,9 @@ struct percpu_cluster { struct swap_info_struct { unsigned long flags; /* SWP_USED etc: see above */ signed short prio; /* swap priority of this type */ + struct plist_node list; /* entry in swap_active_head */ + struct plist_node avail_list; /* entry in swap_avail_head */ signed char type; /* strange name for an index */ - signed char next; /* next type on the swap list */ unsigned int max; /* extent of the swap_map */ unsigned char *swap_map; /* vmalloc'ed array of usage counts */ struct swap_cluster_info *cluster_info; /* cluster info. 
Only for SSD */ @@ -255,11 +256,6 @@ struct swap_info_struct { struct swap_cluster_info discard_cluster_tail; /* list tail of discard clusters */ }; -struct swap_list_t { - int head; /* head of priority-ordered swapfile list */ - int next; /* swapfile to be used next */ -}; - /* linux/mm/page_alloc.c */ extern unsigned long totalram_pages; extern unsigned long totalreserve_pages; @@ -272,12 +268,14 @@ extern unsigned long nr_free_pagecache_pages(void); /* linux/mm/swap.c */ -extern void __lru_cache_add(struct page *); extern void lru_cache_add(struct page *); +extern void lru_cache_add_anon(struct page *page); +extern void lru_cache_add_file(struct page *page); extern void lru_add_page_tail(struct page *page, struct page *page_tail, struct lruvec *lruvec, struct list_head *head); extern void activate_page(struct page *); extern void mark_page_accessed(struct page *); +extern void init_page_accessed(struct page *page); extern void lru_add_drain(void); extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_all(void); @@ -287,22 +285,6 @@ extern void swap_setup(void); extern void add_page_to_unevictable_list(struct page *page); -/** - * lru_cache_add: add a page to the page lists - * @page: the page to add - */ -static inline void lru_cache_add_anon(struct page *page) -{ - ClearPageActive(page); - __lru_cache_add(page); -} - -static inline void lru_cache_add_file(struct page *page) -{ - ClearPageActive(page); - __lru_cache_add(page); -} - /* linux/mm/vmscan.c */ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask); @@ -460,7 +442,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout) #define free_page_and_swap_cache(page) \ page_cache_release(page) #define free_pages_and_swap_cache(pages, nr) \ - release_pages((pages), (nr), 0); + release_pages((pages), (nr), false); static inline void show_swap_cache_info(void) { diff --git a/include/linux/swapfile.h 
b/include/linux/swapfile.h index e282624..388293a 100644 --- a/include/linux/swapfile.h +++ b/include/linux/swapfile.h @@ -6,7 +6,7 @@ * want to expose them to the dozens of source files that include swap.h */ extern spinlock_t swap_lock; -extern struct swap_list_t swap_list; +extern struct plist_head swap_active_head; extern struct swap_info_struct *swap_info[]; extern int try_to_unuse(unsigned int, bool, unsigned long); diff --git a/include/linux/time.h b/include/linux/time.h index d5d229b..7d532a3 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -173,6 +173,19 @@ extern void getboottime(struct timespec *ts); extern void monotonic_to_bootbased(struct timespec *ts); extern void get_monotonic_boottime(struct timespec *ts); +static inline bool timeval_valid(const struct timeval *tv) +{ + /* Dates before 1970 are bogus */ + if (tv->tv_sec < 0) + return false; + + /* Can't have more microseconds than a second */ + if (tv->tv_usec < 0 || tv->tv_usec >= USEC_PER_SEC) + return false; + + return true; +} + extern struct timespec timespec_trunc(struct timespec t, unsigned gran); extern int timekeeping_valid_for_hres(void); extern u64 timekeeping_max_deferment(void); diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h index 52f944d..3fb4288 100644 --- a/include/linux/usb/quirks.h +++ b/include/linux/usb/quirks.h @@ -30,4 +30,24 @@ descriptor */ #define USB_QUIRK_DELAY_INIT 0x00000040 +/* + * For high speed and super speed interrupt endpoints, the USB 2.0 and + * USB 3.0 spec require the interval in microframes + * (1 microframe = 125 microseconds) to be calculated as + * interval = 2 ^ (bInterval-1). + * + * Devices with this quirk report their bInterval as the result of this + * calculation instead of the exponent variable used in the calculation.
+ */ +#define USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL 0x00000080 + +/* device generates spurious wakeup, ignore remote wakeup capability */ +#define USB_QUIRK_IGNORE_REMOTE_WAKEUP 0x00000200 + +/* device generates spurious wakeup, ignore remote wakeup capability */ +#define USB_QUIRK_IGNORE_REMOTE_WAKEUP 0x00000200 + +/* device can't handle device_qualifier descriptor requests */ +#define USB_QUIRK_DEVICE_QUALIFIER 0x00000100 + #endif /* __LINUX_USB_QUIRKS_H */ diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 4db2985..67c1108 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -17,6 +17,10 @@ struct uid_gid_map { /* 64 bytes -- 1 cache line */ } extent[UID_GID_MAP_MAX_EXTENTS]; }; +#define USERNS_SETGROUPS_ALLOWED 1UL + +#define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED + struct user_namespace { struct uid_gid_map uid_map; struct uid_gid_map gid_map; @@ -27,6 +31,7 @@ struct user_namespace { kuid_t owner; kgid_t group; unsigned int proc_inum; + unsigned long flags; }; extern struct user_namespace init_user_ns; @@ -57,6 +62,9 @@ extern struct seq_operations proc_projid_seq_operations; extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *); +extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *); +extern int proc_setgroups_show(struct seq_file *m, void *v); +extern bool userns_may_setgroups(const struct user_namespace *ns); #else static inline struct user_namespace *get_user_ns(struct user_namespace *ns) @@ -81,6 +89,10 @@ static inline void put_user_ns(struct user_namespace *ns) { } +static inline bool userns_may_setgroups(const struct user_namespace *ns) +{ + return true; +} #endif #endif /* _LINUX_USER_H */ diff --git a/include/linux/vga_switcheroo.h 
b/include/linux/vga_switcheroo.h index 502073a..b483abd 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -64,6 +64,7 @@ int vga_switcheroo_get_client_state(struct pci_dev *dev); void vga_switcheroo_set_dynamic_switch(struct pci_dev *pdev, enum vga_switcheroo_state dynamic); int vga_switcheroo_init_domain_pm_ops(struct device *dev, struct dev_pm_domain *domain); +void vga_switcheroo_fini_domain_pm_ops(struct device *dev); int vga_switcheroo_init_domain_pm_optimus_hdmi_audio(struct device *dev, struct dev_pm_domain *domain); #else @@ -82,6 +83,7 @@ static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return static inline void vga_switcheroo_set_dynamic_switch(struct pci_dev *pdev, enum vga_switcheroo_state dynamic) {} static inline int vga_switcheroo_init_domain_pm_ops(struct device *dev, struct dev_pm_domain *domain) { return -EINVAL; } +static inline void vga_switcheroo_fini_domain_pm_ops(struct device *dev) {} static inline int vga_switcheroo_init_domain_pm_optimus_hdmi_audio(struct device *dev, struct dev_pm_domain *domain) { return -EINVAL; } #endif diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index c557c6d..3a712e2 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -71,12 +71,14 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, THP_ZERO_PAGE_ALLOC, THP_ZERO_PAGE_ALLOC_FAILED, #endif +#ifdef CONFIG_DEBUG_TLBFLUSH #ifdef CONFIG_SMP NR_TLB_REMOTE_FLUSH, /* cpu tried to flush others' tlbs */ NR_TLB_REMOTE_FLUSH_RECEIVED,/* cpu received ipi for flush */ -#endif +#endif /* CONFIG_SMP */ NR_TLB_LOCAL_FLUSH_ALL, NR_TLB_LOCAL_FLUSH_ONE, +#endif /* CONFIG_DEBUG_TLBFLUSH */ NR_VM_EVENT_ITEMS }; diff --git a/include/linux/vmacache.h b/include/linux/vmacache.h new file mode 100644 index 0000000..c3fa0fd4 --- /dev/null +++ b/include/linux/vmacache.h @@ -0,0 +1,38 @@ +#ifndef __LINUX_VMACACHE_H +#define __LINUX_VMACACHE_H + +#include <linux/sched.h> 
+#include <linux/mm.h> + +/* + * Hash based on the page number. Provides a good hit rate for + * workloads with good locality and those with random accesses as well. + */ +#define VMACACHE_HASH(addr) ((addr >> PAGE_SHIFT) & VMACACHE_MASK) + +static inline void vmacache_flush(struct task_struct *tsk) +{ + memset(tsk->vmacache, 0, sizeof(tsk->vmacache)); +} + +extern void vmacache_flush_all(struct mm_struct *mm); +extern void vmacache_update(unsigned long addr, struct vm_area_struct *newvma); +extern struct vm_area_struct *vmacache_find(struct mm_struct *mm, + unsigned long addr); + +#ifndef CONFIG_MMU +extern struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm, + unsigned long start, + unsigned long end); +#endif + +static inline void vmacache_invalidate(struct mm_struct *mm) +{ + mm->vmacache_seqnum++; + + /* deal with overflows */ + if (unlikely(mm->vmacache_seqnum == 0)) + vmacache_flush_all(mm); +} + +#endif /* __LINUX_VMACACHE_H */ diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 1ea2fd5..fe37043 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -87,6 +87,14 @@ static inline void vm_events_fold_cpu(int cpu) #define count_vm_numa_events(x, y) do { (void)(y); } while (0) #endif /* CONFIG_NUMA_BALANCING */ +#ifdef CONFIG_DEBUG_TLBFLUSH +#define count_vm_tlb_event(x) count_vm_event(x) +#define count_vm_tlb_events(x, y) count_vm_events(x, y) +#else +#define count_vm_tlb_event(x) do {} while (0) +#define count_vm_tlb_events(x, y) do { (void)(y); } while (0) +#endif + #define __count_zone_vm_events(item, zone, delta) \ __count_vm_events(item##_NORMAL - ZONE_NORMAL + \ zone_idx(zone), delta) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 594521b..eff358e 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -455,7 +455,7 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, alloc_workqueue("%s", WQ_FREEZABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, \ 1, (name)) 
#define create_singlethread_workqueue(name) \ - alloc_workqueue("%s", WQ_UNBOUND | WQ_MEM_RECLAIM, 1, (name)) + alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM, name) extern void destroy_workqueue(struct workqueue_struct *wq); diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h index 6781258..3d4c034 100644 --- a/include/media/videobuf2-core.h +++ b/include/media/videobuf2-core.h @@ -321,6 +321,9 @@ struct v4l2_fh; * @done_wq: waitqueue for processes waiting for buffers ready to be dequeued * @alloc_ctx: memory type/allocator-specific contexts for each plane * @streaming: current streaming state + * @waiting_for_buffers: used in poll() to check if vb2 is still waiting for + * buffers. Only set for capture queues if qbuf has not yet been + * called since poll() needs to return POLLERR in that situation. * @fileio: file io emulator internal data, used only if emulator is active */ struct vb2_queue { @@ -353,6 +356,7 @@ struct vb2_queue { unsigned int plane_sizes[VIDEO_MAX_PLANES]; unsigned int streaming:1; + unsigned int waiting_for_buffers:1; struct vb2_fileio_data *fileio; }; diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index 7d64d36..4282778 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -155,7 +155,11 @@ struct vsock_transport { /**** CORE ****/ -int vsock_core_init(const struct vsock_transport *t); +int __vsock_core_init(const struct vsock_transport *t, struct module *owner); +static inline int vsock_core_init(const struct vsock_transport *t) +{ + return __vsock_core_init(t, THIS_MODULE); +} void vsock_core_exit(void); /**** UTILS ****/ diff --git a/include/net/dst.h b/include/net/dst.h index a158a07..2322201 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -468,6 +468,7 @@ extern void dst_init(void); /* Flags for xfrm_lookup flags argument. 
*/ enum { XFRM_LOOKUP_ICMP = 1 << 0, + XFRM_LOOKUP_QUEUE = 1 << 1, }; struct flowi; @@ -478,7 +479,16 @@ static inline struct dst_entry *xfrm_lookup(struct net *net, int flags) { return dst_orig; -} +} + +static inline struct dst_entry *xfrm_lookup_route(struct net *net, + struct dst_entry *dst_orig, + const struct flowi *fl, + struct sock *sk, + int flags) +{ + return dst_orig; +} static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst) { @@ -490,6 +500,10 @@ extern struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig const struct flowi *fl, struct sock *sk, int flags); +struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig, + const struct flowi *fl, struct sock *sk, + int flags); + /* skb attached with this dst needs transformation if dst->xfrm is valid */ static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst) { diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index de2c785..0a8f6f9 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -62,6 +62,7 @@ struct inet_connection_sock_af_ops { void (*addr2sockaddr)(struct sock *sk, struct sockaddr *); int (*bind_conflict)(const struct sock *sk, const struct inet_bind_bucket *tb, bool relax); + void (*mtu_reduced)(struct sock *sk); }; /** inet_connection_sock - INET connection oriented sock diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 53f464d..bb06fd2 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -41,14 +41,13 @@ struct inet_peer { struct rcu_head gc_rcu; }; /* - * Once inet_peer is queued for deletion (refcnt == -1), following fields - * are not available: rid, ip_id_count + * Once inet_peer is queued for deletion (refcnt == -1), following field + * is not available: rid * We can share memory with rcu_head to help keep inet_peer small. 
*/ union { struct { atomic_t rid; /* Frag reception counter */ - atomic_t ip_id_count; /* IP ID for the next packet */ }; struct rcu_head rcu; struct inet_peer *gc_next; @@ -166,7 +165,7 @@ extern void inetpeer_invalidate_tree(struct inet_peer_base *); extern void inetpeer_invalidate_family(int family); /* - * temporary check to make sure we dont access rid, ip_id_count, tcp_ts, + * temporary check to make sure we dont access rid, tcp_ts, * tcp_ts_stamp if no refcount is taken on inet_peer */ static inline void inet_peer_refcheck(const struct inet_peer *p) @@ -174,20 +173,4 @@ static inline void inet_peer_refcheck(const struct inet_peer *p) WARN_ON_ONCE(atomic_read(&p->refcnt) <= 0); } - -/* can be called with or without local BH being disabled */ -static inline int inet_getid(struct inet_peer *p, int more) -{ - int old, new; - more++; - inet_peer_refcheck(p); - do { - old = atomic_read(&p->ip_id_count); - new = old + more; - if (!new) - new = 1; - } while (atomic_cmpxchg(&p->ip_id_count, old, new) != old); - return new; -} - #endif /* _NET_INETPEER_H */ diff --git a/include/net/ip.h b/include/net/ip.h index fe6ac1cd..7193c65 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -262,9 +262,10 @@ int ip_dont_fragment(struct sock *sk, struct dst_entry *dst) !(dst_metric_locked(dst, RTAX_MTU))); } -extern void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more); +u32 ip_idents_reserve(u32 hash, int segs); +void __ip_select_ident(struct iphdr *iph, int segs); -static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk) +static inline void ip_select_ident_segs(struct sk_buff *skb, struct sock *sk, int segs) { struct iphdr *iph = ip_hdr(skb); @@ -274,24 +275,20 @@ static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, s * does not change, they drop every other packet in * a TCP stream using header compression. */ - iph->id = (sk && inet_sk(sk)->inet_daddr) ? 
- htons(inet_sk(sk)->inet_id++) : 0; - } else - __ip_select_ident(iph, dst, 0); -} - -static inline void ip_select_ident_more(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk, int more) -{ - struct iphdr *iph = ip_hdr(skb); - - if ((iph->frag_off & htons(IP_DF)) && !skb->local_df) { if (sk && inet_sk(sk)->inet_daddr) { iph->id = htons(inet_sk(sk)->inet_id); - inet_sk(sk)->inet_id += 1 + more; - } else + inet_sk(sk)->inet_id += segs; + } else { iph->id = 0; - } else - __ip_select_ident(iph, dst, more); + } + } else { + __ip_select_ident(iph, segs); + } +} + +static inline void ip_select_ident(struct sk_buff *skb, struct sock *sk) +{ + ip_select_ident_segs(skb, sk, 1); } /* diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 0cef3a0..3ba53d9 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -32,6 +32,11 @@ struct route_info { #define RT6_LOOKUP_F_SRCPREF_PUBLIC 0x00000010 #define RT6_LOOKUP_F_SRCPREF_COA 0x00000020 +/* We do not (yet ?) support IPv6 jumbograms (RFC 2675) + * Unlike IPv4, hdr->seg_len doesn't include the IPv6 header + */ +#define IP6_MAX_MTU (0xFFFF + sizeof(struct ipv6hdr)) + /* * rt6_srcprefs2flags() and rt6_flags2srcprefs() translate * between IPV6_ADDR_PREFERENCES socket option values diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 1f96efd..ea97c94 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -537,14 +537,19 @@ static inline u32 ipv6_addr_hash(const struct in6_addr *a) } /* more secured version of ipv6_addr_hash() */ -static inline u32 ipv6_addr_jhash(const struct in6_addr *a) +static inline u32 __ipv6_addr_jhash(const struct in6_addr *a, const u32 initval) { u32 v = (__force u32)a->s6_addr32[0] ^ (__force u32)a->s6_addr32[1]; return jhash_3words(v, (__force u32)a->s6_addr32[2], (__force u32)a->s6_addr32[3], - ipv6_hash_secret); + initval); +} + +static inline u32 ipv6_addr_jhash(const struct in6_addr *a) +{ + return __ipv6_addr_jhash(a, ipv6_hash_secret); } static inline 
bool ipv6_addr_loopback(const struct in6_addr *a) @@ -656,7 +661,7 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr)); } -extern void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt); +extern void ipv6_proxy_select_ident(struct sk_buff *skb); extern int ip6_dst_hoplimit(struct dst_entry *dst); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index cc6035f..0218c3d 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1449,8 +1449,6 @@ struct ieee80211_tx_control { * @IEEE80211_HW_CONNECTION_MONITOR: * The hardware performs its own connection monitoring, including * periodic keep-alives to the AP and probing the AP on beacon loss. - * When this flag is set, signaling beacon-loss will cause an immediate - * change to disassociated state. * * @IEEE80211_HW_NEED_DTIM_BEFORE_ASSOC: * This device needs to get data from beacon before association (i.e. diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 88a1d40..8887722 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -47,8 +47,8 @@ enum nf_ct_ext_id { /* Extensions: optional stuff which isn't permanently in struct. 
*/ struct nf_ct_ext { struct rcu_head rcu; - u8 offset[NF_CT_EXT_NUM]; - u8 len; + u16 offset[NF_CT_EXT_NUM]; + u16 len; char data[0]; }; diff --git a/include/net/regulatory.h b/include/net/regulatory.h index f17ed59..3e827aa 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -106,7 +106,7 @@ struct ieee80211_reg_rule { struct ieee80211_regdomain { struct rcu_head rcu_head; u32 n_reg_rules; - char alpha2[2]; + char alpha2[3]; u8 dfs_region; struct ieee80211_reg_rule reg_rules[]; }; diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index 832f219..c3f0cd9 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -116,7 +116,7 @@ typedef enum { * analysis of the state functions, but in reality just taken from * thin air in the hopes othat we don't trigger a kernel panic. */ -#define SCTP_MAX_NUM_COMMANDS 14 +#define SCTP_MAX_NUM_COMMANDS 20 typedef union { __s32 i32; diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 3794c5a..3848934 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -454,6 +454,11 @@ static inline void sctp_assoc_pending_pmtu(struct sock *sk, struct sctp_associat asoc->pmtu_pending = 0; } +static inline bool sctp_chunk_pending(const struct sctp_chunk *chunk) +{ + return !list_empty(&chunk->list); +} + /* Walk through a list of TLV parameters. Don't trust the * individual parameter lengths and instead depend on * the chunk length to indicate when to stop. 
Make sure diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 4ef75af..c91b6f5 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -249,9 +249,9 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *, int, __be16); struct sctp_chunk *sctp_make_asconf_set_prim(struct sctp_association *asoc, union sctp_addr *addr); -int sctp_verify_asconf(const struct sctp_association *asoc, - struct sctp_paramhdr *param_hdr, void *chunk_end, - struct sctp_paramhdr **errp); +bool sctp_verify_asconf(const struct sctp_association *asoc, + struct sctp_chunk *chunk, bool addr_param_needed, + struct sctp_paramhdr **errp); struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, struct sctp_chunk *asconf); int sctp_process_asconf_ack(struct sctp_association *asoc, diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 2174d8d..8b31f09 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1245,6 +1245,7 @@ struct sctp_endpoint { /* SCTP-AUTH: endpoint shared keys */ struct list_head endpoint_shared_keys; __u16 active_key_id; + __u8 auth_enable; }; /* Recover the outter endpoint structure. 
*/ @@ -1273,7 +1274,8 @@ struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *, int sctp_has_association(struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr); -int sctp_verify_init(struct net *net, const struct sctp_association *asoc, +int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep, + const struct sctp_association *asoc, sctp_cid_t, sctp_init_chunk_t *peer_init, struct sctp_chunk *chunk, struct sctp_chunk **err_chunk); int sctp_process_init(struct sctp_association *, struct sctp_chunk *chunk, diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index c2e542b..b1c3d1c 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -3,8 +3,6 @@ #include <linux/types.h> -extern __u32 secure_ip_id(__be32 daddr); -extern __u32 secure_ipv6_id(const __be32 daddr[4]); extern u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); extern u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport); diff --git a/include/net/sock.h b/include/net/sock.h index 6e2c490..3899018 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -938,7 +938,6 @@ struct proto { struct sk_buff *skb); void (*release_cb)(struct sock *sk); - void (*mtu_reduced)(struct sock *sk); /* Keeping track of sk's, looking them up, and port selection methods. 
*/ void (*hash)(struct sock *sk); @@ -1749,8 +1748,8 @@ sk_dst_get(struct sock *sk) rcu_read_lock(); dst = rcu_dereference(sk->sk_dst_cache); - if (dst) - dst_hold(dst); + if (dst && !atomic_inc_not_zero(&dst->__refcnt)) + dst = NULL; rcu_read_unlock(); return dst; } @@ -1789,9 +1788,11 @@ __sk_dst_set(struct sock *sk, struct dst_entry *dst) static inline void sk_dst_set(struct sock *sk, struct dst_entry *dst) { - spin_lock(&sk->sk_dst_lock); - __sk_dst_set(sk, dst); - spin_unlock(&sk->sk_dst_lock); + struct dst_entry *old_dst; + + sk_tx_queue_clear(sk); + old_dst = xchg(&sk->sk_dst_cache, dst); + dst_release(old_dst); } static inline void @@ -1803,9 +1804,7 @@ __sk_dst_reset(struct sock *sk) static inline void sk_dst_reset(struct sock *sk) { - spin_lock(&sk->sk_dst_lock); - __sk_dst_reset(sk); - spin_unlock(&sk->sk_dst_lock); + sk_dst_set(sk, NULL); } extern struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie); @@ -2275,6 +2274,11 @@ extern int sock_get_timestampns(struct sock *, struct timespec __user *); extern int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level, int type); +bool sk_ns_capable(const struct sock *sk, + struct user_namespace *user_ns, int cap); +bool sk_capable(const struct sock *sk, int cap); +bool sk_net_capable(const struct sock *sk, int cap); + /* * Enable debug/info messages */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 31c4890..da22d3a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -454,6 +454,7 @@ extern const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); */ extern void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb); +void tcp_v4_mtu_reduced(struct sock *sk); extern int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb); extern struct sock * tcp_create_openreq_child(struct sock *sk, struct request_sock *req, diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index d65fbec..409fafb 100644 --- a/include/scsi/scsi_device.h +++ 
b/include/scsi/scsi_device.h @@ -149,6 +149,7 @@ struct scsi_device { unsigned skip_ms_page_8:1; /* do not use MODE SENSE page 0x08 */ unsigned skip_ms_page_3f:1; /* do not use MODE SENSE page 0x3f */ unsigned skip_vpd_pages:1; /* do not read VPD pages */ + unsigned try_vpd_pages:1; /* attempt to read VPD pages */ unsigned use_192_bytes_for_3f:1; /* ask for 192 bytes from page 0x3f */ unsigned no_start_on_add:1; /* do not issue start on add */ unsigned allow_restart:1; /* issue START_UNIT in error handler */ @@ -257,7 +258,7 @@ struct scsi_target { struct list_head siblings; struct list_head devices; struct device dev; - unsigned int reap_ref; /* protected by the host lock */ + struct kref reap_ref; /* last put renders target invisible */ unsigned int channel; unsigned int id; /* target id ... replace * scsi_device.id eventually */ @@ -284,7 +285,6 @@ struct scsi_target { #define SCSI_DEFAULT_TARGET_BLOCKED 3 char scsi_level; - struct execute_work ew; enum scsi_target_state state; void *hostdata; /* available to low-level driver */ unsigned long starget_data[0]; /* for the transport */ diff --git a/include/scsi/scsi_devinfo.h b/include/scsi/scsi_devinfo.h index 447d2d7..183eaab 100644 --- a/include/scsi/scsi_devinfo.h +++ b/include/scsi/scsi_devinfo.h @@ -32,4 +32,9 @@ #define BLIST_ATTACH_PQ3 0x1000000 /* Scan: Attach to PQ3 devices */ #define BLIST_NO_DIF 0x2000000 /* Disable T10 PI (DIF) */ #define BLIST_SKIP_VPD_PAGES 0x4000000 /* Ignore SBC-3 VPD pages */ +#define BLIST_SCSI3LUN 0x8000000 /* Scan more than 256 LUNs + for sequential scan */ +#define BLIST_TRY_VPD_PAGES 0x10000000 /* Attempt to read VPD pages */ +#define BLIST_NO_RSOC 0x20000000 /* don't try to issue RSOC */ + #endif diff --git a/include/sound/core.h b/include/sound/core.h index 2a14f1f..d6bc961 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -121,6 +121,8 @@ struct snd_card { int user_ctl_count; /* count of all user controls */ struct list_head controls; /* all controls for this 
card */ struct list_head ctl_files; /* active control files */ + struct mutex user_ctl_lock; /* protects user controls against + concurrent access */ struct snd_info_entry *proc_root; /* root for soundcard specific files */ struct snd_info_entry *proc_id; /* the card id */ diff --git a/include/sound/soc-dpcm.h b/include/sound/soc-dpcm.h index 047d657..3007641 100644 --- a/include/sound/soc-dpcm.h +++ b/include/sound/soc-dpcm.h @@ -101,6 +101,8 @@ struct snd_soc_dpcm_runtime { /* state and update */ enum snd_soc_dpcm_update runtime_update; enum snd_soc_dpcm_state state; + + int trigger_pending; /* trigger cmd + 1 if pending, 0 if not */ }; /* can this BE stop and free */ diff --git a/include/target/iscsi/iscsi_transport.h b/include/target/iscsi/iscsi_transport.h index 361bd0f..78edd78 100644 --- a/include/target/iscsi/iscsi_transport.h +++ b/include/target/iscsi/iscsi_transport.h @@ -68,7 +68,8 @@ extern void iscsit_build_nopin_rsp(struct iscsi_cmd *, struct iscsi_conn *, extern void iscsit_build_task_mgt_rsp(struct iscsi_cmd *, struct iscsi_conn *, struct iscsi_tm_rsp *); extern int iscsit_build_text_rsp(struct iscsi_cmd *, struct iscsi_conn *, - struct iscsi_text_rsp *); + struct iscsi_text_rsp *, + enum iscsit_transport_type); extern void iscsit_build_reject(struct iscsi_cmd *, struct iscsi_conn *, struct iscsi_reject *); extern int iscsit_build_logout_rsp(struct iscsi_cmd *, struct iscsi_conn *, diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 5ebe21c..7eb689a 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -51,6 +51,7 @@ int transport_subsystem_register(struct se_subsystem_api *); void transport_subsystem_release(struct se_subsystem_api *); void target_complete_cmd(struct se_cmd *, u8); +void target_complete_cmd_with_length(struct se_cmd *, u8, int); sense_reason_t spc_parse_cdb(struct se_cmd *cmd, unsigned int *size); sense_reason_t spc_emulate_report_luns(struct 
se_cmd *cmd); diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 4c2301d..2aaf370 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -132,6 +132,7 @@ DEFINE_EVENT(block_rq_with_error, block_rq_requeue, * block_rq_complete - block IO operation completed by device driver * @q: queue containing the block operation request * @rq: block operations request + * @nr_bytes: number of completed bytes * * The block_rq_complete tracepoint event indicates that some portion * of operation request has been completed by the device driver. If @@ -139,11 +140,37 @@ DEFINE_EVENT(block_rq_with_error, block_rq_requeue, * do for the request. If @rq->bio is non-NULL then there is * additional work required to complete the request. */ -DEFINE_EVENT(block_rq_with_error, block_rq_complete, +TRACE_EVENT(block_rq_complete, - TP_PROTO(struct request_queue *q, struct request *rq), + TP_PROTO(struct request_queue *q, struct request *rq, + unsigned int nr_bytes), - TP_ARGS(q, rq) + TP_ARGS(q, rq, nr_bytes), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( int, errors ) + __array( char, rwbs, RWBS_LEN ) + __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + ), + + TP_fast_assign( + __entry->dev = rq->rq_disk ? 
disk_devt(rq->rq_disk) : 0; + __entry->sector = blk_rq_pos(rq); + __entry->nr_sector = nr_bytes >> 9; + __entry->errors = rq->errors; + + blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, nr_bytes); + blk_dump_cmd(__get_str(cmd), rq); + ), + + TP_printk("%d,%d %s (%s) %llu + %u [%d]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, __get_str(cmd), + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->errors) ); DECLARE_EVENT_CLASS(block_rq, diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h index fde1b3e..c6814b9 100644 --- a/include/trace/events/compaction.h +++ b/include/trace/events/compaction.h @@ -5,6 +5,7 @@ #define _TRACE_COMPACTION_H #include <linux/types.h> +#include <linux/list.h> #include <linux/tracepoint.h> #include <trace/events/gfpflags.h> @@ -47,10 +48,11 @@ DEFINE_EVENT(mm_compaction_isolate_template, mm_compaction_isolate_freepages, TRACE_EVENT(mm_compaction_migratepages, - TP_PROTO(unsigned long nr_migrated, - unsigned long nr_failed), + TP_PROTO(unsigned long nr_all, + int migrate_rc, + struct list_head *migratepages), - TP_ARGS(nr_migrated, nr_failed), + TP_ARGS(nr_all, migrate_rc, migratepages), TP_STRUCT__entry( __field(unsigned long, nr_migrated) @@ -58,7 +60,22 @@ TRACE_EVENT(mm_compaction_migratepages, ), TP_fast_assign( - __entry->nr_migrated = nr_migrated; + unsigned long nr_failed = 0; + struct list_head *page_lru; + + /* + * migrate_pages() returns either a non-negative number + * with the number of pages that failed migration, or an + * error code, in which case we need to count the remaining + * pages manually + */ + if (migrate_rc >= 0) + nr_failed = migrate_rc; + else + list_for_each(page_lru, migratepages) + nr_failed++; + + __entry->nr_migrated = nr_all - nr_failed; __entry->nr_failed = nr_failed; ), @@ -67,6 +84,48 @@ TRACE_EVENT(mm_compaction_migratepages, __entry->nr_failed) ); +TRACE_EVENT(mm_compaction_begin, + TP_PROTO(unsigned long zone_start, unsigned long 
migrate_start, + unsigned long free_start, unsigned long zone_end), + + TP_ARGS(zone_start, migrate_start, free_start, zone_end), + + TP_STRUCT__entry( + __field(unsigned long, zone_start) + __field(unsigned long, migrate_start) + __field(unsigned long, free_start) + __field(unsigned long, zone_end) + ), + + TP_fast_assign( + __entry->zone_start = zone_start; + __entry->migrate_start = migrate_start; + __entry->free_start = free_start; + __entry->zone_end = zone_end; + ), + + TP_printk("zone_start=%lu migrate_start=%lu free_start=%lu zone_end=%lu", + __entry->zone_start, + __entry->migrate_start, + __entry->free_start, + __entry->zone_end) +); + +TRACE_EVENT(mm_compaction_end, + TP_PROTO(int status), + + TP_ARGS(status), + + TP_STRUCT__entry( + __field(int, status) + ), + + TP_fast_assign( + __entry->status = status; + ), + + TP_printk("status=%d", __entry->status) +); #endif /* _TRACE_COMPACTION_H */ diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index d0c6134..aece134 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -267,14 +267,12 @@ DEFINE_EVENT_PRINT(mm_page, mm_page_pcpu_drain, TRACE_EVENT(mm_page_alloc_extfrag, TP_PROTO(struct page *page, - int alloc_order, int fallback_order, - int alloc_migratetype, int fallback_migratetype, - int change_ownership), + int alloc_order, int fallback_order, + int alloc_migratetype, int fallback_migratetype, int new_migratetype), TP_ARGS(page, alloc_order, fallback_order, - alloc_migratetype, fallback_migratetype, - change_ownership), + alloc_migratetype, fallback_migratetype, new_migratetype), TP_STRUCT__entry( __field( struct page *, page ) @@ -291,7 +289,7 @@ TRACE_EVENT(mm_page_alloc_extfrag, __entry->fallback_order = fallback_order; __entry->alloc_migratetype = alloc_migratetype; __entry->fallback_migratetype = fallback_migratetype; - __entry->change_ownership = change_ownership; + __entry->change_ownership = (new_migratetype == alloc_migratetype); ), TP_printk("page=%p 
pfn=%lu alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d", diff --git a/include/trace/events/module.h b/include/trace/events/module.h index 1619327..ca298c7 100644 --- a/include/trace/events/module.h +++ b/include/trace/events/module.h @@ -78,7 +78,7 @@ DECLARE_EVENT_CLASS(module_refcnt, TP_fast_assign( __entry->ip = ip; - __entry->refcnt = __this_cpu_read(mod->refptr->incs) + __this_cpu_read(mod->refptr->decs); + __entry->refcnt = __this_cpu_read(mod->refptr->incs) - __this_cpu_read(mod->refptr->decs); __assign_str(name, mod->name); ), diff --git a/include/trace/events/pagemap.h b/include/trace/events/pagemap.h index 1c9fabd..ce0803b 100644 --- a/include/trace/events/pagemap.h +++ b/include/trace/events/pagemap.h @@ -28,12 +28,10 @@ TRACE_EVENT(mm_lru_insertion, TP_PROTO( struct page *page, - unsigned long pfn, - int lru, - unsigned long flags + int lru ), - TP_ARGS(page, pfn, lru, flags), + TP_ARGS(page, lru), TP_STRUCT__entry( __field(struct page *, page ) @@ -44,9 +42,9 @@ TRACE_EVENT(mm_lru_insertion, TP_fast_assign( __entry->page = page; - __entry->pfn = pfn; + __entry->pfn = page_to_pfn(page); __entry->lru = lru; - __entry->flags = flags; + __entry->flags = trace_pagemap_flags(page); ), /* Flag format is based on page-types.c formatting for pagemap */ @@ -64,9 +62,9 @@ TRACE_EVENT(mm_lru_insertion, TRACE_EVENT(mm_lru_activate, - TP_PROTO(struct page *page, unsigned long pfn), + TP_PROTO(struct page *page), - TP_ARGS(page, pfn), + TP_ARGS(page), TP_STRUCT__entry( __field(struct page *, page ) @@ -75,7 +73,7 @@ TRACE_EVENT(mm_lru_activate, TP_fast_assign( __entry->page = page; - __entry->pfn = pfn; + __entry->pfn = page_to_pfn(page); ), /* Flag format is based on page-types.c formatting for pagemap */ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index fed853f..9674145 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -4,6 +4,7 @@ #include 
<linux/tracepoint.h> #include <linux/unistd.h> #include <linux/ftrace_event.h> +#include <linux/thread_info.h> #include <asm/ptrace.h> @@ -32,4 +33,18 @@ struct syscall_metadata { struct ftrace_event_call *exit_event; }; +#if defined(CONFIG_TRACEPOINTS) && defined(CONFIG_HAVE_SYSCALL_TRACEPOINTS) +static inline void syscall_tracepoint_update(struct task_struct *p) +{ + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) + set_tsk_thread_flag(p, TIF_SYSCALL_TRACEPOINT); + else + clear_tsk_thread_flag(p, TIF_SYSCALL_TRACEPOINT); +} +#else +static inline void syscall_tracepoint_update(struct task_struct *p) +{ +} +#endif + #endif /* _TRACE_SYSCALL_H */ diff --git a/include/uapi/drm/tegra_drm.h b/include/uapi/drm/tegra_drm.h index 73bde4e..da10687 100644 --- a/include/uapi/drm/tegra_drm.h +++ b/include/uapi/drm/tegra_drm.h @@ -105,7 +105,6 @@ struct drm_tegra_submit { __u32 num_waitchks; __u32 waitchk_mask; __u32 timeout; - __u32 pad; __u64 syncpts; __u64 cmdbufs; __u64 relocs; diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 75cef3f..b7cb978 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -374,6 +374,8 @@ struct audit_tty_status { __u32 log_passwd; /* 1 = enabled, 0 = disabled */ }; +#define AUDIT_UID_UNSET (unsigned int)-1 + /* audit_rule_data supports filter rules with both integer and string * fields. It corresponds with AUDIT_ADD_RULE, AUDIT_DEL_RULE and * AUDIT_LIST_RULES requests. 
diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index 440d5c4..599b0d4 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -156,7 +156,7 @@ enum { /* * IPV6 socket options */ - +#if __UAPI_DEF_IPV6_OPTIONS #define IPV6_ADDRFORM 1 #define IPV6_2292PKTINFO 2 #define IPV6_2292HOPOPTS 3 @@ -195,6 +195,7 @@ enum { #define IPV6_IPSEC_POLICY 34 #define IPV6_XFRM_POLICY 35 +#endif /* * Multicast: diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h index a372627..f30db09 100644 --- a/include/uapi/linux/input.h +++ b/include/uapi/linux/input.h @@ -164,6 +164,7 @@ struct input_keymap_entry { #define INPUT_PROP_DIRECT 0x01 /* direct input devices */ #define INPUT_PROP_BUTTONPAD 0x02 /* has button(s) under pad */ #define INPUT_PROP_SEMI_MT 0x03 /* touch rectangle only */ +#define INPUT_PROP_TOPBUTTONPAD 0x04 /* softbuttons at top of pad */ #define INPUT_PROP_MAX 0x1f #define INPUT_PROP_CNT (INPUT_PROP_MAX + 1) diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h index 335e8a7..e28807a 100644 --- a/include/uapi/linux/libc-compat.h +++ b/include/uapi/linux/libc-compat.h @@ -69,6 +69,7 @@ #define __UAPI_DEF_SOCKADDR_IN6 0 #define __UAPI_DEF_IPV6_MREQ 0 #define __UAPI_DEF_IPPROTO_V6 0 +#define __UAPI_DEF_IPV6_OPTIONS 0 #else @@ -82,9 +83,16 @@ #define __UAPI_DEF_SOCKADDR_IN6 1 #define __UAPI_DEF_IPV6_MREQ 1 #define __UAPI_DEF_IPPROTO_V6 1 +#define __UAPI_DEF_IPV6_OPTIONS 1 #endif /* _NETINET_IN_H */ +/* Definitions for xattr.h */ +#if defined(_SYS_XATTR_H) +#define __UAPI_DEF_XATTR 0 +#else +#define __UAPI_DEF_XATTR 1 +#endif /* If we did not see any headers from any supported C libraries, * or we are being included in the kernel, then define everything @@ -97,6 +105,10 @@ #define __UAPI_DEF_SOCKADDR_IN6 1 #define __UAPI_DEF_IPV6_MREQ 1 #define __UAPI_DEF_IPPROTO_V6 1 +#define __UAPI_DEF_IPV6_OPTIONS 1 + +/* Definitions for xattr.h */ +#define __UAPI_DEF_XATTR 1 #endif /* __GLIBC__ */ diff --git 
a/include/uapi/linux/netfilter/xt_bpf.h b/include/uapi/linux/netfilter/xt_bpf.h index 5dda450..2ec9fbc 100644 --- a/include/uapi/linux/netfilter/xt_bpf.h +++ b/include/uapi/linux/netfilter/xt_bpf.h @@ -6,6 +6,8 @@ #define XT_BPF_MAX_NUM_INSTR 64 +struct sk_filter; + struct xt_bpf_info { __u16 bpf_program_num_elem; struct sock_filter bpf_program[XT_BPF_MAX_NUM_INSTR]; diff --git a/include/uapi/linux/usb/Kbuild b/include/uapi/linux/usb/Kbuild index 6cb4ea8..4cc4d6e 100644 --- a/include/uapi/linux/usb/Kbuild +++ b/include/uapi/linux/usb/Kbuild @@ -1,6 +1,7 @@ # UAPI Header export list header-y += audio.h header-y += cdc.h +header-y += cdc-wdm.h header-y += ch11.h header-y += ch9.h header-y += functionfs.h diff --git a/include/uapi/linux/usb/cdc-wdm.h b/include/uapi/linux/usb/cdc-wdm.h index f03134f..0dc132e 100644 --- a/include/uapi/linux/usb/cdc-wdm.h +++ b/include/uapi/linux/usb/cdc-wdm.h @@ -9,6 +9,8 @@ #ifndef _UAPI__LINUX_USB_CDC_WDM_H #define _UAPI__LINUX_USB_CDC_WDM_H +#include <linux/types.h> + /* * This IOCTL is used to retrieve the wMaxCommand for the device, * defining the message limit for both reading and writing. 
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 437f1b0..c5e2c7d 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1059,14 +1059,14 @@ struct v4l2_bt_timings { /* A few useful defines to calculate the total blanking and frame sizes */ #define V4L2_DV_BT_BLANKING_WIDTH(bt) \ - (bt->hfrontporch + bt->hsync + bt->hbackporch) + ((bt)->hfrontporch + (bt)->hsync + (bt)->hbackporch) #define V4L2_DV_BT_FRAME_WIDTH(bt) \ - (bt->width + V4L2_DV_BT_BLANKING_WIDTH(bt)) + ((bt)->width + V4L2_DV_BT_BLANKING_WIDTH(bt)) #define V4L2_DV_BT_BLANKING_HEIGHT(bt) \ - (bt->vfrontporch + bt->vsync + bt->vbackporch + \ - bt->il_vfrontporch + bt->il_vsync + bt->il_vbackporch) + ((bt)->vfrontporch + (bt)->vsync + (bt)->vbackporch + \ + (bt)->il_vfrontporch + (bt)->il_vsync + (bt)->il_vbackporch) #define V4L2_DV_BT_FRAME_HEIGHT(bt) \ - (bt->height + V4L2_DV_BT_BLANKING_HEIGHT(bt)) + ((bt)->height + V4L2_DV_BT_BLANKING_HEIGHT(bt)) /** struct v4l2_dv_timings - DV timings * @type: the type of the timings diff --git a/include/uapi/linux/xattr.h b/include/uapi/linux/xattr.h index e4629b9..f2765c1 100644 --- a/include/uapi/linux/xattr.h +++ b/include/uapi/linux/xattr.h @@ -7,11 +7,18 @@ Copyright (c) 2001-2002 Silicon Graphics, Inc. All Rights Reserved. Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com> */ + +#include <linux/libc-compat.h> + #ifndef _UAPI_LINUX_XATTR_H #define _UAPI_LINUX_XATTR_H +#if __UAPI_DEF_XATTR +#define __USE_KERNEL_XATTR_DEFS + #define XATTR_CREATE 0x1 /* set value, fail if attr already exists */ #define XATTR_REPLACE 0x2 /* set value, fail if attr does not exist */ +#endif /* Namespaces */ #define XATTR_OS2_PREFIX "os2." 
diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h index 99b80ab..3066718 100644 --- a/include/uapi/rdma/rdma_user_cm.h +++ b/include/uapi/rdma/rdma_user_cm.h @@ -34,6 +34,7 @@ #define RDMA_USER_CM_H #include <linux/types.h> +#include <linux/socket.h> #include <linux/in6.h> #include <rdma/ib_user_verbs.h> #include <rdma/ib_user_sa.h> diff --git a/include/uapi/sound/compress_offload.h b/include/uapi/sound/compress_offload.h index 5759810..21eed48 100644 --- a/include/uapi/sound/compress_offload.h +++ b/include/uapi/sound/compress_offload.h @@ -80,7 +80,7 @@ struct snd_compr_tstamp { struct snd_compr_avail { __u64 avail; struct snd_compr_tstamp tstamp; -}; +} __attribute__((packed)); enum snd_compr_direction { SND_COMPRESS_PLAYBACK = 0, diff --git a/init/Kconfig b/init/Kconfig index 54eb5b6..836b34e 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1409,6 +1409,7 @@ config FUTEX config HAVE_FUTEX_CMPXCHG bool + depends on FUTEX help Architectures should select this if futex_atomic_cmpxchg_inatomic() is implemented and always working. 
This removes a couple of runtime diff --git a/init/main.c b/init/main.c index e8087f7..0db119a 100644 --- a/init/main.c +++ b/init/main.c @@ -612,6 +612,10 @@ asmlinkage void __init start_kernel(void) if (efi_enabled(EFI_RUNTIME_SERVICES)) efi_enter_virtual_mode(); #endif +#ifdef CONFIG_X86_ESPFIX64 + /* Should be run before the first non-init thread is created */ + init_espfix_bsp(); +#endif thread_info_cache_init(); cred_init(); fork_init(totalram_pages); diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index b0e99de..a0f0ab2 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -123,7 +123,6 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; - size_t lenp_bef = *lenp; int oldval; int rc; @@ -133,7 +132,7 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, rc = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); - if (write && !rc && lenp_bef == *lenp) { + if (write && !rc) { int newval = *((int *)(ipc_table.data)); /* * The file "auto_msgmni" has correctly been set. diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index 8bb92eb..b867a1c 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks @@ -220,6 +220,9 @@ config INLINE_WRITE_UNLOCK_IRQRESTORE endif +config ARCH_SUPPORTS_ATOMIC_RMW + bool + config MUTEX_SPIN_ON_OWNER def_bool y - depends on SMP && !DEBUG_MUTEXES && !PREEMPT_RT_FULL + depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL diff --git a/kernel/audit.c b/kernel/audit.c index 6def25f..4059e94 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -593,13 +593,13 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type) case AUDIT_TTY_SET: case AUDIT_TRIM: case AUDIT_MAKE_EQUIV: - if (!capable(CAP_AUDIT_CONTROL)) + if (!netlink_capable(skb, CAP_AUDIT_CONTROL)) err = -EPERM; break; case AUDIT_USER: case AUDIT_FIRST_USER_MSG ... 
AUDIT_LAST_USER_MSG: case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2: - if (!capable(CAP_AUDIT_WRITE)) + if (!netlink_capable(skb, CAP_AUDIT_WRITE)) err = -EPERM; break; default: /* bad msg */ @@ -1412,7 +1412,7 @@ void audit_log_cap(struct audit_buffer *ab, char *prefix, kernel_cap_t *cap) audit_log_format(ab, " %s=", prefix); CAP_FOR_EACH_U32(i) { audit_log_format(ab, "%08x", - cap->cap[(_KERNEL_CAPABILITY_U32S-1) - i]); + cap->cap[CAP_LAST_U32 - i]); } } @@ -1613,10 +1613,10 @@ void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) spin_unlock_irq(&tsk->sighand->siglock); audit_log_format(ab, - " ppid=%ld pid=%d auid=%u uid=%u gid=%u" + " ppid=%d pid=%d auid=%u uid=%u gid=%u" " euid=%u suid=%u fsuid=%u" - " egid=%u sgid=%u fsgid=%u ses=%u tty=%s", - sys_getppid(), + " egid=%u sgid=%u fsgid=%u tty=%s ses=%u", + task_ppid_nr(tsk), tsk->pid, from_kuid(&init_user_ns, audit_get_loginuid(tsk)), from_kuid(&init_user_ns, cred->uid), @@ -1627,7 +1627,7 @@ void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) from_kgid(&init_user_ns, cred->egid), from_kgid(&init_user_ns, cred->sgid), from_kgid(&init_user_ns, cred->fsgid), - audit_get_sessionid(tsk), tty); + tty, audit_get_sessionid(tsk)); get_task_comm(name, tsk); audit_log_format(ab, " comm="); diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 43c307d..00c4459 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -154,6 +154,7 @@ static struct audit_chunk *alloc_chunk(int count) chunk->owners[i].index = i; } fsnotify_init_mark(&chunk->mark, audit_tree_destroy_watch); + chunk->mark.mask = FS_IN_IGNORED; return chunk; } diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index f7aee8b..dfd2f4a 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -423,9 +423,10 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, f->lsm_rule = NULL; /* Support legacy tests for a valid loginuid */ - if ((f->type == AUDIT_LOGINUID) && 
(f->val == ~0U)) { + if ((f->type == AUDIT_LOGINUID) && (f->val == AUDIT_UID_UNSET)) { f->type = AUDIT_LOGINUID_SET; f->val = 0; + entry->rule.pflags |= AUDIT_LOGINUID_LEGACY; } err = audit_field_valid(entry, f); @@ -601,6 +602,13 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule) data->buflen += data->values[i] = audit_pack_string(&bufp, krule->filterkey); break; + case AUDIT_LOGINUID_SET: + if (krule->pflags & AUDIT_LOGINUID_LEGACY && !f->val) { + data->fields[i] = AUDIT_LOGINUID; + data->values[i] = AUDIT_UID_UNSET; + break; + } + /* fallthrough if set */ default: data->values[i] = f->val; } @@ -617,6 +625,7 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b) int i; if (a->flags != b->flags || + a->pflags != b->pflags || a->listnr != b->listnr || a->action != b->action || a->field_count != b->field_count) @@ -735,6 +744,7 @@ struct audit_entry *audit_dupe_rule(struct audit_krule *old) new = &entry->rule; new->vers_ops = old->vers_ops; new->flags = old->flags; + new->pflags = old->pflags; new->listnr = old->listnr; new->action = old->action; for (i = 0; i < AUDIT_BITMASK_SIZE; i++) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 9845cb3..979c00b 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -472,7 +472,7 @@ static int audit_filter_rules(struct task_struct *tsk, case AUDIT_PPID: if (ctx) { if (!ctx->ppid) - ctx->ppid = sys_getppid(); + ctx->ppid = task_ppid_nr(tsk); result = audit_comparator(ctx->ppid, f->op, f->val); } break; @@ -733,6 +733,22 @@ static enum audit_state audit_filter_task(struct task_struct *tsk, char **key) return AUDIT_BUILD_CONTEXT; } +static int audit_in_mask(const struct audit_krule *rule, unsigned long val) +{ + int word, bit; + + if (val > 0xffffffff) + return false; + + word = AUDIT_WORD(val); + if (word >= AUDIT_BITMASK_SIZE) + return false; + + bit = AUDIT_BIT(val); + + return rule->mask[word] & bit; +} + /* At syscall entry and exit time, this filter is called if 
the * audit_state is not low enough that auditing cannot take place, but is * also not high enough that we already know we have to write an audit @@ -750,11 +766,8 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk, rcu_read_lock(); if (!list_empty(list)) { - int word = AUDIT_WORD(ctx->major); - int bit = AUDIT_BIT(ctx->major); - list_for_each_entry_rcu(e, list, list) { - if ((e->rule.mask[word] & bit) == bit && + if (audit_in_mask(&e->rule, ctx->major) && audit_filter_rules(tsk, &e->rule, ctx, NULL, &state, false)) { rcu_read_unlock(); @@ -774,20 +787,16 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk, static int audit_filter_inode_name(struct task_struct *tsk, struct audit_names *n, struct audit_context *ctx) { - int word, bit; int h = audit_hash_ino((u32)n->ino); struct list_head *list = &audit_inode_hash[h]; struct audit_entry *e; enum audit_state state; - word = AUDIT_WORD(ctx->major); - bit = AUDIT_BIT(ctx->major); - if (list_empty(list)) return 0; list_for_each_entry_rcu(e, list, list) { - if ((e->rule.mask[word] & bit) == bit && + if (audit_in_mask(&e->rule, ctx->major) && audit_filter_rules(tsk, &e->rule, ctx, n, &state, false)) { ctx->current_state = state; return 1; diff --git a/kernel/capability.c b/kernel/capability.c index 4e66bf9..50fb74b 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -268,6 +268,10 @@ SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data) i++; } + effective.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; + permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; + inheritable.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; + new = prepare_creds(); if (!new) return -ENOMEM; @@ -433,23 +437,19 @@ bool capable(int cap) EXPORT_SYMBOL(capable); /** - * inode_capable - Check superior capability over inode + * capable_wrt_inode_uidgid - Check nsown_capable and uid and gid mapped * @inode: The inode in question * @cap: The capability in question * - * Return 
true if the current task has the given superior capability - * targeted at it's own user namespace and that the given inode is owned - * by the current user namespace or a child namespace. - * - * Currently we check to see if an inode is owned by the current - * user namespace by seeing if the inode's owner maps into the - * current user namespace. - * + * Return true if the current task has the given capability targeted at + * its own user namespace and that the given inode's uid and gid are + * mapped into the current user namespace. */ -bool inode_capable(const struct inode *inode, int cap) +bool capable_wrt_inode_uidgid(const struct inode *inode, int cap) { struct user_namespace *ns = current_user_ns(); - return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid); + return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid) && + kgid_has_mapping(ns, inode->i_gid); } -EXPORT_SYMBOL(inode_capable); +EXPORT_SYMBOL(capable_wrt_inode_uidgid); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 1c204fd..5d9d542 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1012,7 +1012,7 @@ static void cgroup_d_remove_dir(struct dentry *dentry) parent = dentry->d_parent; spin_lock(&parent->d_lock); spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - list_del_init(&dentry->d_u.d_child); + list_del_init(&dentry->d_child); spin_unlock(&dentry->d_lock); spin_unlock(&parent->d_lock); remove_dir(dentry); diff --git a/kernel/cpu.c b/kernel/cpu.c index ba7416b..6d2cab1 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1017,10 +1017,12 @@ void set_cpu_present(unsigned int cpu, bool present) void set_cpu_online(unsigned int cpu, bool online) { - if (online) + if (online) { cpumask_set_cpu(cpu, to_cpumask(cpu_online_bits)); - else + cpumask_set_cpu(cpu, to_cpumask(cpu_active_bits)); + } else { cpumask_clear_cpu(cpu, to_cpumask(cpu_online_bits)); + } } void set_cpu_active(unsigned int cpu, bool active) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 
5ae9f95..c828913 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -61,12 +61,7 @@ #include <linux/cgroup.h> #include <linux/wait.h> -/* - * Tracks how many cpusets are currently defined in system. - * When there is only one cpuset (the root cpuset) we can - * short circuit some hooks. - */ -int number_of_cpusets __read_mostly; +struct static_key cpusets_enabled_key __read_mostly = STATIC_KEY_INIT_FALSE; /* See "Frequency meter" comments, below. */ @@ -611,7 +606,7 @@ static int generate_sched_domains(cpumask_var_t **domains, goto done; } - csa = kmalloc(number_of_cpusets * sizeof(cp), GFP_KERNEL); + csa = kmalloc(nr_cpusets() * sizeof(cp), GFP_KERNEL); if (!csa) goto done; csn = 0; @@ -1022,7 +1017,7 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk, task_lock(tsk); /* * Determine if a loop is necessary if another thread is doing - * get_mems_allowed(). If at least one node remains unchanged and + * read_mems_allowed_begin(). If at least one node remains unchanged and * tsk does not have a mempolicy, then an empty nodemask will not be * possible when mems_allowed is larger than a word. 
*/ @@ -1236,7 +1231,13 @@ done: int current_cpuset_is_being_rebound(void) { - return task_cs(current) == cpuset_being_rebound; + int ret; + + rcu_read_lock(); + ret = task_cs(current) == cpuset_being_rebound; + rcu_read_unlock(); + + return ret; } static int update_relax_domain_level(struct cpuset *cs, s64 val) @@ -1980,7 +1981,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) if (is_spread_slab(parent)) set_bit(CS_SPREAD_SLAB, &cs->flags); - number_of_cpusets++; + cpuset_inc(); if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags)) goto out_unlock; @@ -2031,7 +2032,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css) if (is_sched_load_balance(cs)) update_flag(CS_SCHED_LOAD_BALANCE, cs, 0); - number_of_cpusets--; + cpuset_dec(); clear_bit(CS_ONLINE, &cs->flags); mutex_unlock(&cpuset_mutex); @@ -2086,7 +2087,6 @@ int __init cpuset_init(void) if (!alloc_cpumask_var(&cpus_attach, GFP_KERNEL)) BUG(); - number_of_cpusets = 1; return 0; } diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 0506d44..e911ec6 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -49,6 +49,7 @@ #include <linux/pid.h> #include <linux/smp.h> #include <linux/mm.h> +#include <linux/vmacache.h> #include <linux/rcupdate.h> #include <asm/cacheflush.h> @@ -224,10 +225,17 @@ static void kgdb_flush_swbreak_addr(unsigned long addr) if (!CACHE_FLUSH_IS_SAFE) return; - if (current->mm && current->mm->mmap_cache) { - flush_cache_range(current->mm->mmap_cache, - addr, addr + BREAK_INSTR_SIZE); + if (current->mm) { + int i; + + for (i = 0; i < VMACACHE_SIZE; i++) { + if (!current->vmacache[i]) + continue; + flush_cache_range(current->vmacache[i], + addr, addr + BREAK_INSTR_SIZE); + } } + /* Force flush instruction cache if it was outside the mm */ flush_icache_range(addr, addr + BREAK_INSTR_SIZE); } diff --git a/kernel/events/core.c b/kernel/events/core.c index 40c3397..f5aa00f 100644 --- a/kernel/events/core.c +++ 
b/kernel/events/core.c @@ -39,6 +39,7 @@ #include <linux/hw_breakpoint.h> #include <linux/mm_types.h> #include <linux/cgroup.h> +#include <linux/compat.h> #include "internal.h" @@ -1428,6 +1429,11 @@ group_sched_out(struct perf_event *group_event, cpuctx->exclusive = 0; } +struct remove_event { + struct perf_event *event; + bool detach_group; +}; + /* * Cross CPU call to remove a performance event * @@ -1436,12 +1442,15 @@ group_sched_out(struct perf_event *group_event, */ static int __perf_remove_from_context(void *info) { - struct perf_event *event = info; + struct remove_event *re = info; + struct perf_event *event = re->event; struct perf_event_context *ctx = event->ctx; struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); raw_spin_lock(&ctx->lock); event_sched_out(event, cpuctx, ctx); + if (re->detach_group) + perf_group_detach(event); list_del_event(event, ctx); if (!ctx->nr_events && cpuctx->task_ctx == ctx) { ctx->is_active = 0; @@ -1466,10 +1475,14 @@ static int __perf_remove_from_context(void *info) * When called from perf_event_exit_task, it's OK because the * context has been detached from its task. */ -static void perf_remove_from_context(struct perf_event *event) +static void perf_remove_from_context(struct perf_event *event, bool detach_group) { struct perf_event_context *ctx = event->ctx; struct task_struct *task = ctx->task; + struct remove_event re = { + .event = event, + .detach_group = detach_group, + }; lockdep_assert_held(&ctx->mutex); @@ -1478,12 +1491,12 @@ static void perf_remove_from_context(struct perf_event *event) * Per cpu events are removed via an smp call and * the removal is always successful. 
*/ - cpu_function_call(event->cpu, __perf_remove_from_context, event); + cpu_function_call(event->cpu, __perf_remove_from_context, &re); return; } retry: - if (!task_function_call(task, __perf_remove_from_context, event)) + if (!task_function_call(task, __perf_remove_from_context, &re)) return; raw_spin_lock_irq(&ctx->lock); @@ -1493,6 +1506,11 @@ retry: */ if (ctx->is_active) { raw_spin_unlock_irq(&ctx->lock); + /* + * Reload the task pointer, it might have been changed by + * a concurrent perf_event_context_sched_out(). + */ + task = ctx->task; goto retry; } @@ -1500,6 +1518,8 @@ retry: * Since the task isn't running, its safe to remove the event, us * holding the ctx->lock ensures the task won't get scheduled in. */ + if (detach_group) + perf_group_detach(event); list_del_event(event, ctx); raw_spin_unlock_irq(&ctx->lock); } @@ -1925,6 +1945,11 @@ retry: */ if (ctx->is_active) { raw_spin_unlock_irq(&ctx->lock); + /* + * Reload the task pointer, it might have been changed by + * a concurrent perf_event_context_sched_out(). + */ + task = ctx->task; goto retry; } @@ -2212,9 +2237,6 @@ static void __perf_event_sync_stat(struct perf_event *event, perf_event_update_userpage(next_event); } -#define list_next_entry(pos, member) \ - list_entry(pos->member.next, typeof(*pos), member) - static void perf_event_sync_stat(struct perf_event_context *ctx, struct perf_event_context *next_ctx) { @@ -3235,10 +3257,7 @@ int perf_event_release_kernel(struct perf_event *event) * to trigger the AB-BA case. 
*/ mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING); - raw_spin_lock_irq(&ctx->lock); - perf_group_detach(event); - raw_spin_unlock_irq(&ctx->lock); - perf_remove_from_context(event); + perf_remove_from_context(event, true); mutex_unlock(&ctx->mutex); free_event(event); @@ -3614,6 +3633,26 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return 0; } +#ifdef CONFIG_COMPAT +static long perf_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + switch (_IOC_NR(cmd)) { + case _IOC_NR(PERF_EVENT_IOC_SET_FILTER): + case _IOC_NR(PERF_EVENT_IOC_ID): + /* Fix up pointer size (usually 4 -> 8 in 32-on-64-bit case */ + if (_IOC_SIZE(cmd) == sizeof(compat_uptr_t)) { + cmd &= ~IOCSIZE_MASK; + cmd |= sizeof(void *) << IOCSIZE_SHIFT; + } + break; + } + return perf_ioctl(file, cmd, arg); +} +#else +# define perf_compat_ioctl NULL +#endif + int perf_event_task_enable(void) { struct perf_event *event; @@ -4107,7 +4146,7 @@ static const struct file_operations perf_fops = { .read = perf_read, .poll = perf_poll, .unlocked_ioctl = perf_ioctl, - .compat_ioctl = perf_ioctl, + .compat_ioctl = perf_compat_ioctl, .mmap = perf_mmap, .fasync = perf_fasync, }; @@ -5343,6 +5382,9 @@ struct swevent_htable { /* Recursion avoidance in each contexts */ int recursion[PERF_NR_CONTEXTS]; + + /* Keeps track of cpu being initialized/exited */ + bool online; }; static DEFINE_PER_CPU(struct swevent_htable, swevent_htable); @@ -5589,8 +5631,14 @@ static int perf_swevent_add(struct perf_event *event, int flags) hwc->state = !(flags & PERF_EF_START); head = find_swevent_head(swhash, event); - if (WARN_ON_ONCE(!head)) + if (!head) { + /* + * We can race with cpu hotplug code. Do not + * WARN if the cpu just got unplugged. 
+ */ + WARN_ON_ONCE(swhash->online); return -EINVAL; + } hlist_add_head_rcu(&event->hlist_entry, head); @@ -6957,6 +7005,9 @@ SYSCALL_DEFINE5(perf_event_open, if (attr.freq) { if (attr.sample_freq > sysctl_perf_event_sample_rate) return -EINVAL; + } else { + if (attr.sample_period & (1ULL << 63)) + return -EINVAL; } /* @@ -7100,7 +7151,7 @@ SYSCALL_DEFINE5(perf_event_open, struct perf_event_context *gctx = group_leader->ctx; mutex_lock(&gctx->mutex); - perf_remove_from_context(group_leader); + perf_remove_from_context(group_leader, false); /* * Removing from the context ends up with disabled @@ -7110,7 +7161,7 @@ SYSCALL_DEFINE5(perf_event_open, perf_event__state_init(group_leader); list_for_each_entry(sibling, &group_leader->sibling_list, group_entry) { - perf_remove_from_context(sibling); + perf_remove_from_context(sibling, false); perf_event__state_init(sibling); put_ctx(gctx); } @@ -7123,11 +7174,11 @@ SYSCALL_DEFINE5(perf_event_open, if (move_group) { synchronize_rcu(); - perf_install_in_context(ctx, group_leader, event->cpu); + perf_install_in_context(ctx, group_leader, group_leader->cpu); get_ctx(ctx); list_for_each_entry(sibling, &group_leader->sibling_list, group_entry) { - perf_install_in_context(ctx, sibling, event->cpu); + perf_install_in_context(ctx, sibling, sibling->cpu); get_ctx(ctx); } } @@ -7242,7 +7293,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) mutex_lock(&src_ctx->mutex); list_for_each_entry_safe(event, tmp, &src_ctx->event_list, event_entry) { - perf_remove_from_context(event); + perf_remove_from_context(event, false); unaccount_event_cpu(event, src_cpu); put_ctx(src_ctx); list_add(&event->migrate_entry, &events); @@ -7304,13 +7355,7 @@ __perf_event_exit_task(struct perf_event *child_event, struct perf_event_context *child_ctx, struct task_struct *child) { - if (child_event->parent) { - raw_spin_lock_irq(&child_ctx->lock); - perf_group_detach(child_event); - raw_spin_unlock_irq(&child_ctx->lock); - } - - 
perf_remove_from_context(child_event); + perf_remove_from_context(child_event, !!child_event->parent); /* * It can happen that the parent exits first, and has events @@ -7748,8 +7793,10 @@ int perf_event_init_task(struct task_struct *child) for_each_task_context_nr(ctxn) { ret = perf_event_init_context(child, ctxn); - if (ret) + if (ret) { + perf_event_free_task(child); return ret; + } } return 0; @@ -7772,6 +7819,7 @@ static void perf_event_init_cpu(int cpu) struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); mutex_lock(&swhash->hlist_mutex); + swhash->online = true; if (swhash->hlist_refcount > 0) { struct swevent_hlist *hlist; @@ -7794,14 +7842,14 @@ static void perf_pmu_rotate_stop(struct pmu *pmu) static void __perf_event_exit_context(void *__info) { + struct remove_event re = { .detach_group = false }; struct perf_event_context *ctx = __info; - struct perf_event *event; perf_pmu_rotate_stop(ctx->pmu); rcu_read_lock(); - list_for_each_entry_rcu(event, &ctx->event_list, event_entry) - __perf_remove_from_context(event); + list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry) + __perf_remove_from_context(&re); rcu_read_unlock(); } @@ -7829,6 +7877,7 @@ static void perf_event_exit_cpu(int cpu) perf_event_exit_cpu_context(cpu); mutex_lock(&swhash->hlist_mutex); + swhash->online = false; swevent_hlist_release(swhash); mutex_unlock(&swhash->hlist_mutex); } diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index ad8e1bd..8176caf 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1511,7 +1511,6 @@ bool uprobe_deny_signal(void) if (__fatal_signal_pending(t) || arch_uprobe_xol_was_trapped(t)) { utask->state = UTASK_SSTEP_TRAPPED; set_tsk_thread_flag(t, TIF_UPROBE); - set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); } } diff --git a/kernel/exit.c b/kernel/exit.c index 1403fdc..3b93e6a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -74,6 +74,7 @@ static void __unhash_process(struct task_struct *p, bool group_dead) 
__this_cpu_dec(process_counts); } list_del_rcu(&p->thread_group); + list_del_rcu(&p->thread_node); } /* diff --git a/kernel/fork.c b/kernel/fork.c index ae9a1a4..b16bb05 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -28,6 +28,8 @@ #include <linux/mman.h> #include <linux/mmu_notifier.h> #include <linux/fs.h> +#include <linux/mm.h> +#include <linux/vmacache.h> #include <linux/nsproxy.h> #include <linux/capability.h> #include <linux/cpu.h> @@ -376,7 +378,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) mm->locked_vm = 0; mm->mmap = NULL; - mm->mmap_cache = NULL; + mm->vmacache_seqnum = 0; mm->map_count = 0; cpumask_clear(mm_cpumask(mm)); mm->mm_rb = RB_ROOT; @@ -908,6 +910,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) if (!oldmm) return 0; + /* initialize the new vmacache entries */ + vmacache_flush(tsk); + if (clone_flags & CLONE_VM) { atomic_inc(&oldmm->mm_users); mm = oldmm; @@ -1064,6 +1069,11 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->nr_threads = 1; atomic_set(&sig->live, 1); atomic_set(&sig->sigcnt, 1); + + /* list_add(thread_node, thread_head) without INIT_LIST_HEAD() */ + sig->thread_head = (struct list_head)LIST_HEAD_INIT(tsk->thread_node); + tsk->thread_node = (struct list_head)LIST_HEAD_INIT(sig->thread_head); + init_waitqueue_head(&sig->wait_chldexit); sig->curr_target = tsk; init_sigpending(&sig->shared_pending); @@ -1355,7 +1365,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, goto bad_fork_cleanup_policy; retval = audit_alloc(p); if (retval) - goto bad_fork_cleanup_policy; + goto bad_fork_cleanup_perf; /* copy all the process information */ retval = copy_semundo(clone_flags, p); if (retval) @@ -1510,6 +1520,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, atomic_inc(¤t->signal->sigcnt); list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); + list_add_tail_rcu(&p->thread_node, + 
&p->signal->thread_head); } attach_pid(p, PIDTYPE_PID); nr_threads++; @@ -1517,7 +1529,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, total_forks++; spin_unlock(¤t->sighand->siglock); + syscall_tracepoint_update(p); write_unlock_irq(&tasklist_lock); + proc_fork_connector(p); cgroup_post_fork(p); if (clone_flags & CLONE_THREAD) @@ -1552,8 +1566,9 @@ bad_fork_cleanup_semundo: exit_sem(p); bad_fork_cleanup_audit: audit_free(p); -bad_fork_cleanup_policy: +bad_fork_cleanup_perf: perf_event_free_task(p); +bad_fork_cleanup_policy: #ifdef CONFIG_NUMA mpol_put(p->mempolicy); bad_fork_cleanup_cgroup: @@ -1636,10 +1651,12 @@ long do_fork(unsigned long clone_flags, */ if (!IS_ERR(p)) { struct completion vfork; + struct pid *pid; trace_sched_process_fork(current, p); - nr = task_pid_vnr(p); + pid = get_task_pid(p, PIDTYPE_PID); + nr = pid_vnr(pid); if (clone_flags & CLONE_PARENT_SETTID) put_user(nr, parent_tidptr); @@ -1654,12 +1671,14 @@ long do_fork(unsigned long clone_flags, /* forking complete and child started to run, tell ptracer */ if (unlikely(trace)) - ptrace_event(trace, nr); + ptrace_event_pid(trace, pid); if (clone_flags & CLONE_VFORK) { if (!wait_for_vfork_done(p, &vfork)) - ptrace_event(PTRACE_EVENT_VFORK_DONE, nr); + ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid); } + + put_pid(pid); } else { nr = PTR_ERR(p); } diff --git a/kernel/freezer.c b/kernel/freezer.c index aa6a8aa..8f9279b 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -42,6 +42,9 @@ bool freezing_slow_path(struct task_struct *p) if (p->flags & (PF_NOFREEZE | PF_SUSPEND_TASK)) return false; + if (test_thread_flag(TIF_MEMDIE)) + return false; + if (pm_nosig_freezing || cgroup_freezing(p)) return true; diff --git a/kernel/futex.c b/kernel/futex.c index 639692f..2d98dc9 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -596,6 +596,55 @@ void exit_pi_state_list(struct task_struct *curr) raw_spin_unlock_irq(&curr->pi_lock); } +/* + * We need to check the following states: + 
* + * Waiter | pi_state | pi->owner | uTID | uODIED | ? + * + * [1] NULL | --- | --- | 0 | 0/1 | Valid + * [2] NULL | --- | --- | >0 | 0/1 | Valid + * + * [3] Found | NULL | -- | Any | 0/1 | Invalid + * + * [4] Found | Found | NULL | 0 | 1 | Valid + * [5] Found | Found | NULL | >0 | 1 | Invalid + * + * [6] Found | Found | task | 0 | 1 | Valid + * + * [7] Found | Found | NULL | Any | 0 | Invalid + * + * [8] Found | Found | task | ==taskTID | 0/1 | Valid + * [9] Found | Found | task | 0 | 0 | Invalid + * [10] Found | Found | task | !=taskTID | 0/1 | Invalid + * + * [1] Indicates that the kernel can acquire the futex atomically. We + * came came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit. + * + * [2] Valid, if TID does not belong to a kernel thread. If no matching + * thread is found then it indicates that the owner TID has died. + * + * [3] Invalid. The waiter is queued on a non PI futex + * + * [4] Valid state after exit_robust_list(), which sets the user space + * value to FUTEX_WAITERS | FUTEX_OWNER_DIED. + * + * [5] The user space value got manipulated between exit_robust_list() + * and exit_pi_state_list() + * + * [6] Valid state after exit_pi_state_list() which sets the new owner in + * the pi_state but cannot access the user space value. + * + * [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set. + * + * [8] Owner and user space value match + * + * [9] There is no transient state which sets the user space TID to 0 + * except exit_robust_list(), but this is indicated by the + * FUTEX_OWNER_DIED bit. See [4] + * + * [10] There is no transient state which leaves owner and user space + * TID out of sync. 
+ */ static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, union futex_key *key, struct futex_pi_state **ps) @@ -611,12 +660,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, plist_for_each_entry_safe(this, next, head, list) { if (match_futex(&this->key, key)) { /* - * Another waiter already exists - bump up - * the refcount and return its pi_state: + * Sanity check the waiter before increasing + * the refcount and attaching to it. */ pi_state = this->pi_state; /* - * Userspace might have messed up non-PI and PI futexes + * Userspace might have messed up non-PI and + * PI futexes [3] */ if (unlikely(!pi_state)) return -EINVAL; @@ -624,34 +674,70 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, WARN_ON(!atomic_read(&pi_state->refcount)); /* - * When pi_state->owner is NULL then the owner died - * and another waiter is on the fly. pi_state->owner - * is fixed up by the task which acquires - * pi_state->rt_mutex. - * - * We do not check for pid == 0 which can happen when - * the owner died and robust_list_exit() cleared the - * TID. + * Handle the owner died case: */ - if (pid && pi_state->owner) { + if (uval & FUTEX_OWNER_DIED) { + /* + * exit_pi_state_list sets owner to NULL and + * wakes the topmost waiter. The task which + * acquires the pi_state->rt_mutex will fixup + * owner. + */ + if (!pi_state->owner) { + /* + * No pi state owner, but the user + * space TID is not 0. Inconsistent + * state. [5] + */ + if (pid) + return -EINVAL; + /* + * Take a ref on the state and + * return. [4] + */ + goto out_state; + } + /* - * Bail out if user space manipulated the - * futex value. + * If TID is 0, then either the dying owner + * has not yet executed exit_pi_state_list() + * or some waiter acquired the rtmutex in the + * pi state, but did not yet fixup the TID in + * user space. + * + * Take a ref on the state and return. 
[6] */ - if (pid != task_pid_vnr(pi_state->owner)) + if (!pid) + goto out_state; + } else { + /* + * If the owner died bit is not set, + * then the pi_state must have an + * owner. [7] + */ + if (!pi_state->owner) return -EINVAL; } + /* + * Bail out if user space manipulated the + * futex value. If pi state exists then the + * owner TID must be the same as the user + * space TID. [9/10] + */ + if (pid != task_pid_vnr(pi_state->owner)) + return -EINVAL; + + out_state: atomic_inc(&pi_state->refcount); *ps = pi_state; - return 0; } } /* * We are the first waiter - try to look up the real owner and attach - * the new pi_state to it, but bail out when TID = 0 + * the new pi_state to it, but bail out when TID = 0 [1] */ if (!pid) return -ESRCH; @@ -659,6 +745,11 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, if (!p) return -ESRCH; + if (!p->mm) { + put_task_struct(p); + return -EPERM; + } + /* * We need to look at the task state flags to figure out, * whether the task is exiting. To protect against the do_exit @@ -679,6 +770,9 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, return ret; } + /* + * No existing pi state. First waiter. [2] + */ pi_state = alloc_pi_state(); /* @@ -750,10 +844,18 @@ retry: return -EDEADLK; /* - * Surprise - we got the lock. Just return to userspace: + * Surprise - we got the lock, but we do not trust user space at all. */ - if (unlikely(!curval)) - return 1; + if (unlikely(!curval)) { + /* + * We verify whether there is kernel state for this + * futex. If not, we can safely assume, that the 0 -> + * TID transition is correct. If state exists, we do + * not bother to fixup the user space state as it was + * corrupted already. + */ + return futex_top_waiter(hb, key) ? 
-EINVAL : 1; + } uval = curval; @@ -883,6 +985,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) struct task_struct *new_owner; struct futex_pi_state *pi_state = this->pi_state; u32 uninitialized_var(curval), newval; + int ret = 0; if (!pi_state) return -EINVAL; @@ -906,23 +1009,19 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) new_owner = this->task; /* - * We pass it to the next owner. (The WAITERS bit is always - * kept enabled while there is PI state around. We must also - * preserve the owner died bit.) + * We pass it to the next owner. The WAITERS bit is always + * kept enabled while there is PI state around. We cleanup the + * owner died bit, because we are the owner. */ - if (!(uval & FUTEX_OWNER_DIED)) { - int ret = 0; + newval = FUTEX_WAITERS | task_pid_vnr(new_owner); - newval = FUTEX_WAITERS | task_pid_vnr(new_owner); - - if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) - ret = -EFAULT; - else if (curval != uval) - ret = -EINVAL; - if (ret) { - raw_spin_unlock(&pi_state->pi_mutex.wait_lock); - return ret; - } + if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) + ret = -EFAULT; + else if (curval != uval) + ret = -EINVAL; + if (ret) { + raw_spin_unlock(&pi_state->pi_mutex.wait_lock); + return ret; } raw_spin_lock_irq(&pi_state->owner->pi_lock); @@ -1201,7 +1300,7 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, * * Return: * 0 - failed to acquire the lock atomically; - * 1 - acquired the lock; + * >0 - acquired the lock, return value is vpid of the top_waiter * <0 - error */ static int futex_proxy_trylock_atomic(u32 __user *pifutex, @@ -1212,7 +1311,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, { struct futex_q *top_waiter = NULL; u32 curval; - int ret; + int ret, vpid; if (get_futex_value_locked(&curval, pifutex)) return -EFAULT; @@ -1240,11 +1339,13 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, * the contended 
case or if set_waiters is 1. The pi_state is returned * in ps in contended cases. */ + vpid = task_pid_vnr(top_waiter->task); ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, set_waiters); - if (ret == 1) + if (ret == 1) { requeue_pi_wake_futex(top_waiter, key2, hb2); - + return vpid; + } return ret; } @@ -1276,10 +1377,16 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, struct futex_hash_bucket *hb1, *hb2; struct plist_head *head1; struct futex_q *this, *next; - u32 curval2; if (requeue_pi) { /* + * Requeue PI only works on two distinct uaddrs. This + * check is only valid for private futexes. See below. + */ + if (uaddr1 == uaddr2) + return -EINVAL; + + /* * requeue_pi requires a pi_state, try to allocate it now * without any locks in case it fails. */ @@ -1317,6 +1424,15 @@ retry: if (unlikely(ret != 0)) goto out_put_key1; + /* + * The check above which compares uaddrs is not sufficient for + * shared futexes. We need to compare the keys: + */ + if (requeue_pi && match_futex(&key1, &key2)) { + ret = -EINVAL; + goto out_put_keys; + } + hb1 = hash_futex(&key1); hb2 = hash_futex(&key2); @@ -1362,16 +1478,25 @@ retry_private: * At this point the top_waiter has either taken uaddr2 or is * waiting on it. If the former, then the pi_state will not * exist yet, look it up one more time to ensure we have a - * reference to it. + * reference to it. If the lock was taken, ret contains the + * vpid of the top waiter task. */ - if (ret == 1) { + if (ret > 0) { WARN_ON(pi_state); drop_count++; task_count++; - ret = get_futex_value_locked(&curval2, uaddr2); - if (!ret) - ret = lookup_pi_state(curval2, hb2, &key2, - &pi_state); + /* + * If we acquired the lock, then the user + * space value of uaddr2 should be vpid. It + * cannot be changed by the top waiter as it + * is blocked on hb2 lock if it tries to do + * so. If something fiddled with it behind our + * back the pi state lookup might unearth + * it. 
So we rather use the known value than + * rereading and handing potential crap to + * lookup_pi_state. + */ + ret = lookup_pi_state(ret, hb2, &key2, &pi_state); } switch (ret) { @@ -2151,9 +2276,10 @@ retry: /* * To avoid races, try to do the TID -> 0 atomic transition * again. If it succeeds then we can return without waking - * anyone else up: + * anyone else up. We only try this if neither the waiters nor + * the owner died bit are set. */ - if (!(uval & FUTEX_OWNER_DIED) && + if (!(uval & ~FUTEX_TID_MASK) && cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0)) goto pi_faulted; /* @@ -2185,11 +2311,9 @@ retry: /* * No waiters - kernel unlocks the futex: */ - if (!(uval & FUTEX_OWNER_DIED)) { - ret = unlock_futex_pi(uaddr, uval); - if (ret == -EFAULT) - goto pi_faulted; - } + ret = unlock_futex_pi(uaddr, uval); + if (ret == -EFAULT) + goto pi_faulted; out_unlock: spin_unlock(&hb->lock); @@ -2347,6 +2471,16 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, if (ret) goto out_key2; + /* + * The check above which compares uaddrs is not sufficient for + * shared futexes. We need to compare the keys: + */ + if (match_futex(&q.key, &key2)) { + queue_unlock(&q, hb); + ret = -EINVAL; + goto out_put_keys; + } + /* Queue the futex_q, drop the hb lock, wait for wakeup. */ futex_wait_queue_me(hb, &q, to); diff --git a/kernel/groups.c b/kernel/groups.c index 90cf1c3..67b4ba3 100644 --- a/kernel/groups.c +++ b/kernel/groups.c @@ -6,6 +6,7 @@ #include <linux/slab.h> #include <linux/security.h> #include <linux/syscalls.h> +#include <linux/user_namespace.h> #include <asm/uaccess.h> /* init to 2 - one for init_task, one to ensure it is never freed */ @@ -223,6 +224,14 @@ out: return i; } +bool may_setgroups(void) +{ + struct user_namespace *user_ns = current_user_ns(); + + return ns_capable(user_ns, CAP_SETGID) && + userns_may_setgroups(user_ns); +} + /* * SMP: Our groups are copy-on-write. We can set them safely * without another task interfering. 
@@ -233,7 +242,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist) struct group_info *group_info; int retval; - if (!ns_capable(current_user_ns(), CAP_SETGID)) + if (!may_setgroups()) return -EPERM; if ((unsigned)gidsetsize > NGROUPS_MAX) return -EINVAL; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index c19183d..c967b71 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -248,6 +248,11 @@ again: goto again; } timer->base = new_base; + } else { + if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) { + cpu = this_cpu; + goto again; + } } return new_base; } @@ -583,6 +588,23 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) cpu_base->expires_next.tv64 = expires_next.tv64; + /* + * If a hang was detected in the last timer interrupt then we + * leave the hang delay active in the hardware. We want the + * system to make progress. That also prevents the following + * scenario: + * T1 expires 50ms from now + * T2 expires 5s from now + * + * T1 is removed, so this code is called and would reprogram + * the hardware to 5s from now. Any hrtimer_start after that + * will not reprogram the hardware due to hang_detected being + * set. So we'd effectivly block all timers until the T2 event + * fires. 
+ */ + if (cpu_base->hang_detected) + return; + if (cpu_base->expires_next.tv64 != KTIME_MAX) tick_program_event(cpu_base->expires_next, 1); } @@ -1069,11 +1091,8 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, /* Remove an active timer from the queue: */ ret = remove_hrtimer(timer, base); - /* Switch the timer base, if necessary: */ - new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); - if (mode & HRTIMER_MODE_REL) { - tim = ktime_add_safe(tim, new_base->get_time()); + tim = ktime_add_safe(tim, base->get_time()); /* * CONFIG_TIME_LOW_RES is a temporary way for architectures * to signal that they simply return xtime in @@ -1086,6 +1105,11 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, #endif } + hrtimer_set_expires_range_ns(timer, tim, delta_ns); + + /* Switch the timer base, if necessary: */ + new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); + #ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST { ktime_t now = new_base->get_time(); @@ -1097,8 +1121,6 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, } #endif - hrtimer_set_expires_range_ns(timer, tim, delta_ns); - timer_stats_hrtimer_set_start_info(timer); leftmost = enqueue_hrtimer(timer, new_base); diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 001fa5b..8a160e8 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -74,6 +74,14 @@ extern void irq_percpu_disable(struct irq_desc *desc, unsigned int cpu); extern void mask_irq(struct irq_desc *desc); extern void unmask_irq(struct irq_desc *desc); +#ifdef CONFIG_SPARSE_IRQ +extern void irq_lock_sparse(void); +extern void irq_unlock_sparse(void); +#else +static inline void irq_lock_sparse(void) { } +static inline void irq_unlock_sparse(void) { } +#endif + extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); irqreturn_t handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action); diff --git a/kernel/irq/irqdesc.c 
b/kernel/irq/irqdesc.c index 8ab8e93..07d4551 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -131,6 +131,16 @@ static void free_masks(struct irq_desc *desc) static inline void free_masks(struct irq_desc *desc) { } #endif +void irq_lock_sparse(void) +{ + mutex_lock(&sparse_irq_lock); +} + +void irq_unlock_sparse(void) +{ + mutex_unlock(&sparse_irq_lock); +} + static struct irq_desc *alloc_desc(int irq, int node, struct module *owner) { struct irq_desc *desc; @@ -167,6 +177,12 @@ static void free_desc(unsigned int irq) unregister_irq_proc(irq, desc); + /* + * sparse_irq_lock protects also show_interrupts() and + * kstat_irq_usr(). Once we deleted the descriptor from the + * sparse tree we can free it. Access in proc will fail to + * lookup the descriptor. + */ mutex_lock(&sparse_irq_lock); delete_irq_desc(irq); mutex_unlock(&sparse_irq_lock); @@ -489,6 +505,15 @@ void dynamic_irq_cleanup(unsigned int irq) raw_spin_unlock_irqrestore(&desc->lock, flags); } +/** + * kstat_irqs_cpu - Get the statistics for an interrupt on a cpu + * @irq: The interrupt number + * @cpu: The cpu number + * + * Returns the sum of interrupt counts on @cpu since boot for + * @irq. The caller must ensure that the interrupt is not removed + * concurrently. + */ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) { struct irq_desc *desc = irq_to_desc(irq); @@ -497,6 +522,14 @@ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) *per_cpu_ptr(desc->kstat_irqs, cpu) : 0; } +/** + * kstat_irqs - Get the statistics for an interrupt + * @irq: The interrupt number + * + * Returns the sum of interrupt counts on all cpus since boot for + * @irq. The caller must ensure that the interrupt is not removed + * concurrently. 
+ */ unsigned int kstat_irqs(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); @@ -509,3 +542,22 @@ unsigned int kstat_irqs(unsigned int irq) sum += *per_cpu_ptr(desc->kstat_irqs, cpu); return sum; } + +/** + * kstat_irqs_usr - Get the statistics for an interrupt + * @irq: The interrupt number + * + * Returns the sum of interrupt counts on all cpus since boot for + * @irq. Contrary to kstat_irqs() this can be called from any + * preemptible context. It's protected against concurrent removal of + * an interrupt descriptor when sparse irqs are enabled. + */ +unsigned int kstat_irqs_usr(unsigned int irq) +{ + int sum; + + irq_lock_sparse(); + sum = kstat_irqs(irq); + irq_unlock_sparse(); + return sum; +} diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 252bf10..11be231 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -152,7 +152,7 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, struct irq_chip *chip = irq_data_get_irq_chip(data); int ret; - ret = chip->irq_set_affinity(data, mask, false); + ret = chip->irq_set_affinity(data, mask, force); switch (ret) { case IRQ_SET_MASK_OK: cpumask_copy(data->affinity, mask); @@ -220,7 +220,8 @@ static inline void init_helper_thread(void) { } #endif -int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) +int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, + bool force) { struct irq_chip *chip = irq_data_get_irq_chip(data); struct irq_desc *desc = irq_data_to_desc(data); @@ -230,7 +231,7 @@ int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) return -EINVAL; if (irq_can_move_pcntxt(data)) { - ret = irq_do_set_affinity(data, mask, false); + ret = irq_do_set_affinity(data, mask, force); } else { irqd_set_move_pending(data); irq_copy_pending(desc, mask); @@ -255,13 +256,7 @@ int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) return ret; } -/** - * 
irq_set_affinity - Set the irq affinity of a given irq - * @irq: Interrupt to set affinity - * @mask: cpumask - * - */ -int irq_set_affinity(unsigned int irq, const struct cpumask *mask) +int __irq_set_affinity(unsigned int irq, const struct cpumask *mask, bool force) { struct irq_desc *desc = irq_to_desc(irq); unsigned long flags; @@ -271,7 +266,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *mask) return -EINVAL; raw_spin_lock_irqsave(&desc->lock, flags); - ret = __irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask); + ret = irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask, force); raw_spin_unlock_irqrestore(&desc->lock, flags); return ret; } @@ -944,8 +939,8 @@ static int irq_thread(void *data) irq_thread_check_affinity(desc, action); action_ret = handler_fn(desc, action); - if (!noirqdebug) - note_interrupt(action->irq, desc, action_ret); + if (action_ret == IRQ_HANDLED) + atomic_inc(&desc->threads_handled); #ifdef CONFIG_PREEMPT_RT_FULL migrate_disable(); diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 36f6ee1..095cd72 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -15,6 +15,23 @@ #include "internals.h" +/* + * Access rules: + * + * procfs protects read/write of /proc/irq/N/ files against a + * concurrent free of the interrupt descriptor. remove_proc_entry() + * immediately prevents new read/writes to happen and waits for + * already running read/write functions to complete. + * + * We remove the proc entries first and then delete the interrupt + * descriptor from the radix tree and free it. So it is guaranteed + * that irq_to_desc(N) is valid as long as the read/writes are + * permitted by procfs. + * + * The read from /proc/interrupts is a different problem because there + * is no protection. So the lookup and the access to irqdesc + * information must be protected by sparse_irq_lock. 
+ */ static struct proc_dir_entry *root_irq_dir; #ifdef CONFIG_SMP @@ -437,9 +454,10 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); } + irq_lock_sparse(); desc = irq_to_desc(i); if (!desc) - return 0; + goto outsparse; raw_spin_lock_irqsave(&desc->lock, flags); for_each_online_cpu(j) @@ -479,6 +497,8 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); out: raw_spin_unlock_irqrestore(&desc->lock, flags); +outsparse: + irq_unlock_sparse(); return 0; } #endif diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index e5a309a..ca47be0 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -265,21 +265,119 @@ try_misrouted_irq(unsigned int irq, struct irq_desc *desc, return action && (action->flags & IRQF_IRQPOLL); } +#define SPURIOUS_DEFERRED 0x80000000 + void note_interrupt(unsigned int irq, struct irq_desc *desc, irqreturn_t action_ret) { if (desc->istate & IRQS_POLL_INPROGRESS) return; - /* we get here again via the threaded handler */ - if (action_ret == IRQ_WAKE_THREAD) - return; - if (bad_action_ret(action_ret)) { report_bad_irq(irq, desc, action_ret); return; } + /* + * We cannot call note_interrupt from the threaded handler + * because we need to look at the compound of all handlers + * (primary and threaded). Aside of that in the threaded + * shared case we have no serialization against an incoming + * hardware interrupt while we are dealing with a threaded + * result. + * + * So in case a thread is woken, we just note the fact and + * defer the analysis to the next hardware interrupt. + * + * The threaded handlers store whether they sucessfully + * handled an interrupt and we check whether that number + * changed versus the last invocation. + * + * We could handle all interrupts with the delayed by one + * mechanism, but for the non forced threaded case we'd just + * add pointless overhead to the straight hardirq interrupts + * for the sake of a few lines less code. 
+ */ + if (action_ret & IRQ_WAKE_THREAD) { + /* + * There is a thread woken. Check whether one of the + * shared primary handlers returned IRQ_HANDLED. If + * not we defer the spurious detection to the next + * interrupt. + */ + if (action_ret == IRQ_WAKE_THREAD) { + int handled; + /* + * We use bit 31 of thread_handled_last to + * denote the deferred spurious detection + * active. No locking necessary as + * thread_handled_last is only accessed here + * and we have the guarantee that hard + * interrupts are not reentrant. + */ + if (!(desc->threads_handled_last & SPURIOUS_DEFERRED)) { + desc->threads_handled_last |= SPURIOUS_DEFERRED; + return; + } + /* + * Check whether one of the threaded handlers + * returned IRQ_HANDLED since the last + * interrupt happened. + * + * For simplicity we just set bit 31, as it is + * set in threads_handled_last as well. So we + * avoid extra masking. And we really do not + * care about the high bits of the handled + * count. We just care about the count being + * different than the one we saw before. + */ + handled = atomic_read(&desc->threads_handled); + handled |= SPURIOUS_DEFERRED; + if (handled != desc->threads_handled_last) { + action_ret = IRQ_HANDLED; + /* + * Note: We keep the SPURIOUS_DEFERRED + * bit set. We are handling the + * previous invocation right now. + * Keep it for the current one, so the + * next hardware interrupt will + * account for it. + */ + desc->threads_handled_last = handled; + } else { + /* + * None of the threaded handlers felt + * responsible for the last interrupt + * + * We keep the SPURIOUS_DEFERRED bit + * set in threads_handled_last as we + * need to account for the current + * interrupt as well. + */ + action_ret = IRQ_NONE; + } + } else { + /* + * One of the primary handlers returned + * IRQ_HANDLED. So we don't care about the + * threaded handlers on the same line. Clear + * the deferred detection bit. 
+ * + * In theory we could/should check whether the + * deferred bit is set and take the result of + * the previous run into account here as + * well. But it's really not worth the + * trouble. If every other interrupt is + * handled we never trigger the spurious + * detector. And if this is just the one out + * of 100k unhandled ones which is handled + * then we merily delay the spurious detection + * by one hard interrupt. Not a real problem. + */ + desc->threads_handled_last &= ~SPURIOUS_DEFERRED; + } + } + if (unlikely(action_ret == IRQ_NONE)) { /* * If we are seeing only the odd spurious IRQ caused by diff --git a/kernel/kcmp.c b/kernel/kcmp.c index e30ac0f..0aa69ea 100644 --- a/kernel/kcmp.c +++ b/kernel/kcmp.c @@ -44,11 +44,12 @@ static long kptr_obfuscate(long v, int type) */ static int kcmp_ptr(void *v1, void *v2, enum kcmp_type type) { - long ret; + long t1, t2; - ret = kptr_obfuscate((long)v1, type) - kptr_obfuscate((long)v2, type); + t1 = kptr_obfuscate((long)v1, type); + t2 = kptr_obfuscate((long)v2, type); - return (ret < 0) | ((ret > 0) << 1); + return (t1 < t2) | ((t1 > t2) << 1); } /* The caller must have pinned the task */ diff --git a/kernel/kexec.c b/kernel/kexec.c index 355e13a..4c9dcff 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1681,6 +1681,14 @@ int kernel_kexec(void) kexec_in_progress = true; kernel_restart_prepare(NULL); migrate_to_reboot_cpu(); + + /* + * migrate_to_reboot_cpu() disables CPU hotplug assuming that + * no further code needs to use CPU hotplug (which is true in + * the reboot case). However, the kexec path depends on using + * CPU hotplug again; so re-enable it here. 
+ */ + cpu_hotplug_enable(); printk(KERN_EMERG "Starting new kernel\n"); machine_shutdown(); } diff --git a/kernel/module.c b/kernel/module.c index dc58274..f3c612e 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1882,7 +1882,9 @@ static void free_module(struct module *mod) /* We leave it in list to prevent duplicate loads, but make sure * that noone uses it while it's being deconstructed. */ + mutex_lock(&module_mutex); mod->state = MODULE_STATE_UNFORMED; + mutex_unlock(&module_mutex); /* Remove dynamic debug info */ ddebug_remove_module(mod->name); @@ -3296,6 +3298,9 @@ static int load_module(struct load_info *info, const char __user *uargs, dynamic_debug_setup(info->debug, info->num_debug); + /* Ftrace init must be called in the MODULE_STATE_UNFORMED state */ + ftrace_module_init(mod); + /* Finally it's fully formed, ready to start executing. */ err = complete_formation(mod, info); if (err) diff --git a/kernel/pid.c b/kernel/pid.c index 9b9a266..82430c8 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -341,6 +341,8 @@ out: out_unlock: spin_unlock_irq(&pidmap_lock); + put_pid_ns(ns); + out_free: while (++i <= ns->level) free_pidmap(pid->numbers + i); diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index a22b931..5218a7d 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -636,6 +636,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock, goto out; } } else { + memset(&event.sigev_value, 0, sizeof(event.sigev_value)); event.sigev_notify = SIGEV_SIGNAL; event.sigev_signo = SIGALRM; event.sigev_value.sival_int = new_timer->it_id; diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index d26958b..9c312ed 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -496,8 +496,14 @@ int hibernation_restore(int platform_mode) error = dpm_suspend_start(PMSG_QUIESCE); if (!error) { error = resume_target_kernel(platform_mode); - dpm_resume_end(PMSG_RECOVER); + /* + * The above should either succeed and jump 
to the new kernel, + * or return with an error. Otherwise things are just + * undefined, so let's be paranoid. + */ + BUG_ON(!error); } + dpm_resume_end(PMSG_RECOVER); pm_restore_gfp_mask(); ftrace_start(); resume_console(); diff --git a/kernel/power/main.c b/kernel/power/main.c index 1d1bf63..3ae41cd 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -293,12 +293,12 @@ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, { char *s = buf; #ifdef CONFIG_SUSPEND - int i; + suspend_state_t i; + + for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) + if (pm_states[i].state) + s += sprintf(s,"%s ", pm_states[i].label); - for (i = 0; i < PM_SUSPEND_MAX; i++) { - if (pm_states[i] && valid_state(i)) - s += sprintf(s,"%s ", pm_states[i]); - } #endif #ifdef CONFIG_HIBERNATION s += sprintf(s, "%s\n", "disk"); @@ -314,7 +314,7 @@ static suspend_state_t decode_state(const char *buf, size_t n) { #ifdef CONFIG_SUSPEND suspend_state_t state = PM_SUSPEND_MIN; - const char * const *s; + struct pm_sleep_state *s; #endif char *p; int len; @@ -328,8 +328,9 @@ static suspend_state_t decode_state(const char *buf, size_t n) #ifdef CONFIG_SUSPEND for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) - if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) - return state; + if (s->state && len == strlen(s->label) + && !strncmp(buf, s->label, len)) + return s->state; #endif return PM_SUSPEND_ON; @@ -447,8 +448,8 @@ static ssize_t autosleep_show(struct kobject *kobj, #ifdef CONFIG_SUSPEND if (state < PM_SUSPEND_MAX) - return sprintf(buf, "%s\n", valid_state(state) ? - pm_states[state] : "error"); + return sprintf(buf, "%s\n", pm_states[state].state ? 
+ pm_states[state].label : "error"); #endif #ifdef CONFIG_HIBERNATION return sprintf(buf, "disk\n"); diff --git a/kernel/power/power.h b/kernel/power/power.h index 7d4b7ff..f770cad3 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -175,17 +175,20 @@ extern void swsusp_show_speed(struct timeval *, struct timeval *, unsigned int, char *); #ifdef CONFIG_SUSPEND +struct pm_sleep_state { + const char *label; + suspend_state_t state; +}; + /* kernel/power/suspend.c */ -extern const char *const pm_states[]; +extern struct pm_sleep_state pm_states[]; -extern bool valid_state(suspend_state_t state); extern int suspend_devices_and_enter(suspend_state_t state); #else /* !CONFIG_SUSPEND */ static inline int suspend_devices_and_enter(suspend_state_t state) { return -ENOSYS; } -static inline bool valid_state(suspend_state_t state) { return false; } #endif /* !CONFIG_SUSPEND */ #ifdef CONFIG_PM_TEST_SUSPEND diff --git a/kernel/power/process.c b/kernel/power/process.c index 06ec886..f1fe7ec 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -107,6 +107,28 @@ static int try_to_freeze_tasks(bool user_only) return todo ? -EBUSY : 0; } +/* + * Returns true if all freezable tasks (except for current) are frozen already + */ +static bool check_frozen_processes(void) +{ + struct task_struct *g, *p; + bool ret = true; + + read_lock(&tasklist_lock); + for_each_process_thread(g, p) { + if (p != current && !freezer_should_skip(p) && + !frozen(p)) { + ret = false; + goto done; + } + } +done: + read_unlock(&tasklist_lock); + + return ret; +} + /** * freeze_processes - Signal user space processes to enter the refrigerator. * The current thread will not be frozen. 
The same process that calls @@ -117,6 +139,7 @@ static int try_to_freeze_tasks(bool user_only) int freeze_processes(void) { int error; + int oom_kills_saved; error = __usermodehelper_disable(UMH_FREEZING); if (error) @@ -130,12 +153,27 @@ int freeze_processes(void) printk("Freezing user space processes ... "); pm_freezing = true; + oom_kills_saved = oom_kills_count(); error = try_to_freeze_tasks(true); if (!error) { - printk("done."); __usermodehelper_set_disable_depth(UMH_DISABLED); oom_killer_disable(); + + /* + * There might have been an OOM kill while we were + * freezing tasks and the killed task might be still + * on the way out so we have to double check for race. + */ + if (oom_kills_count() != oom_kills_saved && + !check_frozen_processes()) { + __usermodehelper_set_disable_depth(UMH_ENABLED); + printk("OOM in progress."); + error = -EBUSY; + goto done; + } + printk("done."); } +done: printk("\n"); BUG_ON(in_atomic()); @@ -184,6 +222,7 @@ void thaw_processes(void) printk("Restarting tasks ... "); + __usermodehelper_set_disable_depth(UMH_FREEZING); thaw_workqueues(); read_lock(&tasklist_lock); diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index b38109e..5e2bde1 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -730,6 +730,25 @@ static void mark_nosave_pages(struct memory_bitmap *bm) } } +static bool is_nosave_page(unsigned long pfn) +{ + struct nosave_region *region; + + list_for_each_entry(region, &nosave_regions, list) { + if (pfn >= region->start_pfn && pfn < region->end_pfn) { + pr_err("PM: %#010llx in e820 nosave region: " + "[mem %#010llx-%#010llx]\n", + (unsigned long long) pfn << PAGE_SHIFT, + (unsigned long long) region->start_pfn << PAGE_SHIFT, + ((unsigned long long) region->end_pfn << PAGE_SHIFT) + - 1); + return true; + } + } + + return false; +} + /** * create_basic_memory_bitmaps - create bitmaps needed for marking page * frames that should not be saved and free page frames. 
The pointers @@ -1774,7 +1793,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm) do { pfn = memory_bm_next_pfn(bm); if (likely(pfn != BM_END_OF_MAP)) { - if (likely(pfn_valid(pfn))) + if (likely(pfn_valid(pfn)) && !is_nosave_page(pfn)) swsusp_set_page_free(pfn_to_page(pfn)); else return -EFAULT; diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index e6703bb..c2372ed 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -29,10 +29,10 @@ #include "power.h" -const char *const pm_states[PM_SUSPEND_MAX] = { - [PM_SUSPEND_FREEZE] = "freeze", - [PM_SUSPEND_STANDBY] = "standby", - [PM_SUSPEND_MEM] = "mem", +struct pm_sleep_state pm_states[PM_SUSPEND_MAX] = { + [PM_SUSPEND_FREEZE] = { .label = "freeze", .state = PM_SUSPEND_FREEZE }, + [PM_SUSPEND_STANDBY] = { .label = "standby", }, + [PM_SUSPEND_MEM] = { .label = "mem", }, }; static const struct platform_suspend_ops *suspend_ops; @@ -62,42 +62,34 @@ void freeze_wake(void) } EXPORT_SYMBOL_GPL(freeze_wake); +static bool valid_state(suspend_state_t state) +{ + /* + * PM_SUSPEND_STANDBY and PM_SUSPEND_MEM states need low level + * support and need to be valid to the low level + * implementation, no valid callback implies that none are valid. + */ + return suspend_ops && suspend_ops->valid && suspend_ops->valid(state); +} + /** * suspend_set_ops - Set the global suspend method table. * @ops: Suspend operations to use. */ void suspend_set_ops(const struct platform_suspend_ops *ops) { + suspend_state_t i; + lock_system_sleep(); + suspend_ops = ops; + for (i = PM_SUSPEND_STANDBY; i <= PM_SUSPEND_MEM; i++) + pm_states[i].state = valid_state(i) ? 
i : 0; + unlock_system_sleep(); } EXPORT_SYMBOL_GPL(suspend_set_ops); -bool valid_state(suspend_state_t state) -{ - if (state == PM_SUSPEND_FREEZE) { -#ifdef CONFIG_PM_DEBUG - if (pm_test_level != TEST_NONE && - pm_test_level != TEST_FREEZER && - pm_test_level != TEST_DEVICES && - pm_test_level != TEST_PLATFORM) { - printk(KERN_WARNING "Unsupported pm_test mode for " - "freeze state, please choose " - "none/freezer/devices/platform.\n"); - return false; - } -#endif - return true; - } - /* - * PM_SUSPEND_STANDBY and PM_SUSPEND_MEMORY states need lowlevel - * support and need to be valid to the lowlevel - * implementation, no valid callback implies that none are valid. - */ - return suspend_ops && suspend_ops->valid && suspend_ops->valid(state); -} - /** * suspend_valid_only_mem - Generic memory-only valid callback. * @@ -328,9 +320,17 @@ static int enter_state(suspend_state_t state) { int error; - if (!valid_state(state)) - return -ENODEV; - + if (state == PM_SUSPEND_FREEZE) { +#ifdef CONFIG_PM_DEBUG + if (pm_test_level != TEST_NONE && pm_test_level <= TEST_CPUS) { + pr_warning("PM: Unsupported test mode for freeze state," + "please choose none/freezer/devices/platform.\n"); + return -EAGAIN; + } +#endif + } else if (!valid_state(state)) { + return -EINVAL; + } if (!mutex_trylock(&pm_mutex)) return -EBUSY; @@ -341,7 +341,7 @@ static int enter_state(suspend_state_t state) sys_sync(); printk("done.\n"); - pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]); + pr_debug("PM: Preparing system for %s sleep\n", pm_states[state].label); error = suspend_prepare(state); if (error) goto Unlock; @@ -349,7 +349,7 @@ static int enter_state(suspend_state_t state) if (suspend_test(TEST_FREEZER)) goto Finish; - pr_debug("PM: Entering %s sleep\n", pm_states[state]); + pr_debug("PM: Entering %s sleep\n", pm_states[state].label); pm_restrict_gfp_mask(); error = suspend_devices_and_enter(state); pm_restore_gfp_mask(); diff --git a/kernel/power/suspend_test.c 
b/kernel/power/suspend_test.c index 9b2a1d5..269b097 100644 --- a/kernel/power/suspend_test.c +++ b/kernel/power/suspend_test.c @@ -92,13 +92,13 @@ static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state) } if (state == PM_SUSPEND_MEM) { - printk(info_test, pm_states[state]); + printk(info_test, pm_states[state].label); status = pm_suspend(state); if (status == -ENODEV) state = PM_SUSPEND_STANDBY; } if (state == PM_SUSPEND_STANDBY) { - printk(info_test, pm_states[state]); + printk(info_test, pm_states[state].label); status = pm_suspend(state); } if (status < 0) @@ -136,18 +136,16 @@ static char warn_bad_state[] __initdata = static int __init setup_test_suspend(char *value) { - unsigned i; + suspend_state_t i; /* "=mem" ==> "mem" */ value++; - for (i = 0; i < PM_SUSPEND_MAX; i++) { - if (!pm_states[i]) - continue; - if (strcmp(pm_states[i], value) != 0) - continue; - test_state = (__force suspend_state_t) i; - return 0; - } + for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) + if (!strcmp(pm_states[i].label, value)) { + test_state = pm_states[i].state; + return 0; + } + printk(warn_bad_state, value); return 0; } @@ -164,8 +162,8 @@ static int __init test_suspend(void) /* PM is initialized by now; is that state testable? */ if (test_state == PM_SUSPEND_ON) goto done; - if (!valid_state(test_state)) { - printk(warn_bad_state, pm_states[test_state]); + if (!pm_states[test_state].state) { + printk(warn_bad_state, pm_states[test_state].label); goto done; } diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 981ff61..985ec66 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2560,7 +2560,7 @@ void wake_up_klogd(void) preempt_enable(); } -int printk_sched(const char *fmt, ...) +int printk_deferred(const char *fmt, ...) 
{ unsigned long flags; va_list args; diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 507fab1..978857a 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1148,6 +1148,22 @@ static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) } /* + * Awaken the grace-period kthread for the specified flavor of RCU. + * Don't do a self-awaken, and don't bother awakening when there is + * nothing for the grace-period kthread to do (as in several CPUs + * raced to awaken, and we lost), and finally don't try to awaken + * a kthread that has not yet been created. + */ +static void rcu_gp_kthread_wake(struct rcu_state *rsp) +{ + if (current == rsp->gp_kthread || + !ACCESS_ONCE(rsp->gp_flags) || + !rsp->gp_kthread) + return; + swait_wake(&rsp->gp_wq); +} + +/* * If there is room, assign a ->completed number to any callbacks on * this CPU that have not already been assigned. Also accelerate any * callbacks that were previously assigned a ->completed number that has @@ -1545,7 +1561,7 @@ static void rsp_wakeup(struct irq_work *work) struct rcu_state *rsp = container_of(work, struct rcu_state, wakeup_work); /* Wake up rcu_gp_kthread() to start the grace period. */ - swait_wake(&rsp->gp_wq); + rcu_gp_kthread_wake(rsp); } /* @@ -1619,7 +1635,7 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) { WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags); - swait_wake(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */ + rcu_gp_kthread_wake(rsp); } /* @@ -2189,8 +2205,7 @@ static void force_quiescent_state(struct rcu_state *rsp) } rsp->gp_flags |= RCU_GP_FLAG_FQS; raw_spin_unlock_irqrestore(&rnp_old->lock, flags); - /* Memory barrier implied by wake_up() path. 
*/ - swait_wake(&rsp->gp_wq); + rcu_gp_kthread_wake(rsp); } /* diff --git a/kernel/rtmutex-debug.h b/kernel/rtmutex-debug.h index 14193d5..ab29b6a 100644 --- a/kernel/rtmutex-debug.h +++ b/kernel/rtmutex-debug.h @@ -31,3 +31,8 @@ static inline int debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter, { return (waiter != NULL); } + +static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w) +{ + debug_rt_mutex_print_deadlock(w); +} diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index 4057bc6..5c70d79 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c @@ -95,6 +95,47 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) owner = *p; } while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner); } + +/* + * Safe fastpath aware unlock: + * 1) Clear the waiters bit + * 2) Drop lock->wait_lock + * 3) Try to unlock the lock with cmpxchg + */ +static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock) + __releases(lock->wait_lock) +{ + struct task_struct *owner = rt_mutex_owner(lock); + + clear_rt_mutex_waiters(lock); + raw_spin_unlock(&lock->wait_lock); + /* + * If a new waiter comes in between the unlock and the cmpxchg + * we have two situations: + * + * unlock(wait_lock); + * lock(wait_lock); + * cmpxchg(p, owner, 0) == owner + * mark_rt_mutex_waiters(lock); + * acquire(lock); + * or: + * + * unlock(wait_lock); + * lock(wait_lock); + * mark_rt_mutex_waiters(lock); + * + * cmpxchg(p, owner, 0) != owner + * enqueue_waiter(); + * unlock(wait_lock); + * lock(wait_lock); + * wake waiter(); + * unlock(wait_lock); + * lock(wait_lock); + * acquire(lock); + */ + return rt_mutex_cmpxchg(lock, owner, NULL); +} + #else # define rt_mutex_cmpxchg(l,c,n) (0) static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) @@ -102,6 +143,17 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) lock->owner = (struct task_struct *) ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS); } + +/* + * Simple slow path only version: 
lock->owner is protected by lock->wait_lock. + */ +static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock) + __releases(lock->wait_lock) +{ + lock->owner = NULL; + raw_spin_unlock(&lock->wait_lock); + return true; +} #endif static inline void init_lists(struct rt_mutex *lock) @@ -181,27 +233,37 @@ static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter) */ int max_lock_depth = 1024; +static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) +{ + struct rt_mutex_waiter *waiter = p->pi_blocked_on; + return rt_mutex_real_waiter(waiter) ? waiter->lock : NULL; +} + /* * Adjust the priority chain. Also used for deadlock detection. * Decreases task's usage by one - may thus free the task. * - * @task: the task owning the mutex (owner) for which a chain walk is probably - * needed + * @task: the task owning the mutex (owner) for which a chain walk is + * probably needed * @deadlock_detect: do we have to carry out deadlock detection? - * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck - * things for a task that has just got its priority adjusted, and - * is waiting on a mutex) + * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck + * things for a task that has just got its priority adjusted, and + * is waiting on a mutex) + * @next_lock: the mutex on which the owner of @orig_lock was blocked before + * we dropped its pi_lock. Is never dereferenced, only used for + * comparison to detect lock chain changes. * @orig_waiter: rt_mutex_waiter struct for the task that has just donated - * its priority to the mutex owner (can be NULL in the case - * depicted above or if the top waiter is gone away and we are - * actually deboosting the owner) - * @top_task: the current top waiter + * its priority to the mutex owner (can be NULL in the case + * depicted above or if the top waiter is gone away and we are + * actually deboosting the owner) + * @top_task: the current top waiter * * Returns 0 or -EDEADLK. 
*/ static int rt_mutex_adjust_prio_chain(struct task_struct *task, int deadlock_detect, struct rt_mutex *orig_lock, + struct rt_mutex *next_lock, struct rt_mutex_waiter *orig_waiter, struct task_struct *top_task) { @@ -235,7 +297,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, } put_task_struct(task); - return deadlock_detect ? -EDEADLK : 0; + return -EDEADLK; } retry: /* @@ -260,13 +322,32 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, goto out_unlock_pi; /* + * We dropped all locks after taking a refcount on @task, so + * the task might have moved on in the lock chain or even left + * the chain completely and blocks now on an unrelated lock or + * on @orig_lock. + * + * We stored the lock on which @task was blocked in @next_lock, + * so we can detect the chain change. + */ + if (next_lock != waiter->lock) + goto out_unlock_pi; + + /* * Drop out, when the task has no waiters. Note, * top_waiter can be NULL, when we are in the deboosting * mode! */ - if (top_waiter && (!task_has_pi_waiters(task) || - top_waiter != task_top_pi_waiter(task))) - goto out_unlock_pi; + if (top_waiter) { + if (!task_has_pi_waiters(task)) + goto out_unlock_pi; + /* + * If deadlock detection is off, we stop here if we + * are not the top pi waiter of the task. + */ + if (!detect_deadlock && top_waiter != task_top_pi_waiter(task)) + goto out_unlock_pi; + } /* * When deadlock detection is off then we check, if further @@ -282,11 +363,16 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, goto retry; } - /* Deadlock detection */ + /* + * Deadlock detection. If the lock is the same as the original + * lock which caused us to walk the lock chain or if the + * current lock is owned by the task which initiated the chain + * walk, we detected a deadlock. + */ if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock); raw_spin_unlock(&lock->wait_lock); - ret = deadlock_detect ? 
-EDEADLK : 0; + ret = -EDEADLK; goto out_unlock_pi; } @@ -335,11 +421,26 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, __rt_mutex_adjust_prio(task); } + /* + * Check whether the task which owns the current lock is pi + * blocked itself. If yes we store a pointer to the lock for + * the lock chain change detection above. After we dropped + * task->pi_lock next_lock cannot be dereferenced anymore. + */ + next_lock = task_blocked_on_lock(task); + raw_spin_unlock_irqrestore(&task->pi_lock, flags); top_waiter = rt_mutex_top_waiter(lock); raw_spin_unlock(&lock->wait_lock); + /* + * We reached the end of the lock chain. Stop right here. No + * point to go back just to figure that out. + */ + if (!next_lock) + goto out_put_task; + if (!detect_deadlock && waiter != top_waiter) goto out_put_task; @@ -478,8 +579,21 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, { struct task_struct *owner = rt_mutex_owner(lock); struct rt_mutex_waiter *top_waiter = waiter; - unsigned long flags; + struct rt_mutex *next_lock; int chain_walk = 0, res; + unsigned long flags; + + /* + * Early deadlock detection. We really don't want the task to + * enqueue on itself just to untangle the mess later. It's not + * only an optimization. We drop the locks, so another waiter + * can come in before the chain walk detects the deadlock. So + * the other will detect the deadlock and return -EDEADLOCK, + * which is wrong, as the other waiter is not in a deadlock + * situation. 
+ */ + if (owner == task) + return -EDEADLK; raw_spin_lock_irqsave(&task->pi_lock, flags); @@ -517,20 +631,28 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, if (!owner) return 0; + raw_spin_lock_irqsave(&owner->pi_lock, flags); if (waiter == rt_mutex_top_waiter(lock)) { - raw_spin_lock_irqsave(&owner->pi_lock, flags); plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters); plist_add(&waiter->pi_list_entry, &owner->pi_waiters); __rt_mutex_adjust_prio(owner); if (rt_mutex_real_waiter(owner->pi_blocked_on)) chain_walk = 1; - raw_spin_unlock_irqrestore(&owner->pi_lock, flags); - } - else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) + } else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) { chain_walk = 1; + } - if (!chain_walk) + /* Store the lock on which owner is blocked or NULL */ + next_lock = task_blocked_on_lock(owner); + + raw_spin_unlock_irqrestore(&owner->pi_lock, flags); + /* + * Even if full deadlock detection is on, if the owner is not + * blocked itself, we can avoid finding this out in the chain + * walk. + */ + if (!chain_walk || !next_lock) return 0; /* @@ -542,8 +664,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, raw_spin_unlock(&lock->wait_lock); - res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter, - task); + res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, + next_lock, waiter, task); raw_spin_lock(&lock->wait_lock); @@ -553,7 +675,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, /* * Wake up the next waiter on the lock. * - * Remove the top waiter from the current tasks waiter list and wake it up. + * Remove the top waiter from the current tasks pi waiter list and + * wake it up. * * Called with lock->wait_lock held. 
*/ @@ -574,10 +697,23 @@ static void wakeup_next_waiter(struct rt_mutex *lock) */ plist_del(&waiter->pi_list_entry, ¤t->pi_waiters); - rt_mutex_set_owner(lock, NULL); + /* + * As we are waking up the top waiter, and the waiter stays + * queued on the lock until it gets the lock, this lock + * obviously has waiters. Just set the bit here and this has + * the added benefit of forcing all new tasks into the + * slow path making sure no task of lower priority than + * the top waiter can steal this lock. + */ + lock->owner = (void *) RT_MUTEX_HAS_WAITERS; raw_spin_unlock_irqrestore(¤t->pi_lock, flags); + /* + * It's safe to dereference waiter as it cannot go away as + * long as we hold lock->wait_lock. The waiter task needs to + * acquire it in order to dequeue the waiter. + */ rt_mutex_wake_waiter(waiter); } @@ -592,8 +728,8 @@ static void remove_waiter(struct rt_mutex *lock, { int first = (waiter == rt_mutex_top_waiter(lock)); struct task_struct *owner = rt_mutex_owner(lock); + struct rt_mutex *next_lock = NULL; unsigned long flags; - int chain_walk = 0; raw_spin_lock_irqsave(¤t->pi_lock, flags); plist_del(&waiter->list_entry, &lock->wait_list); @@ -617,15 +753,15 @@ static void remove_waiter(struct rt_mutex *lock, } __rt_mutex_adjust_prio(owner); - if (rt_mutex_real_waiter(owner->pi_blocked_on)) - chain_walk = 1; + /* Store the lock on which owner is blocked or NULL */ + next_lock = task_blocked_on_lock(owner); raw_spin_unlock_irqrestore(&owner->pi_lock, flags); } WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); - if (!chain_walk) + if (!next_lock) return; /* gets dropped in rt_mutex_adjust_prio_chain()! 
*/ @@ -633,7 +769,7 @@ static void remove_waiter(struct rt_mutex *lock, raw_spin_unlock(&lock->wait_lock); - rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current); + rt_mutex_adjust_prio_chain(owner, 0, lock, next_lock, NULL, current); raw_spin_lock(&lock->wait_lock); } @@ -646,6 +782,7 @@ static void remove_waiter(struct rt_mutex *lock, void rt_mutex_adjust_pi(struct task_struct *task) { struct rt_mutex_waiter *waiter; + struct rt_mutex *next_lock; unsigned long flags; raw_spin_lock_irqsave(&task->pi_lock, flags); @@ -656,11 +793,13 @@ void rt_mutex_adjust_pi(struct task_struct *task) raw_spin_unlock_irqrestore(&task->pi_lock, flags); return; } + next_lock = waiter->lock; /* gets dropped in rt_mutex_adjust_prio_chain()! */ get_task_struct(task); raw_spin_unlock_irqrestore(&task->pi_lock, flags); - rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task); + + rt_mutex_adjust_prio_chain(task, 0, NULL, next_lock, NULL, task); } #ifdef CONFIG_PREEMPT_RT_FULL @@ -952,12 +1091,12 @@ int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock) /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ if (atomic_add_unless(atomic, -1, 1)) return 0; + migrate_disable(); rt_spin_lock(lock); - if (atomic_dec_and_test(atomic)){ - migrate_disable(); + if (atomic_dec_and_test(atomic)) return 1; - } rt_spin_unlock(lock); + migrate_enable(); return 0; } EXPORT_SYMBOL(atomic_dec_and_spin_lock); @@ -1143,6 +1282,26 @@ static void ww_mutex_account_lock(struct rt_mutex *lock, } #endif +static void rt_mutex_handle_deadlock(int res, int detect_deadlock, + struct rt_mutex_waiter *w) +{ + /* + * If the result is not -EDEADLOCK or the caller requested + * deadlock detection, nothing to do here. + */ + if (res != -EDEADLOCK || detect_deadlock) + return; + + /* + * Yell lowdly and stop the task right here. 
+ */ + rt_mutex_print_deadlock(w); + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + } +} + /* * Slow path lock function: */ @@ -1183,10 +1342,13 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, set_current_state(TASK_RUNNING); - if (unlikely(ret)) + if (unlikely(ret)) { remove_waiter(lock, &waiter); - else if (ww_ctx) + rt_mutex_handle_deadlock(ret, detect_deadlock, &waiter); + + } else if (ww_ctx) { ww_mutex_account_lock(lock, ww_ctx); + } /* * try_to_take_rt_mutex() sets the waiter bit @@ -1244,12 +1406,49 @@ rt_mutex_slowunlock(struct rt_mutex *lock) rt_mutex_deadlock_account_unlock(current); - if (!rt_mutex_has_waiters(lock)) { - lock->owner = NULL; - raw_spin_unlock(&lock->wait_lock); - return; + /* + * We must be careful here if the fast path is enabled. If we + * have no waiters queued we cannot set owner to NULL here + * because of: + * + * foo->lock->owner = NULL; + * rtmutex_lock(foo->lock); <- fast path + * free = atomic_dec_and_test(foo->refcnt); + * rtmutex_unlock(foo->lock); <- fast path + * if (free) + * kfree(foo); + * raw_spin_unlock(foo->lock->wait_lock); + * + * So for the fastpath enabled kernel: + * + * Nothing can set the waiters bit as long as we hold + * lock->wait_lock. So we do the following sequence: + * + * owner = rt_mutex_owner(lock); + * clear_rt_mutex_waiters(lock); + * raw_spin_unlock(&lock->wait_lock); + * if (cmpxchg(&lock->owner, owner, 0) == owner) + * return; + * goto retry; + * + * The fastpath disabled variant is simple as all access to + * lock->owner is serialized by lock->wait_lock: + * + * lock->owner = NULL; + * raw_spin_unlock(&lock->wait_lock); + */ + while (!rt_mutex_has_waiters(lock)) { + /* Drops lock->wait_lock ! */ + if (unlock_rt_mutex_safe(lock) == true) + return; + /* Relock the rtmutex and try again */ + raw_spin_lock(&lock->wait_lock); } + /* + * The wakeup next waiter path does not suffer from the above + * race. See the comments there. 
+ */ wakeup_next_waiter(lock); raw_spin_unlock(&lock->wait_lock); @@ -1548,7 +1747,8 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, raw_spin_unlock_irq(&task->pi_lock); #endif - ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock); + /* We enforce deadlock detection for futexes */ + ret = task_blocks_on_rt_mutex(lock, waiter, task, 1); if (ret && !rt_mutex_owner(lock)) { /* diff --git a/kernel/rtmutex.h b/kernel/rtmutex.h index a1a1dd0..f6a1f3c 100644 --- a/kernel/rtmutex.h +++ b/kernel/rtmutex.h @@ -24,3 +24,8 @@ #define debug_rt_mutex_print_deadlock(w) do { } while (0) #define debug_rt_mutex_detect_deadlock(w,d) (d) #define debug_rt_mutex_reset_waiter(w) do { } while (0) + +static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w) +{ + WARN(1, "rtmutex deadlock detected\n"); +} diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 8749d20..af7418b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1284,17 +1284,11 @@ out: * leave kernel. */ if (p->mm && printk_ratelimit()) { - printk_sched("process %d (%s) no longer affine to cpu%d\n", + printk_deferred("process %d (%s) no longer affine to cpu%d\n", task_pid_nr(p), p->comm, cpu); } } - /* - * Clear PF_NO_SETAFFINITY, otherwise we wreckage - * migrate_disable/enable. See optimization for - * PF_NO_SETAFFINITY tasks there. - */ - p->flags &= ~PF_NO_SETAFFINITY; return dest_cpu; } @@ -2651,9 +2645,8 @@ need_resched: static inline void sched_submit_work(struct task_struct *tsk) { - if (!tsk->state || tsk_is_pi_blocked(tsk)) + if (!tsk->state) return; - /* * If a worker went to sleep, notify and ask workqueue whether * it wants to wake up a task to maintain concurrency. @@ -2661,6 +2654,10 @@ static inline void sched_submit_work(struct task_struct *tsk) if (tsk->flags & PF_WQ_WORKER) wq_worker_sleeping(tsk); + + if (tsk_is_pi_blocked(tsk)) + return; + /* * If we are going to sleep and we have plugged IO queued, * make sure to submit it to avoid deadlocks. 
@@ -5084,7 +5081,6 @@ static int sched_cpu_active(struct notifier_block *nfb, unsigned long action, void *hcpu) { switch (action & ~CPU_TASKS_FROZEN) { - case CPU_STARTING: case CPU_DOWN_FAILED: set_cpu_active((long)hcpu, true); return NOTIFY_OK; diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c index 8b836b3..3031bac 100644 --- a/kernel/sched/cpupri.c +++ b/kernel/sched/cpupri.c @@ -70,8 +70,7 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p, int idx = 0; int task_pri = convert_prio(p->prio); - if (task_pri >= MAX_RT_PRIO) - return 0; + BUG_ON(task_pri >= CPUPRI_NR_PRIORITIES); for (idx = 0; idx < task_pri; idx++) { struct cpupri_vec *vec = &cp->pri_to_cpu[idx]; diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 1681f49..760e149 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -326,50 +326,50 @@ out: * softirq as those do not count in task exec_runtime any more. */ static void irqtime_account_process_tick(struct task_struct *p, int user_tick, - struct rq *rq) + struct rq *rq, int ticks) { - cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); + cputime_t scaled = cputime_to_scaled(cputime_one_jiffy); + u64 cputime = (__force u64) cputime_one_jiffy; u64 *cpustat = kcpustat_this_cpu->cpustat; if (steal_account_process_tick()) return; + cputime *= ticks; + scaled *= ticks; + if (irqtime_account_hi_update()) { - cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy; + cpustat[CPUTIME_IRQ] += cputime; } else if (irqtime_account_si_update()) { - cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy; + cpustat[CPUTIME_SOFTIRQ] += cputime; } else if (this_cpu_ksoftirqd() == p) { /* * ksoftirqd time do not get accounted in cpu_softirq_time. * So, we have to handle it separately here. * Also, p->stime needs to be updated for ksoftirqd. 
*/ - __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, - CPUTIME_SOFTIRQ); + __account_system_time(p, cputime, scaled, CPUTIME_SOFTIRQ); } else if (user_tick) { - account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); + account_user_time(p, cputime, scaled); } else if (p == rq->idle) { - account_idle_time(cputime_one_jiffy); + account_idle_time(cputime); } else if (p->flags & PF_VCPU) { /* System time or guest time */ - account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled); + account_guest_time(p, cputime, scaled); } else { - __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, - CPUTIME_SYSTEM); + __account_system_time(p, cputime, scaled, CPUTIME_SYSTEM); } } static void irqtime_account_idle_ticks(int ticks) { - int i; struct rq *rq = this_rq(); - for (i = 0; i < ticks; i++) - irqtime_account_process_tick(current, 0, rq); + irqtime_account_process_tick(current, 0, rq, ticks); } #else /* CONFIG_IRQ_TIME_ACCOUNTING */ static inline void irqtime_account_idle_ticks(int ticks) {} static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick, - struct rq *rq) {} + struct rq *rq, int nr_ticks) {} #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ /* @@ -458,7 +458,7 @@ void account_process_tick(struct task_struct *p, int user_tick) return; if (sched_clock_irqtime) { - irqtime_account_process_tick(p, user_tick, rq); + irqtime_account_process_tick(p, user_tick, rq, 1); return; } diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 70812af..f10a1ec 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -557,7 +557,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) avg_atom = p->se.sum_exec_runtime; if (nr_switches) - do_div(avg_atom, nr_switches); + avg_atom = div64_ul(avg_atom, nr_switches); else avg_atom = -1LL; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 0af1448..773c5db 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1579,13 +1579,7 @@ static inline void 
enqueue_entity_load_avg(struct cfs_rq *cfs_rq, } wakeup = 0; } else { - /* - * Task re-woke on same cpu (or else migrate_task_rq_fair() - * would have made count negative); we must be careful to avoid - * double-accounting blocked time after synchronizing decays. - */ - se->avg.last_runnable_update += __synchronize_entity_decay(se) - << 20; + __synchronize_entity_decay(se); } /* migrated tasks did not contribute to our blocked load */ @@ -4410,6 +4404,7 @@ static unsigned long scale_rt_power(int cpu) { struct rq *rq = cpu_rq(cpu); u64 total, available, age_stamp, avg; + s64 delta; /* * Since we're reading these variables without serialization make sure @@ -4418,7 +4413,11 @@ static unsigned long scale_rt_power(int cpu) age_stamp = ACCESS_ONCE(rq->age_stamp); avg = ACCESS_ONCE(rq->rt_avg); - total = sched_avg_period() + (rq_clock(rq) - age_stamp); + delta = rq_clock(rq) - age_stamp; + if (unlikely(delta < 0)) + delta = 0; + + total = sched_avg_period() + delta; if (unlikely(total < avg)) { /* Ensures that power won't end up being negative */ diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 240fc60..eebb35e 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -830,7 +830,7 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) if (!once) { once = true; - printk_sched("sched: RT throttling activated\n"); + printk_deferred("sched: RT throttling activated\n"); } } else { /* diff --git a/kernel/smp.c b/kernel/smp.c index 0564571..7d1187c 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -650,7 +650,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), if (cond_func(cpu, info)) { ret = smp_call_function_single(cpu, func, info, wait); - WARN_ON_ONCE(!ret); + WARN_ON_ONCE(ret); } preempt_enable(); } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 2a9db91..1677410 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -138,7 +138,6 @@ static unsigned long dirty_bytes_min = 2 * PAGE_SIZE; /* this is needed for the proc_dointvec_minmax for 
[fs_]overflow UID and GID */ static int maxolduid = 65535; static int minolduid; -static int min_percpu_pagelist_fract = 8; static int ngroups_max = NGROUPS_MAX; static const int cap_last_cap = CAP_LAST_CAP; @@ -1287,7 +1286,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(percpu_pagelist_fraction), .mode = 0644, .proc_handler = percpu_pagelist_fraction_sysctl_handler, - .extra1 = &min_percpu_pagelist_fract, + .extra1 = &zero, }, #ifdef CONFIG_MMU { diff --git a/kernel/time.c b/kernel/time.c index 7c7964c..3eb322e 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -195,6 +195,10 @@ SYSCALL_DEFINE2(settimeofday, struct timeval __user *, tv, if (tv) { if (copy_from_user(&user_tv, tv, sizeof(*tv))) return -EFAULT; + + if (!timeval_valid(&user_tv)) + return -EINVAL; + new_ts.tv_sec = user_tv.tv_sec; new_ts.tv_nsec = user_tv.tv_usec * NSEC_PER_USEC; } @@ -496,17 +500,20 @@ EXPORT_SYMBOL(usecs_to_jiffies); * that a remainder subtract here would not do the right thing as the * resolution values don't fall on second boundries. I.e. the line: * nsec -= nsec % TICK_NSEC; is NOT a correct resolution rounding. + * Note that due to the small error in the multiplier here, this + * rounding is incorrect for sufficiently large values of tv_nsec, but + * well formed timespecs should have tv_nsec < NSEC_PER_SEC, so we're + * OK. * * Rather, we just shift the bits off the right. * * The >> (NSEC_JIFFIE_SC - SEC_JIFFIE_SC) converts the scaled nsec * value to a scaled second value. 
*/ -unsigned long -timespec_to_jiffies(const struct timespec *value) +static unsigned long +__timespec_to_jiffies(unsigned long sec, long nsec) { - unsigned long sec = value->tv_sec; - long nsec = value->tv_nsec + TICK_NSEC - 1; + nsec = nsec + TICK_NSEC - 1; if (sec >= MAX_SEC_IN_JIFFIES){ sec = MAX_SEC_IN_JIFFIES; @@ -517,6 +524,13 @@ timespec_to_jiffies(const struct timespec *value) (NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC; } + +unsigned long +timespec_to_jiffies(const struct timespec *value) +{ + return __timespec_to_jiffies(value->tv_sec, value->tv_nsec); +} + EXPORT_SYMBOL(timespec_to_jiffies); void @@ -533,31 +547,27 @@ jiffies_to_timespec(const unsigned long jiffies, struct timespec *value) } EXPORT_SYMBOL(jiffies_to_timespec); -/* Same for "timeval" +/* + * We could use a similar algorithm to timespec_to_jiffies (with a + * different multiplier for usec instead of nsec). But this has a + * problem with rounding: we can't exactly add TICK_NSEC - 1 to the + * usec value, since it's not necessarily integral. * - * Well, almost. The problem here is that the real system resolution is - * in nanoseconds and the value being converted is in micro seconds. - * Also for some machines (those that use HZ = 1024, in-particular), - * there is a LARGE error in the tick size in microseconds. - - * The solution we use is to do the rounding AFTER we convert the - * microsecond part. Thus the USEC_ROUND, the bits to be shifted off. - * Instruction wise, this should cost only an additional add with carry - * instruction above the way it was done above. + * We could instead round in the intermediate scaled representation + * (i.e. in units of 1/2^(large scale) jiffies) but that's also + * perilous: the scaling introduces a small positive error, which + * combined with a division-rounding-upward (i.e. adding 2^(scale) - 1 + * units to the intermediate before shifting) leads to accidental + * overflow and overestimates. 
+ * + * At the cost of one additional multiplication by a constant, just + * use the timespec implementation. */ unsigned long timeval_to_jiffies(const struct timeval *value) { - unsigned long sec = value->tv_sec; - long usec = value->tv_usec; - - if (sec >= MAX_SEC_IN_JIFFIES){ - sec = MAX_SEC_IN_JIFFIES; - usec = 0; - } - return (((u64)sec * SEC_CONVERSION) + - (((u64)usec * USEC_CONVERSION + USEC_ROUND) >> - (USEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC; + return __timespec_to_jiffies(value->tv_sec, + value->tv_usec * NSEC_PER_USEC); } EXPORT_SYMBOL(timeval_to_jiffies); diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 88c9c65..cd45a07 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -464,18 +464,26 @@ static enum alarmtimer_type clock2alarm(clockid_t clockid) static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm, ktime_t now) { + unsigned long flags; struct k_itimer *ptr = container_of(alarm, struct k_itimer, it.alarm.alarmtimer); - if (posix_timer_event(ptr, 0) != 0) - ptr->it_overrun++; + enum alarmtimer_restart result = ALARMTIMER_NORESTART; + + spin_lock_irqsave(&ptr->it_lock, flags); + if ((ptr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) { + if (posix_timer_event(ptr, 0) != 0) + ptr->it_overrun++; + } /* Re-add periodic timers */ if (ptr->it.alarm.interval.tv64) { ptr->it_overrun += alarm_forward(alarm, now, ptr->it.alarm.interval); - return ALARMTIMER_RESTART; + result = ALARMTIMER_RESTART; } - return ALARMTIMER_NORESTART; + spin_unlock_irqrestore(&ptr->it_lock, flags); + + return result; } /** @@ -541,18 +549,22 @@ static int alarm_timer_create(struct k_itimer *new_timer) * @new_timer: k_itimer pointer * @cur_setting: itimerspec data to fill * - * Copies the itimerspec data out from the k_itimer + * Copies out the current itimerspec data */ static void alarm_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) { - memset(cur_setting, 0, sizeof(struct itimerspec)); + 
ktime_t relative_expiry_time = + alarm_expires_remaining(&(timr->it.alarm.alarmtimer)); - cur_setting->it_interval = - ktime_to_timespec(timr->it.alarm.interval); - cur_setting->it_value = - ktime_to_timespec(timr->it.alarm.alarmtimer.node.expires); - return; + if (ktime_to_ns(relative_expiry_time) > 0) { + cur_setting->it_value = ktime_to_timespec(relative_expiry_time); + } else { + cur_setting->it_value.tv_sec = 0; + cur_setting->it_value.tv_nsec = 0; + } + + cur_setting->it_interval = ktime_to_timespec(timr->it.alarm.interval); } /** @@ -585,9 +597,14 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, struct itimerspec *new_setting, struct itimerspec *old_setting) { + ktime_t exp; + if (!rtcdev) return -ENOTSUPP; + if (flags & ~TIMER_ABSTIME) + return -EINVAL; + if (old_setting) alarm_timer_get(timr, old_setting); @@ -597,8 +614,16 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, /* start the timer */ timr->it.alarm.interval = timespec_to_ktime(new_setting->it_interval); - alarm_start(&timr->it.alarm.alarmtimer, - timespec_to_ktime(new_setting->it_value)); + exp = timespec_to_ktime(new_setting->it_value); + /* Convert (if necessary) to absolute time */ + if (flags != TIMER_ABSTIME) { + ktime_t now; + + now = alarm_bases[timr->it.alarm.alarmtimer.type].gettime(); + exp = ktime_add(now, exp); + } + + alarm_start(&timr->it.alarm.alarmtimer, exp); return 0; } @@ -730,6 +755,9 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags, if (!alarmtimer_get_rtcdev()) return -ENOTSUPP; + if (flags & ~TIMER_ABSTIME) + return -EINVAL; + if (!capable(CAP_WAKE_ALARM)) return -EPERM; diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 662c579..c2eb27b 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -146,7 +146,8 @@ static int clockevents_increase_min_delta(struct clock_event_device *dev) { /* Nothing to do if we already reached the limit */ if (dev->min_delta_ns >= MIN_DELTA_LIMIT) { - 
printk(KERN_WARNING "CE: Reprogramming failure. Giving up\n"); + printk_deferred(KERN_WARNING + "CE: Reprogramming failure. Giving up\n"); dev->next_event.tv64 = KTIME_MAX; return -ETIME; } @@ -159,9 +160,10 @@ static int clockevents_increase_min_delta(struct clock_event_device *dev) if (dev->min_delta_ns > MIN_DELTA_LIMIT) dev->min_delta_ns = MIN_DELTA_LIMIT; - printk(KERN_WARNING "CE: %s increased min_delta_ns to %llu nsec\n", - dev->name ? dev->name : "?", - (unsigned long long) dev->min_delta_ns); + printk_deferred(KERN_WARNING + "CE: %s increased min_delta_ns to %llu nsec\n", + dev->name ? dev->name : "?", + (unsigned long long) dev->min_delta_ns); return 0; } diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index d6132cd..b9e3a96 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -671,6 +671,13 @@ int ntp_validate_timex(struct timex *txc) if ((txc->modes & ADJ_SETOFFSET) && (!capable(CAP_SYS_TIME))) return -EPERM; + if (txc->modes & ADJ_FREQUENCY) { + if (LONG_MIN / PPM_SCALE > txc->freq) + return -EINVAL; + if (LONG_MAX / PPM_SCALE < txc->freq) + return -EINVAL; + } + return 0; } diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 1b80eb0..07e90b4 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -262,7 +262,7 @@ static bool tick_check_preferred(struct clock_event_device *curdev, bool tick_check_replacement(struct clock_event_device *curdev, struct clock_event_device *newdev) { - if (tick_check_percpu(curdev, newdev, smp_processor_id())) + if (!tick_check_percpu(curdev, newdev, smp_processor_id())) return false; return tick_check_preferred(curdev, newdev); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 3740f28..7e5c94b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -804,7 +804,6 @@ void tick_nohz_idle_enter(void) local_irq_enable(); } -EXPORT_SYMBOL_GPL(tick_nohz_idle_enter); /** * tick_nohz_irq_exit - update next tick event from interrupt exit @@ -932,7 
+931,6 @@ void tick_nohz_idle_exit(void) local_irq_enable(); } -EXPORT_SYMBOL_GPL(tick_nohz_idle_exit); static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now) { @@ -968,7 +966,7 @@ static void tick_nohz_switch_to_nohz(void) struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); ktime_t next; - if (!tick_nohz_active) + if (!tick_nohz_enabled) return; local_irq_disable(); diff --git a/kernel/timer.c b/kernel/timer.c index cc34e42..3b79da2 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -852,7 +852,7 @@ unsigned long apply_slack(struct timer_list *timer, unsigned long expires) bit = find_last_bit(&mask, BITS_PER_LONG); - mask = (1 << bit) - 1; + mask = (1UL << bit) - 1; expires_limit = expires_limit & ~(mask); @@ -1461,6 +1461,19 @@ void run_local_timers(void) * the timer softirq. */ #ifdef CONFIG_PREEMPT_RT_FULL + +#ifndef CONFIG_SMP + /* + * The spin_do_trylock() later may fail as the lock may be hold before + * the interrupt arrived. The spin-lock debugging code will raise a + * warning if the try_lock fails on UP. Since this is only an + * optimization for the FULL_NO_HZ case (not to run the timer softirq on + * an nohz_full CPU) we don't really care and shedule the softirq. 
+ */ + raise_softirq(TIMER_SOFTIRQ); + return; +#endif + /* On RT, irq work runs from softirq */ if (irq_work_needs_cpu()) { raise_softirq(TIMER_SOFTIRQ); diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 7f727b3..e0e5f73 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -703,6 +703,7 @@ void blk_trace_shutdown(struct request_queue *q) * blk_add_trace_rq - Add a trace for a request oriented action * @q: queue the io is for * @rq: the source request + * @nr_bytes: number of completed bytes * @what: the action * * Description: @@ -710,7 +711,7 @@ void blk_trace_shutdown(struct request_queue *q) * **/ static void blk_add_trace_rq(struct request_queue *q, struct request *rq, - u32 what) + unsigned int nr_bytes, u32 what) { struct blk_trace *bt = q->blk_trace; @@ -719,11 +720,11 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq, if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { what |= BLK_TC_ACT(BLK_TC_PC); - __blk_add_trace(bt, 0, blk_rq_bytes(rq), rq->cmd_flags, + __blk_add_trace(bt, 0, nr_bytes, rq->cmd_flags, what, rq->errors, rq->cmd_len, rq->cmd); } else { what |= BLK_TC_ACT(BLK_TC_FS); - __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), + __blk_add_trace(bt, blk_rq_pos(rq), nr_bytes, rq->cmd_flags, what, rq->errors, 0, NULL); } } @@ -731,33 +732,34 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq, static void blk_add_trace_rq_abort(void *ignore, struct request_queue *q, struct request *rq) { - blk_add_trace_rq(q, rq, BLK_TA_ABORT); + blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_ABORT); } static void blk_add_trace_rq_insert(void *ignore, struct request_queue *q, struct request *rq) { - blk_add_trace_rq(q, rq, BLK_TA_INSERT); + blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_INSERT); } static void blk_add_trace_rq_issue(void *ignore, struct request_queue *q, struct request *rq) { - blk_add_trace_rq(q, rq, BLK_TA_ISSUE); + blk_add_trace_rq(q, rq, blk_rq_bytes(rq), 
BLK_TA_ISSUE); } static void blk_add_trace_rq_requeue(void *ignore, struct request_queue *q, struct request *rq) { - blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); + blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_REQUEUE); } static void blk_add_trace_rq_complete(void *ignore, struct request_queue *q, - struct request *rq) + struct request *rq, + unsigned int nr_bytes) { - blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); + blk_add_trace_rq(q, rq, nr_bytes, BLK_TA_COMPLETE); } /** diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index e66411f..d2ab10b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -331,12 +331,12 @@ static void update_ftrace_function(void) func = ftrace_ops_list_func; } + update_function_graph_func(); + /* If there's no change, then do nothing more here */ if (ftrace_trace_function == func) return; - update_function_graph_func(); - /* * If we are using the list function, it doesn't care * about the function_trace_ops. @@ -4252,16 +4252,11 @@ static void ftrace_init_module(struct module *mod, ftrace_process_locs(mod, start, end); } -static int ftrace_module_notify_enter(struct notifier_block *self, - unsigned long val, void *data) +void ftrace_module_init(struct module *mod) { - struct module *mod = data; - - if (val == MODULE_STATE_COMING) - ftrace_init_module(mod, mod->ftrace_callsites, - mod->ftrace_callsites + - mod->num_ftrace_callsites); - return 0; + ftrace_init_module(mod, mod->ftrace_callsites, + mod->ftrace_callsites + + mod->num_ftrace_callsites); } static int ftrace_module_notify_exit(struct notifier_block *self, @@ -4275,11 +4270,6 @@ static int ftrace_module_notify_exit(struct notifier_block *self, return 0; } #else -static int ftrace_module_notify_enter(struct notifier_block *self, - unsigned long val, void *data) -{ - return 0; -} static int ftrace_module_notify_exit(struct notifier_block *self, unsigned long val, void *data) { @@ -4287,11 +4277,6 @@ static int ftrace_module_notify_exit(struct notifier_block *self, } 
#endif /* CONFIG_MODULES */ -struct notifier_block ftrace_module_enter_nb = { - .notifier_call = ftrace_module_notify_enter, - .priority = INT_MAX, /* Run before anything that can use kprobes */ -}; - struct notifier_block ftrace_module_exit_nb = { .notifier_call = ftrace_module_notify_exit, .priority = INT_MIN, /* Run after anything that can remove kprobes */ @@ -4328,10 +4313,6 @@ void __init ftrace_init(void) __start_mcount_loc, __stop_mcount_loc); - ret = register_module_notifier(&ftrace_module_enter_nb); - if (ret) - pr_warning("Failed to register trace ftrace module enter notifier\n"); - ret = register_module_notifier(&ftrace_module_exit_nb); if (ret) pr_warning("Failed to register trace ftrace module exit notifier\n"); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 0e337ee..21ee379 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -543,7 +543,7 @@ static void rb_wake_up_waiters(struct irq_work *work) * as data is added to any of the @buffer's cpu buffers. Otherwise * it will wait for data to be added to a specific cpu buffer. 
*/ -void ring_buffer_wait(struct ring_buffer *buffer, int cpu) +int ring_buffer_wait(struct ring_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; DEFINE_WAIT(wait); @@ -557,6 +557,8 @@ void ring_buffer_wait(struct ring_buffer *buffer, int cpu) if (cpu == RING_BUFFER_ALL_CPUS) work = &buffer->irq_work; else { + if (!cpumask_test_cpu(cpu, buffer->cpumask)) + return -ENODEV; cpu_buffer = buffer->buffers[cpu]; work = &cpu_buffer->irq_work; } @@ -591,6 +593,7 @@ void ring_buffer_wait(struct ring_buffer *buffer, int cpu) schedule(); finish_wait(&work->waiters, &wait); + return 0; } /** @@ -613,10 +616,6 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, struct ring_buffer_per_cpu *cpu_buffer; struct rb_irq_work *work; - if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || - (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) - return POLLIN | POLLRDNORM; - if (cpu == RING_BUFFER_ALL_CPUS) work = &buffer->irq_work; else { @@ -627,8 +626,22 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, work = &cpu_buffer->irq_work; } - work->waiters_pending = true; poll_wait(filp, &work->waiters, poll_table); + work->waiters_pending = true; + /* + * There's a tight race between setting the waiters_pending and + * checking if the ring buffer is empty. Once the waiters_pending bit + * is set, the next event will wake the task up, but we can get stuck + * if there's only a single event in. + * + * FIXME: Ideally, we need a memory barrier on the writer side as well, + * but adding a memory barrier to all events will cause too much of a + * performance hit in the fast path. We only need a memory barrier when + * the buffer goes from empty to having content. But as this race is + * extremely small, and it's not a problem if another event comes in, we + * will fix it later. 
+ */ + smp_mb(); if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) @@ -1982,7 +1995,7 @@ rb_add_time_stamp(struct ring_buffer_event *event, u64 delta) /** * rb_update_event - update event type and data - * @event: the even to update + * @event: the event to update * @type: the type of event * @length: the size of the event field in the ring buffer * @@ -3355,21 +3368,16 @@ static void rb_iter_reset(struct ring_buffer_iter *iter) struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; /* Iterator usage is expected to have record disabled */ - if (list_empty(&cpu_buffer->reader_page->list)) { - iter->head_page = rb_set_head_page(cpu_buffer); - if (unlikely(!iter->head_page)) - return; - iter->head = iter->head_page->read; - } else { - iter->head_page = cpu_buffer->reader_page; - iter->head = cpu_buffer->reader_page->read; - } + iter->head_page = cpu_buffer->reader_page; + iter->head = cpu_buffer->reader_page->read; + + iter->cache_reader_page = iter->head_page; + iter->cache_read = cpu_buffer->read; + if (iter->head) iter->read_stamp = cpu_buffer->read_stamp; else iter->read_stamp = iter->head_page->page->time_stamp; - iter->cache_reader_page = cpu_buffer->reader_page; - iter->cache_read = cpu_buffer->read; } /** @@ -3762,12 +3770,14 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) return NULL; /* - * We repeat when a time extend is encountered. - * Since the time extend is always attached to a data event, - * we should never loop more than once. - * (We never hit the following condition more than twice). + * We repeat when a time extend is encountered or we hit + * the end of the page. Since the time extend is always attached + * to a data event, we should never loop more than three times. + * Once for going to next page, once on time extend, and + * finally once to get the event. + * (We never hit the following condition more than thrice). 
*/ - if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2)) + if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) return NULL; if (rb_per_cpu_empty(cpu_buffer)) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f9401ed..9f79237 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -434,6 +434,12 @@ int __trace_puts(unsigned long ip, const char *str, int size) struct print_entry *entry; unsigned long irq_flags; int alloc; + int pc; + + if (!(trace_flags & TRACE_ITER_PRINTK)) + return 0; + + pc = preempt_count(); if (unlikely(tracing_selftest_running || tracing_disabled)) return 0; @@ -442,8 +448,8 @@ int __trace_puts(unsigned long ip, const char *str, int size) local_save_flags(irq_flags); buffer = global_trace.trace_buffer.buffer; - event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, - irq_flags, preempt_count()); + event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, + irq_flags, pc); if (!event) return 0; @@ -460,6 +466,7 @@ int __trace_puts(unsigned long ip, const char *str, int size) entry->buf[size] = '\0'; __buffer_unlock_commit(buffer, event); + ftrace_trace_stack(buffer, irq_flags, 4, pc); return size; } @@ -477,6 +484,12 @@ int __trace_bputs(unsigned long ip, const char *str) struct bputs_entry *entry; unsigned long irq_flags; int size = sizeof(struct bputs_entry); + int pc; + + if (!(trace_flags & TRACE_ITER_PRINTK)) + return 0; + + pc = preempt_count(); if (unlikely(tracing_selftest_running || tracing_disabled)) return 0; @@ -484,7 +497,7 @@ int __trace_bputs(unsigned long ip, const char *str) local_save_flags(irq_flags); buffer = global_trace.trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, - irq_flags, preempt_count()); + irq_flags, pc); if (!event) return 0; @@ -493,6 +506,7 @@ int __trace_bputs(unsigned long ip, const char *str) entry->str = str; __buffer_unlock_commit(buffer, event); + ftrace_trace_stack(buffer, irq_flags, 4, pc); return 1; } @@ -750,7 +764,7 @@ static struct { { 
trace_clock_local, "local", 1 }, { trace_clock_global, "global", 1 }, { trace_clock_counter, "counter", 0 }, - { trace_clock_jiffies, "uptime", 1 }, + { trace_clock_jiffies, "uptime", 0 }, { trace_clock, "perf", 1 }, ARCH_TRACE_CLOCKS }; @@ -1044,13 +1058,13 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) } #endif /* CONFIG_TRACER_MAX_TRACE */ -static void default_wait_pipe(struct trace_iterator *iter) +static int default_wait_pipe(struct trace_iterator *iter) { /* Iterators are static, they should be filled or empty */ if (trace_buffer_iter(iter, iter->cpu_file)) - return; + return 0; - ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file); + return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file); } #ifdef CONFIG_FTRACE_STARTUP_TEST @@ -1323,7 +1337,6 @@ void tracing_start(void) arch_spin_unlock(&ftrace_max_lock); - ftrace_start(); out: raw_spin_unlock_irqrestore(&global_trace.start_lock, flags); } @@ -1370,7 +1383,6 @@ void tracing_stop(void) struct ring_buffer *buffer; unsigned long flags; - ftrace_stop(); raw_spin_lock_irqsave(&global_trace.start_lock, flags); if (global_trace.stop_count++) goto out; @@ -1417,12 +1429,12 @@ static void tracing_stop_tr(struct trace_array *tr) void trace_stop_cmdline_recording(void); -static void trace_save_cmdline(struct task_struct *tsk) +static int trace_save_cmdline(struct task_struct *tsk) { unsigned pid, idx; if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT)) - return; + return 0; /* * It's not the end of the world if we don't get @@ -1431,7 +1443,7 @@ static void trace_save_cmdline(struct task_struct *tsk) * so if we miss here, then better luck next time. 
*/ if (!arch_spin_trylock(&trace_cmdline_lock)) - return; + return 0; idx = map_pid_to_cmdline[tsk->pid]; if (idx == NO_CMDLINE_MAP) { @@ -1456,6 +1468,8 @@ static void trace_save_cmdline(struct task_struct *tsk) memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN); arch_spin_unlock(&trace_cmdline_lock); + + return 1; } void trace_find_cmdline(int pid, char comm[]) @@ -1497,9 +1511,8 @@ void tracing_record_cmdline(struct task_struct *tsk) if (!__this_cpu_read(trace_cmdline_save)) return; - __this_cpu_write(trace_cmdline_save, false); - - trace_save_cmdline(tsk); + if (trace_save_cmdline(tsk)) + __this_cpu_write(trace_cmdline_save, false); } void @@ -4070,17 +4083,19 @@ tracing_poll_pipe(struct file *filp, poll_table *poll_table) * * Anyway, this is really very primitive wakeup. */ -void poll_wait_pipe(struct trace_iterator *iter) +int poll_wait_pipe(struct trace_iterator *iter) { set_current_state(TASK_INTERRUPTIBLE); /* sleep for 100 msecs, and try again. */ schedule_timeout(HZ / 10); + return 0; } /* Must be called with trace_types_lock mutex held. 
*/ static int tracing_wait_pipe(struct file *filp) { struct trace_iterator *iter = filp->private_data; + int ret; while (trace_empty(iter)) { @@ -4090,10 +4105,13 @@ static int tracing_wait_pipe(struct file *filp) mutex_unlock(&iter->mutex); - iter->trace->wait_pipe(iter); + ret = iter->trace->wait_pipe(iter); mutex_lock(&iter->mutex); + if (ret) + return ret; + if (signal_pending(current)) return -EINTR; @@ -5027,8 +5045,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, goto out_unlock; } mutex_unlock(&trace_types_lock); - iter->trace->wait_pipe(iter); + ret = iter->trace->wait_pipe(iter); mutex_lock(&trace_types_lock); + if (ret) { + size = ret; + goto out_unlock; + } if (signal_pending(current)) { size = -EINTR; goto out_unlock; @@ -5240,8 +5262,10 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, goto out; } mutex_unlock(&trace_types_lock); - iter->trace->wait_pipe(iter); + ret = iter->trace->wait_pipe(iter); mutex_lock(&trace_types_lock); + if (ret) + goto out; if (signal_pending(current)) { ret = -EINTR; goto out; @@ -6049,7 +6073,7 @@ static int instance_mkdir (struct inode *inode, struct dentry *dentry, umode_t m int ret; /* Paranoid: Make sure the parent is the "instances" directory */ - parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias); + parent = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias); if (WARN_ON_ONCE(parent != trace_instance_dir)) return -ENOENT; @@ -6076,7 +6100,7 @@ static int instance_rmdir(struct inode *inode, struct dentry *dentry) int ret; /* Paranoid: Make sure the parent is the "instances" directory */ - parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias); + parent = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias); if (WARN_ON_ONCE(parent != trace_instance_dir)) return -ENOENT; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 109291a..90584b5 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -336,7 +336,7 @@ struct 
tracer { void (*stop)(struct trace_array *tr); void (*open)(struct trace_iterator *iter); void (*pipe_open)(struct trace_iterator *iter); - void (*wait_pipe)(struct trace_iterator *iter); + int (*wait_pipe)(struct trace_iterator *iter); void (*close)(struct trace_iterator *iter); void (*pipe_close)(struct trace_iterator *iter); ssize_t (*read)(struct trace_iterator *iter, @@ -551,7 +551,7 @@ void trace_init_global_iter(struct trace_iterator *iter); void tracing_iter_reset(struct trace_iterator *iter, int cpu); -void poll_wait_pipe(struct trace_iterator *iter); +int poll_wait_pipe(struct trace_iterator *iter); void tracing_sched_switch_trace(struct trace_array *tr, struct task_struct *prev, diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 26dc348..57b67b1 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c @@ -59,13 +59,14 @@ u64 notrace trace_clock(void) /* * trace_jiffy_clock(): Simply use jiffies as a clock counter. + * Note that this use of jiffies_64 is not completely safe on + * 32-bit systems. But the window is tiny, and the effect if + * we are affected is that we will have an obviously bogus + * timestamp on a trace event - i.e. not life threatening. 
*/ u64 notrace trace_clock_jiffies(void) { - u64 jiffy = jiffies - INITIAL_JIFFIES; - - /* Return nsecs */ - return (u64)jiffies_to_usecs(jiffy) * 1000ULL; + return jiffies_64_to_clock_t(jiffies_64 - INITIAL_JIFFIES); } /* diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 9e49f3f..6574925 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -429,7 +429,7 @@ static void remove_event_file_dir(struct ftrace_event_file *file) if (dir) { spin_lock(&dir->d_lock); /* probably unneeded */ - list_for_each_entry(child, &dir->d_subdirs, d_u.d_child) { + list_for_each_entry(child, &dir->d_subdirs, d_child) { if (child->d_inode) /* probably unneeded */ child->d_inode->i_private = NULL; } diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 559329d..d8ce71b 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -312,7 +312,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) int size; syscall_nr = trace_get_syscall_nr(current, regs); - if (syscall_nr < 0) + if (syscall_nr < 0 || syscall_nr >= NR_syscalls) return; if (!test_bit(syscall_nr, tr->enabled_enter_syscalls)) return; @@ -354,7 +354,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) int syscall_nr; syscall_nr = trace_get_syscall_nr(current, regs); - if (syscall_nr < 0) + if (syscall_nr < 0 || syscall_nr >= NR_syscalls) return; if (!test_bit(syscall_nr, tr->enabled_exit_syscalls)) return; @@ -557,7 +557,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) int size; syscall_nr = trace_get_syscall_nr(current, regs); - if (syscall_nr < 0) + if (syscall_nr < 0 || syscall_nr >= NR_syscalls) return; if (!test_bit(syscall_nr, enabled_perf_enter_syscalls)) return; @@ -631,7 +631,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) int size; syscall_nr = trace_get_syscall_nr(current, regs); - if (syscall_nr < 0) + 
if (syscall_nr < 0 || syscall_nr >= NR_syscalls) return; if (!test_bit(syscall_nr, enabled_perf_exit_syscalls)) return; diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 031cc56..63630ae 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -641,6 +641,9 @@ static int tracepoint_module_coming(struct module *mod) struct tp_module *tp_mod, *iter; int ret = 0; + if (!mod->num_tracepoints) + return 0; + /* * We skip modules that taint the kernel, especially those with different * module headers (for forced load), to make sure we don't cause a crash. @@ -684,6 +687,9 @@ static int tracepoint_module_going(struct module *mod) { struct tp_module *pos; + if (!mod->num_tracepoints) + return 0; + mutex_lock(&tracepoints_mutex); tracepoint_update_probe_range(mod->tracepoints_ptrs, mod->tracepoints_ptrs + mod->num_tracepoints); diff --git a/kernel/uid16.c b/kernel/uid16.c index 602e5bb..d58cc4d 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -176,7 +176,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist) struct group_info *group_info; int retval; - if (!ns_capable(current_user_ns(), CAP_SETGID)) + if (!may_setgroups()) return -EPERM; if ((unsigned)gidsetsize > NGROUPS_MAX) return -EINVAL; diff --git a/kernel/user.c b/kernel/user.c index 2800008..b143217 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -51,6 +51,7 @@ struct user_namespace init_user_ns = { .owner = GLOBAL_ROOT_UID, .group = GLOBAL_ROOT_GID, .proc_inum = PROC_USER_INIT_INO, + .flags = USERNS_INIT_FLAGS, }; EXPORT_SYMBOL_GPL(init_user_ns); diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 6991139..c09fe8b 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -24,6 +24,7 @@ #include <linux/fs_struct.h> static struct kmem_cache *user_ns_cachep __read_mostly; +static DEFINE_MUTEX(userns_state_mutex); static bool new_idmap_permitted(const struct file *file, struct user_namespace *ns, int cap_setid, @@ -99,6 +100,11 @@ int 
create_user_ns(struct cred *new) ns->owner = owner; ns->group = group; + /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */ + mutex_lock(&userns_state_mutex); + ns->flags = parent_ns->flags; + mutex_unlock(&userns_state_mutex); + set_cred_user_ns(new, ns); return 0; @@ -575,9 +581,6 @@ static bool mappings_overlap(struct uid_gid_map *new_map, struct uid_gid_extent return false; } - -static DEFINE_MUTEX(id_map_mutex); - static ssize_t map_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos, int cap_setid, @@ -594,7 +597,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, ssize_t ret = -EINVAL; /* - * The id_map_mutex serializes all writes to any given map. + * The userns_state_mutex serializes all writes to any given map. * * Any map is only ever written once. * @@ -612,7 +615,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, * order and smp_rmb() is guaranteed that we don't have crazy * architectures returning stale data. */ - mutex_lock(&id_map_mutex); + mutex_lock(&userns_state_mutex); ret = -EPERM; /* Only allow one successful write to the map */ @@ -739,7 +742,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, *ppos = count; ret = count; out: - mutex_unlock(&id_map_mutex); + mutex_unlock(&userns_state_mutex); if (page) free_page(page); return ret; @@ -798,17 +801,21 @@ static bool new_idmap_permitted(const struct file *file, struct user_namespace *ns, int cap_setid, struct uid_gid_map *new_map) { - /* Allow mapping to your own filesystem ids */ - if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1)) { + const struct cred *cred = file->f_cred; + /* Don't allow mappings that would allow anything that wouldn't + * be allowed without the establishment of unprivileged mappings. 
+ */ + if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) && + uid_eq(ns->owner, cred->euid)) { u32 id = new_map->extent[0].lower_first; if (cap_setid == CAP_SETUID) { kuid_t uid = make_kuid(ns->parent, id); - if (uid_eq(uid, file->f_cred->fsuid)) + if (uid_eq(uid, cred->euid)) return true; - } - else if (cap_setid == CAP_SETGID) { + } else if (cap_setid == CAP_SETGID) { kgid_t gid = make_kgid(ns->parent, id); - if (gid_eq(gid, file->f_cred->fsgid)) + if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) && + gid_eq(gid, cred->egid)) return true; } } @@ -828,6 +835,100 @@ static bool new_idmap_permitted(const struct file *file, return false; } +int proc_setgroups_show(struct seq_file *seq, void *v) +{ + struct user_namespace *ns = seq->private; + unsigned long userns_flags = ACCESS_ONCE(ns->flags); + + seq_printf(seq, "%s\n", + (userns_flags & USERNS_SETGROUPS_ALLOWED) ? + "allow" : "deny"); + return 0; +} + +ssize_t proc_setgroups_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct seq_file *seq = file->private_data; + struct user_namespace *ns = seq->private; + char kbuf[8], *pos; + bool setgroups_allowed; + ssize_t ret; + + /* Only allow a very narrow range of strings to be written */ + ret = -EINVAL; + if ((*ppos != 0) || (count >= sizeof(kbuf))) + goto out; + + /* What was written? */ + ret = -EFAULT; + if (copy_from_user(kbuf, buf, count)) + goto out; + kbuf[count] = '\0'; + pos = kbuf; + + /* What is being requested? */ + ret = -EINVAL; + if (strncmp(pos, "allow", 5) == 0) { + pos += 5; + setgroups_allowed = true; + } + else if (strncmp(pos, "deny", 4) == 0) { + pos += 4; + setgroups_allowed = false; + } + else + goto out; + + /* Verify there is not trailing junk on the line */ + pos = skip_spaces(pos); + if (*pos != '\0') + goto out; + + ret = -EPERM; + mutex_lock(&userns_state_mutex); + if (setgroups_allowed) { + /* Enabling setgroups after setgroups has been disabled + * is not allowed. 
+ */ + if (!(ns->flags & USERNS_SETGROUPS_ALLOWED)) + goto out_unlock; + } else { + /* Permanently disabling setgroups after setgroups has + * been enabled by writing the gid_map is not allowed. + */ + if (ns->gid_map.nr_extents != 0) + goto out_unlock; + ns->flags &= ~USERNS_SETGROUPS_ALLOWED; + } + mutex_unlock(&userns_state_mutex); + + /* Report a successful write */ + *ppos = count; + ret = count; +out: + return ret; +out_unlock: + mutex_unlock(&userns_state_mutex); + goto out; +} + +bool userns_may_setgroups(const struct user_namespace *ns) +{ + bool allowed; + + mutex_lock(&userns_state_mutex); + /* It is not safe to use setgroups until a gid mapping in + * the user namespace has been established. + */ + allowed = ns->gid_map.nr_extents != 0; + /* Is setgroups allowed? */ + allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED); + mutex_unlock(&userns_state_mutex); + + return allowed; +} + static void *userns_get(struct task_struct *task) { struct user_namespace *user_ns; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 9efb7ce..bae6e25 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -126,6 +126,11 @@ enum { * cpu or grabbing pool->lock is enough for read access. If * POOL_DISASSOCIATED is set, it's identical to L. * + * On RT we need the extra protection via rt_lock_idle_list() for + * the list manipulations against read access from + * wq_worker_sleeping(). All other places are nicely serialized via + * pool->lock. + * * MG: pool->manager_mutex and pool->lock protected. Writes require both * locks. Reads can happen under either lock. 
* @@ -409,6 +414,31 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to, if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \ else +#ifdef CONFIG_PREEMPT_RT_BASE +static inline void rt_lock_idle_list(struct worker_pool *pool) +{ + preempt_disable(); +} +static inline void rt_unlock_idle_list(struct worker_pool *pool) +{ + preempt_enable(); +} +static inline void sched_lock_idle_list(struct worker_pool *pool) { } +static inline void sched_unlock_idle_list(struct worker_pool *pool) { } +#else +static inline void rt_lock_idle_list(struct worker_pool *pool) { } +static inline void rt_unlock_idle_list(struct worker_pool *pool) { } +static inline void sched_lock_idle_list(struct worker_pool *pool) +{ + spin_lock_irq(&pool->lock); +} +static inline void sched_unlock_idle_list(struct worker_pool *pool) +{ + spin_unlock_irq(&pool->lock); +} +#endif + + #ifdef CONFIG_DEBUG_OBJECTS_WORK static struct debug_obj_descr work_debug_descr; @@ -801,10 +831,16 @@ static struct worker *first_worker(struct worker_pool *pool) */ static void wake_up_worker(struct worker_pool *pool) { - struct worker *worker = first_worker(pool); + struct worker *worker; + + rt_lock_idle_list(pool); + + worker = first_worker(pool); if (likely(worker)) wake_up_process(worker->task); + + rt_unlock_idle_list(pool); } /** @@ -832,7 +868,7 @@ void wq_worker_running(struct task_struct *task) */ void wq_worker_sleeping(struct task_struct *task) { - struct worker *next, *worker = kthread_data(task); + struct worker *worker = kthread_data(task); struct worker_pool *pool; /* @@ -849,25 +885,18 @@ void wq_worker_sleeping(struct task_struct *task) return; worker->sleeping = 1; - spin_lock_irq(&pool->lock); + /* * The counterpart of the following dec_and_test, implied mb, * worklist not empty test sequence is in insert_work(). * Please read comment there. - * - * NOT_RUNNING is clear. 
This means that we're bound to and - * running on the local cpu w/ rq lock held and preemption - * disabled, which in turn means that none else could be - * manipulating idle_list, so dereferencing idle_list without pool - * lock is safe. */ if (atomic_dec_and_test(&pool->nr_running) && !list_empty(&pool->worklist)) { - next = first_worker(pool); - if (next) - wake_up_process(next->task); + sched_lock_idle_list(pool); + wake_up_worker(pool); + sched_unlock_idle_list(pool); } - spin_unlock_irq(&pool->lock); } /** @@ -1571,7 +1600,9 @@ static void worker_enter_idle(struct worker *worker) worker->last_active = jiffies; /* idle_list is LIFO */ + rt_lock_idle_list(pool); list_add(&worker->entry, &pool->idle_list); + rt_unlock_idle_list(pool); if (too_many_workers(pool) && !timer_pending(&pool->idle_timer)) mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT); @@ -1604,7 +1635,9 @@ static void worker_leave_idle(struct worker *worker) return; worker_clr_flags(worker, WORKER_IDLE); pool->nr_idle--; + rt_lock_idle_list(pool); list_del_init(&worker->entry); + rt_unlock_idle_list(pool); } /** @@ -1849,7 +1882,9 @@ static void destroy_worker(struct worker *worker) */ get_task_struct(worker->task); + rt_lock_idle_list(pool); list_del_init(&worker->entry); + rt_unlock_idle_list(pool); worker->flags |= WORKER_DIE; idr_remove(&pool->worker_idr, worker->id); @@ -1901,6 +1936,12 @@ static void send_mayday(struct work_struct *work) /* mayday mayday mayday */ if (list_empty(&pwq->mayday_node)) { + /* + * If @pwq is for an unbound wq, its base ref may be put at + * any time due to an attribute change. Pin @pwq until the + * rescuer is done with it. 
+ */ + get_pwq(pwq); list_add_tail(&pwq->mayday_node, &wq->maydays); wake_up_process(wq->rescuer->task); } @@ -2383,6 +2424,7 @@ static int rescuer_thread(void *__rescuer) struct worker *rescuer = __rescuer; struct workqueue_struct *wq = rescuer->rescue_wq; struct list_head *scheduled = &rescuer->scheduled; + bool should_stop; set_user_nice(current, RESCUER_NICE_LEVEL); @@ -2394,11 +2436,15 @@ static int rescuer_thread(void *__rescuer) repeat: set_current_state(TASK_INTERRUPTIBLE); - if (kthread_should_stop()) { - __set_current_state(TASK_RUNNING); - rescuer->task->flags &= ~PF_WQ_WORKER; - return 0; - } + /* + * By the time the rescuer is requested to stop, the workqueue + * shouldn't have any work pending, but @wq->maydays may still have + * pwq(s) queued. This can happen by non-rescuer workers consuming + * all the work items before the rescuer got to them. Go through + * @wq->maydays processing before acting on should_stop so that the + * list is always empty on exit. + */ + should_stop = kthread_should_stop(); /* see whether any pwq is asking for help */ spin_lock_irq(&wq_mayday_lock); @@ -2430,6 +2476,12 @@ repeat: process_scheduled_works(rescuer); /* + * Put the reference grabbed by send_mayday(). @pool won't + * go away while we're holding its lock. + */ + put_pwq(pwq); + + /* * Leave this pool. If keep_working() is %true, notify a * regular worker; otherwise, we end up with 0 concurrency * and stalling the execution. 
@@ -2444,6 +2496,12 @@ repeat: spin_unlock_irq(&wq_mayday_lock); + if (should_stop) { + __set_current_state(TASK_RUNNING); + rescuer->task->flags &= ~PF_WQ_WORKER; + return 0; + } + /* rescuers should never participate in concurrency management */ WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING)); schedule(); @@ -3392,6 +3450,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq) } } + dev_set_uevent_suppress(&wq_dev->dev, false); kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD); return 0; } @@ -4093,7 +4152,8 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu, if (!pwq) { pr_warning("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n", wq->name); - goto out_unlock; + mutex_lock(&wq->mutex); + goto use_dfl_pwq; } /* @@ -5008,7 +5068,7 @@ static void __init wq_numa_init(void) BUG_ON(!tbl); for_each_node(node) - BUG_ON(!alloc_cpumask_var_node(&tbl[node], GFP_KERNEL, + BUG_ON(!zalloc_cpumask_var_node(&tbl[node], GFP_KERNEL, node_online(node) ? 
node : NUMA_NO_NODE)); for_each_possible_cpu(cpu) { diff --git a/lib/bitmap.c b/lib/bitmap.c index 06f7e4f..e5c4ebe 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -131,7 +131,9 @@ void __bitmap_shift_right(unsigned long *dst, lower = src[off + k]; if (left && off + k == lim - 1) lower &= mask; - dst[k] = upper << (BITS_PER_LONG - rem) | lower >> rem; + dst[k] = lower >> rem; + if (rem) + dst[k] |= upper << (BITS_PER_LONG - rem); if (left && k == lim - 1) dst[k] &= mask; } @@ -172,7 +174,9 @@ void __bitmap_shift_left(unsigned long *dst, upper = src[k]; if (left && k == lim - 1) upper &= (1UL << left) - 1; - dst[k + off] = lower >> (BITS_PER_LONG - rem) | upper << rem; + dst[k + off] = upper << rem; + if (rem) + dst[k + off] |= lower >> (BITS_PER_LONG - rem); if (left && k + off == lim - 1) dst[k + off] &= (1UL << left) - 1; } diff --git a/lib/btree.c b/lib/btree.c index f9a4846..4264871 100644 --- a/lib/btree.c +++ b/lib/btree.c @@ -198,6 +198,7 @@ EXPORT_SYMBOL_GPL(btree_init); void btree_destroy(struct btree_head *head) { + mempool_free(head->node, head->mempool); mempool_destroy(head->mempool); head->mempool = NULL; } diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c index 31c5f76..f504027 100644 --- a/lib/decompress_bunzip2.c +++ b/lib/decompress_bunzip2.c @@ -184,7 +184,7 @@ static int INIT get_next_block(struct bunzip_data *bd) if (get_bits(bd, 1)) return RETVAL_OBSOLETE_INPUT; origPtr = get_bits(bd, 24); - if (origPtr > dbufSize) + if (origPtr >= dbufSize) return RETVAL_DATA_ERROR; /* mapping table: if some byte values are never used (encoding things like ascii text), the compression code removes the gaps to have fewer @@ -251,7 +251,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa, id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1; /* if already at the top layer, we need to grow */ - if (id >= 1 << (idp->layers * IDR_BITS)) { + if (id > idr_max(idp->layers)) { *starting_id = id; return -EAGAIN; } @@ -858,12 +858,10 
@@ void *idr_replace(struct idr *idp, void *ptr, int id) if (!p) return ERR_PTR(-EINVAL); - n = (p->layer+1) * IDR_BITS; - - if (id >= (1 << n)) + if (id > idr_max(p->layer + 1)) return ERR_PTR(-EINVAL); - n -= IDR_BITS; + n = p->layer * IDR_BITS; while ((n > 0) && p) { p = p->ary[(id >> n) & IDR_MASK]; n -= IDR_BITS; diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index df6839e..7a85967 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -72,6 +72,8 @@ static int lz4_uncompress(const char *source, char *dest, int osize) len = *ip++; for (; len == 255; length += 255) len = *ip++; + if (unlikely(length > (size_t)(length + len))) + goto _output_error; length += len; } @@ -106,6 +108,8 @@ static int lz4_uncompress(const char *source, char *dest, int osize) if (length == ML_MASK) { for (; *ip == 255; length += 255) ip++; + if (unlikely(length > (size_t)(length + *ip))) + goto _output_error; length += *ip++; } @@ -155,7 +159,7 @@ static int lz4_uncompress(const char *source, char *dest, int osize) /* write overflow error detected */ _output_error: - return (int) (-(((char *)ip) - source)); + return -1; } static int lz4_uncompress_unknownoutputsize(const char *source, char *dest, @@ -188,6 +192,8 @@ static int lz4_uncompress_unknownoutputsize(const char *source, char *dest, int s = 255; while ((ip < iend) && (s == 255)) { s = *ip++; + if (unlikely(length > (size_t)(length + s))) + goto _output_error; length += s; } } @@ -228,6 +234,8 @@ static int lz4_uncompress_unknownoutputsize(const char *source, char *dest, if (length == ML_MASK) { while (ip < iend) { int s = *ip++; + if (unlikely(length > (size_t)(length + s))) + goto _output_error; length += s; if (s == 255) continue; @@ -280,7 +288,7 @@ static int lz4_uncompress_unknownoutputsize(const char *source, char *dest, /* write overflow error detected */ _output_error: - return (int) (-(((char *) ip) - source)); + return -1; } int lz4_decompress(const unsigned char *src, size_t 
*src_len, diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c index 569985d..a1c387f 100644 --- a/lib/lzo/lzo1x_decompress_safe.c +++ b/lib/lzo/lzo1x_decompress_safe.c @@ -25,6 +25,16 @@ #define NEED_OP(x) if (!HAVE_OP(x)) goto output_overrun #define TEST_LB(m_pos) if ((m_pos) < out) goto lookbehind_overrun +/* This MAX_255_COUNT is the maximum number of times we can add 255 to a base + * count without overflowing an integer. The multiply will overflow when + * multiplying 255 by more than MAXINT/255. The sum will overflow earlier + * depending on the base count. Since the base count is taken from a u8 + * and a few bits, it is safe to assume that it will always be lower than + * or equal to 2*255, thus we can always prevent any overflow by accepting + * two less 255 steps. See Documentation/lzo.txt for more information. + */ +#define MAX_255_COUNT ((((size_t)~0) / 255) - 2) + int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, unsigned char *out, size_t *out_len) { @@ -55,12 +65,19 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, if (t < 16) { if (likely(state == 0)) { if (unlikely(t == 0)) { + size_t offset; + const unsigned char *ip_last = ip; + while (unlikely(*ip == 0)) { - t += 255; ip++; NEED_IP(1); } - t += 15 + *ip++; + offset = ip - ip_last; + if (unlikely(offset > MAX_255_COUNT)) + return LZO_E_ERROR; + + offset = (offset << 8) - offset; + t += offset + 15 + *ip++; } t += 3; copy_literal_run: @@ -116,12 +133,19 @@ copy_literal_run: } else if (t >= 32) { t = (t & 31) + (3 - 1); if (unlikely(t == 2)) { + size_t offset; + const unsigned char *ip_last = ip; + while (unlikely(*ip == 0)) { - t += 255; ip++; NEED_IP(1); } - t += 31 + *ip++; + offset = ip - ip_last; + if (unlikely(offset > MAX_255_COUNT)) + return LZO_E_ERROR; + + offset = (offset << 8) - offset; + t += offset + 31 + *ip++; NEED_IP(2); } m_pos = op - 1; @@ -134,12 +158,19 @@ copy_literal_run: m_pos -= (t & 8) << 11; t = (t & 7) + (3 - 
1); if (unlikely(t == 2)) { + size_t offset; + const unsigned char *ip_last = ip; + while (unlikely(*ip == 0)) { - t += 255; ip++; NEED_IP(1); } - t += 7 + *ip++; + offset = ip - ip_last; + if (unlikely(offset > MAX_255_COUNT)) + return LZO_E_ERROR; + + offset = (offset << 8) - offset; + t += offset + 7 + *ip++; NEED_IP(2); } next = get_unaligned_le16(ip); diff --git a/lib/nlattr.c b/lib/nlattr.c index fc67547..10ad042 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -201,8 +201,8 @@ int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, } if (unlikely(rem > 0)) - printk(KERN_WARNING "netlink: %d bytes leftover after parsing " - "attributes.\n", rem); + pr_warn_ratelimited("netlink: %d bytes leftover after parsing attributes in process `%s'.\n", + rem, current->comm); err = 0; errout: diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 93c5d5e..741a426 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -166,7 +166,7 @@ static int percpu_counter_hotcpu_callback(struct notifier_block *nb, struct percpu_counter *fbc; compute_batch_value(); - if (action != CPU_DEAD) + if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) return NOTIFY_OK; cpu = (unsigned long)hcpu; diff --git a/lib/plist.c b/lib/plist.c index 1ebc95f..0f2084d 100644 --- a/lib/plist.c +++ b/lib/plist.c @@ -134,6 +134,46 @@ void plist_del(struct plist_node *node, struct plist_head *head) plist_check_head(head); } +/** + * plist_requeue - Requeue @node at end of same-prio entries. + * + * This is essentially an optimized plist_del() followed by + * plist_add(). It moves an entry already in the plist to + * after any other same-priority entries. 
+ * + * @node: &struct plist_node pointer - entry to be moved + * @head: &struct plist_head pointer - list head + */ +void plist_requeue(struct plist_node *node, struct plist_head *head) +{ + struct plist_node *iter; + struct list_head *node_next = &head->node_list; + + plist_check_head(head); + BUG_ON(plist_head_empty(head)); + BUG_ON(plist_node_empty(node)); + + if (node == plist_last(head)) + return; + + iter = plist_next(node); + + if (node->prio != iter->prio) + return; + + plist_del(node, head); + + plist_for_each_continue(iter, head) { + if (node->prio != iter->prio) { + node_next = &iter->node_list; + break; + } + } + list_add_tail(&node->node_list, node_next); + + plist_check_head(head); +} + #ifdef CONFIG_DEBUG_PI_LIST #include <linux/sched.h> #include <linux/module.h> @@ -170,6 +210,14 @@ static void __init plist_test_check(int nr_expect) BUG_ON(prio_pos->prio_list.next != &first->prio_list); } +static void __init plist_test_requeue(struct plist_node *node) +{ + plist_requeue(node, &test_head); + + if (node != plist_last(&test_head)) + BUG_ON(node->prio == plist_next(node)->prio); +} + static int __init plist_test(void) { int nr_expect = 0, i, loop; @@ -193,6 +241,10 @@ static int __init plist_test(void) nr_expect--; } plist_test_check(nr_expect); + if (!plist_node_empty(test_node + i)) { + plist_test_requeue(test_node + i); + plist_test_check(nr_expect); + } } for (i = 0; i < ARRAY_SIZE(test_node); i++) { diff --git a/lib/radix-tree.c b/lib/radix-tree.c index e7b61e8..b7ab981 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -949,81 +949,6 @@ next: } EXPORT_SYMBOL(radix_tree_range_tag_if_tagged); - -/** - * radix_tree_next_hole - find the next hole (not-present entry) - * @root: tree root - * @index: index key - * @max_scan: maximum range to search - * - * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest - * indexed hole. 
- * - * Returns: the index of the hole if found, otherwise returns an index - * outside of the set specified (in which case 'return - index >= max_scan' - * will be true). In rare cases of index wrap-around, 0 will be returned. - * - * radix_tree_next_hole may be called under rcu_read_lock. However, like - * radix_tree_gang_lookup, this will not atomically search a snapshot of - * the tree at a single point in time. For example, if a hole is created - * at index 5, then subsequently a hole is created at index 10, - * radix_tree_next_hole covering both indexes may return 10 if called - * under rcu_read_lock. - */ -unsigned long radix_tree_next_hole(struct radix_tree_root *root, - unsigned long index, unsigned long max_scan) -{ - unsigned long i; - - for (i = 0; i < max_scan; i++) { - if (!radix_tree_lookup(root, index)) - break; - index++; - if (index == 0) - break; - } - - return index; -} -EXPORT_SYMBOL(radix_tree_next_hole); - -/** - * radix_tree_prev_hole - find the prev hole (not-present entry) - * @root: tree root - * @index: index key - * @max_scan: maximum range to search - * - * Search backwards in the range [max(index-max_scan+1, 0), index] - * for the first hole. - * - * Returns: the index of the hole if found, otherwise returns an index - * outside of the set specified (in which case 'index - return >= max_scan' - * will be true). In rare cases of wrap-around, ULONG_MAX will be returned. - * - * radix_tree_next_hole may be called under rcu_read_lock. However, like - * radix_tree_gang_lookup, this will not atomically search a snapshot of - * the tree at a single point in time. For example, if a hole is created - * at index 10, then subsequently a hole is created at index 5, - * radix_tree_prev_hole covering both indexes may return 5 if called under - * rcu_read_lock. 
- */ -unsigned long radix_tree_prev_hole(struct radix_tree_root *root, - unsigned long index, unsigned long max_scan) -{ - unsigned long i; - - for (i = 0; i < max_scan; i++) { - if (!radix_tree_lookup(root, index)) - break; - index--; - if (index == ULONG_MAX) - break; - } - - return index; -} -EXPORT_SYMBOL(radix_tree_prev_hole); - /** * radix_tree_gang_lookup - perform multiple lookup on a radix tree * @root: radix tree root @@ -1338,15 +1263,18 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) } /** - * radix_tree_delete - delete an item from a radix tree + * radix_tree_delete_item - delete an item from a radix tree * @root: radix tree root * @index: index key + * @item: expected item * - * Remove the item at @index from the radix tree rooted at @root. + * Remove @item at @index from the radix tree rooted at @root. * - * Returns the address of the deleted item, or NULL if it was not present. + * Returns the address of the deleted item, or NULL if it was not present + * or the entry at the given @index was not @item. */ -void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) +void *radix_tree_delete_item(struct radix_tree_root *root, + unsigned long index, void *item) { struct radix_tree_node *node = NULL; struct radix_tree_node *slot = NULL; @@ -1381,6 +1309,11 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) if (slot == NULL) goto out; + if (item && slot != item) { + slot = NULL; + goto out; + } + /* * Clear all tags associated with the item to be deleted. * This way of doing it would be inefficient, but seldom is any set. @@ -1425,6 +1358,21 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) out: return slot; } +EXPORT_SYMBOL(radix_tree_delete_item); + +/** + * radix_tree_delete - delete an item from a radix tree + * @root: radix tree root + * @index: index key + * + * Remove the item at @index from the radix tree rooted at @root. 
+ * + * Returns the address of the deleted item, or NULL if it was not present. + */ +void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) +{ + return radix_tree_delete_item(root, index, NULL); +} EXPORT_SYMBOL(radix_tree_delete); /** diff --git a/lib/string.c b/lib/string.c index e5878de..43d0781 100644 --- a/lib/string.c +++ b/lib/string.c @@ -586,6 +586,22 @@ void *memset(void *s, int c, size_t count) EXPORT_SYMBOL(memset); #endif +/** + * memzero_explicit - Fill a region of memory (e.g. sensitive + * keying data) with 0s. + * @s: Pointer to the start of the area. + * @count: The size of the area. + * + * memzero_explicit() doesn't need an arch-specific version as + * it just invokes the one of memset() implicitly. + */ +void memzero_explicit(void *s, size_t count) +{ + memset(s, 0, count); + OPTIMIZER_HIDE_VAR(s); +} +EXPORT_SYMBOL(memzero_explicit); + #ifndef __HAVE_ARCH_MEMCPY /** * memcpy - Copy one area of memory to another diff --git a/localversion-rt b/localversion-rt index b72862e..7549346 100644 --- a/localversion-rt +++ b/localversion-rt @@ -1 +1 @@ --rt30 +-rt51 @@ -254,6 +254,9 @@ config MIGRATION pages as migration can relocate pages to satisfy a huge page allocation instead of reclaiming. 
+config ARCH_ENABLE_HUGEPAGE_MIGRATION + boolean + config PHYS_ADDR_T_64BIT def_bool 64BIT || ARCH_PHYS_ADDR_T_64BIT diff --git a/mm/Makefile b/mm/Makefile index 305d10a..fb51bc6 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -16,7 +16,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ util.o mmzone.o vmstat.o backing-dev.o \ mm_init.o mmu_context.o percpu.o slab_common.o \ - compaction.o balloon_compaction.o \ + compaction.o balloon_compaction.o vmacache.o \ interval_tree.o list_lru.o $(mmu-y) obj-y += init-mm.o diff --git a/mm/compaction.c b/mm/compaction.c index d2c6751..adb6d05 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -89,7 +89,8 @@ static void __reset_isolation_suitable(struct zone *zone) unsigned long end_pfn = zone_end_pfn(zone); unsigned long pfn; - zone->compact_cached_migrate_pfn = start_pfn; + zone->compact_cached_migrate_pfn[0] = start_pfn; + zone->compact_cached_migrate_pfn[1] = start_pfn; zone->compact_cached_free_pfn = end_pfn; zone->compact_blockskip_flush = false; @@ -131,9 +132,10 @@ void reset_isolation_suitable(pg_data_t *pgdat) */ static void update_pageblock_skip(struct compact_control *cc, struct page *page, unsigned long nr_isolated, - bool migrate_scanner) + bool set_unsuitable, bool migrate_scanner) { struct zone *zone = cc->zone; + unsigned long pfn; if (cc->ignore_skip_hint) return; @@ -141,20 +143,32 @@ static void update_pageblock_skip(struct compact_control *cc, if (!page) return; - if (!nr_isolated) { - unsigned long pfn = page_to_pfn(page); + if (nr_isolated) + return; + + /* + * Only skip pageblocks when all forms of compaction will be known to + * fail in the near future. 
+ */ + if (set_unsuitable) set_pageblock_skip(page); - /* Update where compaction should restart */ - if (migrate_scanner) { - if (!cc->finished_update_migrate && - pfn > zone->compact_cached_migrate_pfn) - zone->compact_cached_migrate_pfn = pfn; - } else { - if (!cc->finished_update_free && - pfn < zone->compact_cached_free_pfn) - zone->compact_cached_free_pfn = pfn; - } + pfn = page_to_pfn(page); + + /* Update where async and sync compaction should restart */ + if (migrate_scanner) { + if (cc->finished_update_migrate) + return; + if (pfn > zone->compact_cached_migrate_pfn[0]) + zone->compact_cached_migrate_pfn[0] = pfn; + if (cc->mode != MIGRATE_ASYNC && + pfn > zone->compact_cached_migrate_pfn[1]) + zone->compact_cached_migrate_pfn[1] = pfn; + } else { + if (cc->finished_update_free) + return; + if (pfn < zone->compact_cached_free_pfn) + zone->compact_cached_free_pfn = pfn; } } #else @@ -166,7 +180,7 @@ static inline bool isolation_suitable(struct compact_control *cc, static void update_pageblock_skip(struct compact_control *cc, struct page *page, unsigned long nr_isolated, - bool migrate_scanner) + bool set_unsuitable, bool migrate_scanner) { } #endif /* CONFIG_COMPACTION */ @@ -195,7 +209,7 @@ static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags, } /* async aborts if taking too long or contended */ - if (!cc->sync) { + if (cc->mode == MIGRATE_ASYNC) { cc->contended = true; return false; } @@ -208,30 +222,39 @@ static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags, return true; } -static inline bool compact_trylock_irqsave(spinlock_t *lock, - unsigned long *flags, struct compact_control *cc) +/* + * Aside from avoiding lock contention, compaction also periodically checks + * need_resched() and either schedules in sync compaction or aborts async + * compaction. This is similar to what compact_checklock_irqsave() does, but + * is used where no lock is concerned. 
+ * + * Returns false when no scheduling was needed, or sync compaction scheduled. + * Returns true when async compaction should abort. + */ +static inline bool compact_should_abort(struct compact_control *cc) { - return compact_checklock_irqsave(lock, flags, false, cc); + /* async compaction aborts if contended */ + if (need_resched()) { + if (cc->mode == MIGRATE_ASYNC) { + cc->contended = true; + return true; + } + + cond_resched(); + } + + return false; } /* Returns true if the page is within a block suitable for migration to */ static bool suitable_migration_target(struct page *page) { - int migratetype = get_pageblock_migratetype(page); - - /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */ - if (migratetype == MIGRATE_RESERVE) - return false; - - if (is_migrate_isolate(migratetype)) - return false; - - /* If the page is a large free page, then allow migration */ + /* If the page is a large free page, then disallow migration */ if (PageBuddy(page) && page_order(page) >= pageblock_order) - return true; + return false; /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */ - if (migrate_async_suitable(migratetype)) + if (migrate_async_suitable(get_pageblock_migratetype(page))) return true; /* Otherwise skip the block */ @@ -254,6 +277,7 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, struct page *cursor, *valid_page = NULL; unsigned long flags; bool locked = false; + bool checked_pageblock = false; cursor = pfn_to_page(blockpfn); @@ -285,8 +309,16 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, break; /* Recheck this is a suitable migration target under lock */ - if (!strict && !suitable_migration_target(page)) - break; + if (!strict && !checked_pageblock) { + /* + * We need to check suitability of pageblock only once + * and this isolate_freepages_block() is called with + * pageblock range, so just check once is sufficient. 
+ */ + checked_pageblock = true; + if (!suitable_migration_target(page)) + break; + } /* Recheck this is a buddy page under lock */ if (!PageBuddy(page)) @@ -330,7 +362,8 @@ isolate_fail: /* Update the pageblock-skip if the whole pageblock was scanned */ if (blockpfn == end_pfn) - update_pageblock_skip(cc, valid_page, total_isolated, false); + update_pageblock_skip(cc, valid_page, total_isolated, true, + false); count_compact_events(COMPACTFREE_SCANNED, nr_scanned); if (total_isolated) @@ -461,11 +494,14 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, unsigned long last_pageblock_nr = 0, pageblock_nr; unsigned long nr_scanned = 0, nr_isolated = 0; struct list_head *migratelist = &cc->migratepages; - isolate_mode_t mode = 0; struct lruvec *lruvec; unsigned long flags; bool locked = false; struct page *page = NULL, *valid_page = NULL; + bool set_unsuitable = true; + const isolate_mode_t mode = (cc->mode == MIGRATE_ASYNC ? + ISOLATE_ASYNC_MIGRATE : 0) | + (unevictable ? 
ISOLATE_UNEVICTABLE : 0); /* * Ensure that there are not too many pages isolated from the LRU @@ -474,7 +510,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, */ while (unlikely(too_many_isolated(zone))) { /* async migration should just abort */ - if (!cc->sync) + if (cc->mode == MIGRATE_ASYNC) return 0; congestion_wait(BLK_RW_ASYNC, HZ/10); @@ -483,11 +519,13 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, return 0; } + if (compact_should_abort(cc)) + return 0; + /* Time to isolate some pages for migration */ - cond_resched(); for (; low_pfn < end_pfn; low_pfn++) { /* give a chance to irqs before checking need_resched() */ - if (locked && !((low_pfn+1) % SWAP_CLUSTER_MAX)) { + if (locked && !(low_pfn % SWAP_CLUSTER_MAX)) { if (should_release_lock(&zone->lru_lock)) { spin_unlock_irqrestore(&zone->lru_lock, flags); locked = false; @@ -526,25 +564,31 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, /* If isolation recently failed, do not retry */ pageblock_nr = low_pfn >> pageblock_order; - if (!isolation_suitable(cc, page)) - goto next_pageblock; + if (last_pageblock_nr != pageblock_nr) { + int mt; + + last_pageblock_nr = pageblock_nr; + if (!isolation_suitable(cc, page)) + goto next_pageblock; + + /* + * For async migration, also only scan in MOVABLE + * blocks. Async migration is optimistic to see if + * the minimum amount of work satisfies the allocation + */ + mt = get_pageblock_migratetype(page); + if (cc->mode == MIGRATE_ASYNC && + !migrate_async_suitable(mt)) { + set_unsuitable = false; + goto next_pageblock; + } + } /* Skip if free */ if (PageBuddy(page)) continue; /* - * For async migration, also only scan in MOVABLE blocks. 
Async - * migration is optimistic to see if the minimum amount of work - * satisfies the allocation - */ - if (!cc->sync && last_pageblock_nr != pageblock_nr && - !migrate_async_suitable(get_pageblock_migratetype(page))) { - cc->finished_update_migrate = true; - goto next_pageblock; - } - - /* * Check may be lockless but that's ok as we recheck later. * It's possible to migrate LRU pages and balloon pages * Skip any other type of page @@ -553,11 +597,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, if (unlikely(balloon_page_movable(page))) { if (locked && balloon_page_isolate(page)) { /* Successfully isolated */ - cc->finished_update_migrate = true; - list_add(&page->lru, migratelist); - cc->nr_migratepages++; - nr_isolated++; - goto check_compact_cluster; + goto isolate_success; } } continue; @@ -580,6 +620,15 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, continue; } + /* + * Migration will fail if an anonymous page is pinned in memory, + * so avoid taking lru_lock and isolating it unnecessarily in an + * admittedly racy check. 
+ */ + if (!page_mapping(page) && + page_count(page) > page_mapcount(page)) + continue; + /* Check if it is ok to still hold the lock */ locked = compact_checklock_irqsave(&zone->lru_lock, &flags, locked, cc); @@ -594,12 +643,6 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, continue; } - if (!cc->sync) - mode |= ISOLATE_ASYNC_MIGRATE; - - if (unevictable) - mode |= ISOLATE_UNEVICTABLE; - lruvec = mem_cgroup_page_lruvec(page, zone); /* Try isolate the page */ @@ -609,13 +652,14 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, VM_BUG_ON(PageTransCompound(page)); /* Successfully isolated */ - cc->finished_update_migrate = true; del_page_from_lru_list(page, lruvec, page_lru(page)); + +isolate_success: + cc->finished_update_migrate = true; list_add(&page->lru, migratelist); cc->nr_migratepages++; nr_isolated++; -check_compact_cluster: /* Avoid isolating too much */ if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) { ++low_pfn; @@ -626,7 +670,6 @@ check_compact_cluster: next_pageblock: low_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages) - 1; - last_pageblock_nr = pageblock_nr; } acct_isolated(zone, locked, cc); @@ -634,9 +677,13 @@ next_pageblock: if (locked) spin_unlock_irqrestore(&zone->lru_lock, flags); - /* Update the pageblock-skip if the whole pageblock was scanned */ + /* + * Update the pageblock-skip information and cached scanner pfn, + * if the whole pageblock was scanned without isolating any page. 
+ */ if (low_pfn == end_pfn) - update_pageblock_skip(cc, valid_page, nr_isolated, true); + update_pageblock_skip(cc, valid_page, nr_isolated, + set_unsuitable, true); trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated); @@ -657,44 +704,48 @@ static void isolate_freepages(struct zone *zone, struct compact_control *cc) { struct page *page; - unsigned long high_pfn, low_pfn, pfn, z_end_pfn, end_pfn; + unsigned long block_start_pfn; /* start of current pageblock */ + unsigned long block_end_pfn; /* end of current pageblock */ + unsigned long low_pfn; /* lowest pfn scanner is able to scan */ int nr_freepages = cc->nr_freepages; struct list_head *freelist = &cc->freepages; /* * Initialise the free scanner. The starting point is where we last - * scanned from (or the end of the zone if starting). The low point - * is the end of the pageblock the migration scanner is using. + * successfully isolated from, zone-cached value, or the end of the + * zone when isolating for the first time. We need this aligned to + * the pageblock boundary, because we do + * block_start_pfn -= pageblock_nr_pages in the for loop. + * For ending point, take care when isolating in last pageblock of a + * a zone which ends in the middle of a pageblock. + * The low boundary is the end of the pageblock the migration scanner + * is using. */ - pfn = cc->free_pfn; - low_pfn = cc->migrate_pfn + pageblock_nr_pages; - - /* - * Take care that if the migration scanner is at the end of the zone - * that the free scanner does not accidentally move to the next zone - * in the next isolation cycle. - */ - high_pfn = min(low_pfn, pfn); - - z_end_pfn = zone_end_pfn(zone); + block_start_pfn = cc->free_pfn & ~(pageblock_nr_pages-1); + block_end_pfn = min(block_start_pfn + pageblock_nr_pages, + zone_end_pfn(zone)); + low_pfn = ALIGN(cc->migrate_pfn + 1, pageblock_nr_pages); /* * Isolate free pages until enough are available to migrate the * pages on cc->migratepages. 
We stop searching if the migrate * and free page scanners meet or enough free pages are isolated. */ - for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages; - pfn -= pageblock_nr_pages) { + for (; block_start_pfn >= low_pfn && cc->nr_migratepages > nr_freepages; + block_end_pfn = block_start_pfn, + block_start_pfn -= pageblock_nr_pages) { unsigned long isolated; /* * This can iterate a massively long zone without finding any * suitable migration targets, so periodically check if we need - * to schedule. + * to schedule, or even abort async compaction. */ - cond_resched(); + if (!(block_start_pfn % (SWAP_CLUSTER_MAX * pageblock_nr_pages)) + && compact_should_abort(cc)) + break; - if (!pfn_valid(pfn)) + if (!pfn_valid(block_start_pfn)) continue; /* @@ -704,7 +755,7 @@ static void isolate_freepages(struct zone *zone, * i.e. it's possible that all pages within a zones range of * pages do not belong to a single zone. */ - page = pfn_to_page(pfn); + page = pfn_to_page(block_start_pfn); if (page_zone(page) != zone) continue; @@ -717,35 +768,38 @@ static void isolate_freepages(struct zone *zone, continue; /* Found a block suitable for isolating free pages from */ - isolated = 0; + cc->free_pfn = block_start_pfn; + isolated = isolate_freepages_block(cc, block_start_pfn, + block_end_pfn, freelist, false); + nr_freepages += isolated; /* - * As pfn may not start aligned, pfn+pageblock_nr_page - * may cross a MAX_ORDER_NR_PAGES boundary and miss - * a pfn_valid check. Ensure isolate_freepages_block() - * only scans within a pageblock + * Set a flag that we successfully isolated in this pageblock. + * In the next loop iteration, zone->compact_cached_free_pfn + * will not be updated and thus it will effectively contain the + * highest pageblock we isolated pages from. 
*/ - end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); - end_pfn = min(end_pfn, z_end_pfn); - isolated = isolate_freepages_block(cc, pfn, end_pfn, - freelist, false); - nr_freepages += isolated; + if (isolated) + cc->finished_update_free = true; /* - * Record the highest PFN we isolated pages from. When next - * looking for free pages, the search will restart here as - * page migration may have returned some pages to the allocator + * isolate_freepages_block() might have aborted due to async + * compaction being contended */ - if (isolated) { - cc->finished_update_free = true; - high_pfn = max(high_pfn, pfn); - } + if (cc->contended) + break; } /* split_free_page does not map the pages */ map_pages(freelist); - cc->free_pfn = high_pfn; + /* + * If we crossed the migrate scanner, we want to keep it that way + * so that compact_finished() may detect this + */ + if (block_start_pfn < low_pfn) + cc->free_pfn = cc->migrate_pfn; + cc->nr_freepages = nr_freepages; } @@ -760,9 +814,13 @@ static struct page *compaction_alloc(struct page *migratepage, struct compact_control *cc = (struct compact_control *)data; struct page *freepage; - /* Isolate free pages if necessary */ + /* + * Isolate free pages if necessary, and if we are not aborting due to + * contention. + */ if (list_empty(&cc->freepages)) { - isolate_freepages(cc->zone, cc); + if (!cc->contended) + isolate_freepages(cc->zone, cc); if (list_empty(&cc->freepages)) return NULL; @@ -776,23 +834,16 @@ static struct page *compaction_alloc(struct page *migratepage, } /* - * We cannot control nr_migratepages and nr_freepages fully when migration is - * running as migrate_pages() has no knowledge of compact_control. When - * migration is complete, we count the number of pages on the lists by hand. + * This is a migrate-callback that "frees" freepages back to the isolated + * freelist. All pages on the freelist are from the same zone, so there is no + * special handling needed for NUMA. 
*/ -static void update_nr_listpages(struct compact_control *cc) +static void compaction_free(struct page *page, unsigned long data) { - int nr_migratepages = 0; - int nr_freepages = 0; - struct page *page; - - list_for_each_entry(page, &cc->migratepages, lru) - nr_migratepages++; - list_for_each_entry(page, &cc->freepages, lru) - nr_freepages++; + struct compact_control *cc = (struct compact_control *)data; - cc->nr_migratepages = nr_migratepages; - cc->nr_freepages = nr_freepages; + list_add(&page->lru, &cc->freepages); + cc->nr_freepages++; } /* possible outcome of isolate_migratepages */ @@ -839,11 +890,16 @@ static int compact_finished(struct zone *zone, unsigned int order; unsigned long watermark; - if (fatal_signal_pending(current)) + if (cc->contended || fatal_signal_pending(current)) return COMPACT_PARTIAL; /* Compaction run completes if the migrate and free scanner meet */ if (cc->free_pfn <= cc->migrate_pfn) { + /* Let the next compaction start anew. */ + zone->compact_cached_migrate_pfn[0] = zone->zone_start_pfn; + zone->compact_cached_migrate_pfn[1] = zone->zone_start_pfn; + zone->compact_cached_free_pfn = zone_end_pfn(zone); + /* * Mark that the PG_migrate_skip information should be cleared * by kswapd when it goes to sleep. kswapd does not set the @@ -941,6 +997,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) int ret; unsigned long start_pfn = zone->zone_start_pfn; unsigned long end_pfn = zone_end_pfn(zone); + const bool sync = cc->mode != MIGRATE_ASYNC; ret = compaction_suitable(zone, cc->order); switch (ret) { @@ -954,11 +1011,19 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) } /* + * Clear pageblock skip if there were failures recently and compaction + * is about to be retried after being deferred. kswapd does not do + * this reset as it'll reset the cached information when going to sleep. 
+ */ + if (compaction_restarting(zone, cc->order) && !current_is_kswapd()) + __reset_isolation_suitable(zone); + + /* * Setup to move all movable pages to the end of the zone. Used cached * information on where the scanners should start but check that it * is initialised by ensuring the values are within zone boundaries. */ - cc->migrate_pfn = zone->compact_cached_migrate_pfn; + cc->migrate_pfn = zone->compact_cached_migrate_pfn[sync]; cc->free_pfn = zone->compact_cached_free_pfn; if (cc->free_pfn < start_pfn || cc->free_pfn > end_pfn) { cc->free_pfn = end_pfn & ~(pageblock_nr_pages-1); @@ -966,21 +1031,15 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) } if (cc->migrate_pfn < start_pfn || cc->migrate_pfn > end_pfn) { cc->migrate_pfn = start_pfn; - zone->compact_cached_migrate_pfn = cc->migrate_pfn; + zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn; + zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn; } - /* - * Clear pageblock skip if there were failures recently and compaction - * is about to be retried after being deferred. kswapd does not do - * this reset as it'll reset the cached information when going to sleep. - */ - if (compaction_restarting(zone, cc->order) && !current_is_kswapd()) - __reset_isolation_suitable(zone); + trace_mm_compaction_begin(start_pfn, cc->migrate_pfn, cc->free_pfn, end_pfn); migrate_prep_local(); while ((ret = compact_finished(zone, cc)) == COMPACT_CONTINUE) { - unsigned long nr_migrate, nr_remaining; int err; switch (isolate_migratepages(zone, cc)) { @@ -995,22 +1054,25 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) ; } - nr_migrate = cc->nr_migratepages; + if (!cc->nr_migratepages) + continue; + err = migrate_pages(&cc->migratepages, compaction_alloc, - (unsigned long)cc, - cc->sync ? 
MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC, + compaction_free, (unsigned long)cc, cc->mode, MR_COMPACTION); - update_nr_listpages(cc); - nr_remaining = cc->nr_migratepages; - trace_mm_compaction_migratepages(nr_migrate - nr_remaining, - nr_remaining); + trace_mm_compaction_migratepages(cc->nr_migratepages, err, + &cc->migratepages); - /* Release isolated pages not migrated */ + /* All pages were either migrated or will be released */ + cc->nr_migratepages = 0; if (err) { putback_movable_pages(&cc->migratepages); - cc->nr_migratepages = 0; - if (err == -ENOMEM) { + /* + * migrate_pages() may return -ENOMEM when scanners meet + * and we want compact_finished() to detect it + */ + if (err == -ENOMEM && cc->free_pfn > cc->migrate_pfn) { ret = COMPACT_PARTIAL; goto out; } @@ -1022,12 +1084,13 @@ out: cc->nr_freepages -= release_freepages(&cc->freepages); VM_BUG_ON(cc->nr_freepages != 0); + trace_mm_compaction_end(ret); + return ret; } -static unsigned long compact_zone_order(struct zone *zone, - int order, gfp_t gfp_mask, - bool sync, bool *contended) +static unsigned long compact_zone_order(struct zone *zone, int order, + gfp_t gfp_mask, enum migrate_mode mode, bool *contended) { unsigned long ret; struct compact_control cc = { @@ -1036,7 +1099,7 @@ static unsigned long compact_zone_order(struct zone *zone, .order = order, .migratetype = allocflags_to_migratetype(gfp_mask), .zone = zone, - .sync = sync, + .mode = mode, }; INIT_LIST_HEAD(&cc.freepages); INIT_LIST_HEAD(&cc.migratepages); @@ -1058,7 +1121,7 @@ int sysctl_extfrag_threshold = 500; * @order: The order of the current allocation * @gfp_mask: The GFP mask of the current allocation * @nodemask: The allowed nodes to allocate from - * @sync: Whether migration is synchronous or not + * @mode: The migration mode for async, sync light, or sync migration * @contended: Return value that is true if compaction was aborted due to lock contention * @page: Optionally capture a free page of the requested order during compaction * @@ 
-1066,7 +1129,7 @@ int sysctl_extfrag_threshold = 500; */ unsigned long try_to_compact_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *nodemask, - bool sync, bool *contended) + enum migrate_mode mode, bool *contended) { enum zone_type high_zoneidx = gfp_zone(gfp_mask); int may_enter_fs = gfp_mask & __GFP_FS; @@ -1091,7 +1154,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, nodemask) { int status; - status = compact_zone_order(zone, order, gfp_mask, sync, + status = compact_zone_order(zone, order, gfp_mask, mode, contended); rc = max(status, rc); @@ -1127,13 +1190,9 @@ static void __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc) compact_zone(zone, cc); if (cc->order > 0) { - int ok = zone_watermark_ok(zone, cc->order, - low_wmark_pages(zone), 0, 0); - if (ok && cc->order >= zone->compact_order_failed) - zone->compact_order_failed = cc->order + 1; - /* Currently async compaction is never deferred. */ - else if (!ok && cc->sync) - defer_compaction(zone, cc->order); + if (zone_watermark_ok(zone, cc->order, + low_wmark_pages(zone), 0, 0)) + compaction_defer_reset(zone, cc->order, false); } VM_BUG_ON(!list_empty(&cc->freepages)); @@ -1145,7 +1204,7 @@ void compact_pgdat(pg_data_t *pgdat, int order) { struct compact_control cc = { .order = order, - .sync = false, + .mode = MIGRATE_ASYNC, }; if (!order) @@ -1158,7 +1217,8 @@ static void compact_node(int nid) { struct compact_control cc = { .order = -1, - .sync = true, + .mode = MIGRATE_SYNC, + .ignore_skip_hint = true, }; __compact_pgdat(NODE_DATA(nid), &cc); diff --git a/mm/filemap.c b/mm/filemap.c index 3d2d39a..bd08e9b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -192,9 +192,11 @@ static int filemap_check_errors(struct address_space *mapping) { int ret = 0; /* Check for outstanding write errors */ - if (test_and_clear_bit(AS_ENOSPC, &mapping->flags)) + if (test_bit(AS_ENOSPC, &mapping->flags) && + test_and_clear_bit(AS_ENOSPC, &mapping->flags)) ret = -ENOSPC; - 
if (test_and_clear_bit(AS_EIO, &mapping->flags)) + if (test_bit(AS_EIO, &mapping->flags) && + test_and_clear_bit(AS_EIO, &mapping->flags)) ret = -EIO; return ret; } @@ -446,6 +448,29 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) } EXPORT_SYMBOL_GPL(replace_page_cache_page); +static int page_cache_tree_insert(struct address_space *mapping, + struct page *page) +{ + void **slot; + int error; + + slot = radix_tree_lookup_slot(&mapping->page_tree, page->index); + if (slot) { + void *p; + + p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock); + if (!radix_tree_exceptional_entry(p)) + return -EEXIST; + radix_tree_replace_slot(slot, page); + mapping->nrpages++; + return 0; + } + error = radix_tree_insert(&mapping->page_tree, page->index, page); + if (!error) + mapping->nrpages++; + return error; +} + /** * add_to_page_cache_locked - add a locked page to the pagecache * @page: page to add @@ -480,11 +505,10 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, page->index = offset; spin_lock_irq(&mapping->tree_lock); - error = radix_tree_insert(&mapping->page_tree, offset, page); + error = page_cache_tree_insert(mapping, page); radix_tree_preload_end(); if (unlikely(error)) goto err_insert; - mapping->nrpages++; __inc_zone_page_state(page, NR_FILE_PAGES); spin_unlock_irq(&mapping->tree_lock); trace_mm_filemap_add_to_page_cache(page); @@ -520,10 +544,10 @@ struct page *__page_cache_alloc(gfp_t gfp) if (cpuset_do_page_mem_spread()) { unsigned int cpuset_mems_cookie; do { - cpuset_mems_cookie = get_mems_allowed(); + cpuset_mems_cookie = read_mems_allowed_begin(); n = cpuset_mem_spread_node(); page = alloc_pages_exact_node(n, gfp, 0); - } while (!put_mems_allowed(cpuset_mems_cookie) && !page); + } while (!page && read_mems_allowed_retry(cpuset_mems_cookie)); return page; } @@ -620,8 +644,17 @@ EXPORT_SYMBOL(unlock_page); */ void end_page_writeback(struct page *page) { - if (TestClearPageReclaim(page)) 
+ /* + * TestClearPageReclaim could be used here but it is an atomic + * operation and overkill in this particular case. Failing to + * shuffle a page marked for immediate reclaim is too mild to + * justify taking an atomic operation penalty at the end of + * ever page writeback. + */ + if (PageReclaim(page)) { + ClearPageReclaim(page); rotate_reclaimable_page(page); + } if (!test_clear_page_writeback(page)) BUG(); @@ -686,14 +719,101 @@ int __lock_page_or_retry(struct page *page, struct mm_struct *mm, } /** - * find_get_page - find and get a page reference + * page_cache_next_hole - find the next hole (not-present entry) + * @mapping: mapping + * @index: index + * @max_scan: maximum range to search + * + * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the + * lowest indexed hole. + * + * Returns: the index of the hole if found, otherwise returns an index + * outside of the set specified (in which case 'return - index >= + * max_scan' will be true). In rare cases of index wrap-around, 0 will + * be returned. + * + * page_cache_next_hole may be called under rcu_read_lock. However, + * like radix_tree_gang_lookup, this will not atomically search a + * snapshot of the tree at a single point in time. For example, if a + * hole is created at index 5, then subsequently a hole is created at + * index 10, page_cache_next_hole covering both indexes may return 10 + * if called under rcu_read_lock. 
+ */ +pgoff_t page_cache_next_hole(struct address_space *mapping, + pgoff_t index, unsigned long max_scan) +{ + unsigned long i; + + for (i = 0; i < max_scan; i++) { + struct page *page; + + page = radix_tree_lookup(&mapping->page_tree, index); + if (!page || radix_tree_exceptional_entry(page)) + break; + index++; + if (index == 0) + break; + } + + return index; +} +EXPORT_SYMBOL(page_cache_next_hole); + +/** + * page_cache_prev_hole - find the prev hole (not-present entry) + * @mapping: mapping + * @index: index + * @max_scan: maximum range to search + * + * Search backwards in the range [max(index-max_scan+1, 0), index] for + * the first hole. + * + * Returns: the index of the hole if found, otherwise returns an index + * outside of the set specified (in which case 'index - return >= + * max_scan' will be true). In rare cases of wrap-around, ULONG_MAX + * will be returned. + * + * page_cache_prev_hole may be called under rcu_read_lock. However, + * like radix_tree_gang_lookup, this will not atomically search a + * snapshot of the tree at a single point in time. For example, if a + * hole is created at index 10, then subsequently a hole is created at + * index 5, page_cache_prev_hole covering both indexes may return 5 if + * called under rcu_read_lock. + */ +pgoff_t page_cache_prev_hole(struct address_space *mapping, + pgoff_t index, unsigned long max_scan) +{ + unsigned long i; + + for (i = 0; i < max_scan; i++) { + struct page *page; + + page = radix_tree_lookup(&mapping->page_tree, index); + if (!page || radix_tree_exceptional_entry(page)) + break; + index--; + if (index == ULONG_MAX) + break; + } + + return index; +} +EXPORT_SYMBOL(page_cache_prev_hole); + +/** + * find_get_entry - find and get a page cache entry * @mapping: the address_space to search - * @offset: the page index + * @offset: the page cache index * - * Is there a pagecache struct page at the given (mapping, offset) tuple? - * If yes, increment its refcount and return it; if no, return NULL. 
+ * Looks up the page cache slot at @mapping & @offset. If there is a + * page cache page, it is returned with an increased refcount. + * + * If the slot holds a shadow entry of a previously evicted page, it + * is returned. + * + * Otherwise, %NULL is returned. */ -struct page *find_get_page(struct address_space *mapping, pgoff_t offset) +struct page *find_get_entry(struct address_space *mapping, pgoff_t offset) { void **pagep; struct page *page; @@ -734,24 +854,30 @@ out: return page; } -EXPORT_SYMBOL(find_get_page); +EXPORT_SYMBOL(find_get_entry); /** - * find_lock_page - locate, pin and lock a pagecache page + * find_lock_entry - locate, pin and lock a page cache entry * @mapping: the address_space to search - * @offset: the page index + * @offset: the page cache index * - * Locates the desired pagecache page, locks it, increments its reference - * count and returns its address. + * Looks up the page cache slot at @mapping & @offset. If there is a + * page cache page, it is returned locked and with an increased + * refcount. * - * Returns zero if the page was not present. find_lock_page() may sleep. + * If the slot holds a shadow entry of a previously evicted page, it + * is returned. + * + * Otherwise, %NULL is returned. + * + * find_lock_entry() may sleep. */ -struct page *find_lock_page(struct address_space *mapping, pgoff_t offset) +struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset) { struct page *page; repeat: - page = find_get_page(mapping, offset); + page = find_get_entry(mapping, offset); if (page && !radix_tree_exception(page)) { lock_page(page); /* Has the page been truncated? 
*/ @@ -764,44 +890,86 @@ repeat: } return page; } -EXPORT_SYMBOL(find_lock_page); +EXPORT_SYMBOL(find_lock_entry); /** - * find_or_create_page - locate or add a pagecache page - * @mapping: the page's address_space - * @index: the page's index into the mapping - * @gfp_mask: page allocation mode + * pagecache_get_page - find and get a page reference + * @mapping: the address_space to search + * @offset: the page index + * @fgp_flags: PCG flags + * @gfp_mask: gfp mask to use for the page cache data page allocation + * + * Looks up the page cache slot at @mapping & @offset. * - * Locates a page in the pagecache. If the page is not present, a new page - * is allocated using @gfp_mask and is added to the pagecache and to the VM's - * LRU list. The returned page is locked and has its reference count - * incremented. + * PCG flags modify how the page is returned * - * find_or_create_page() may sleep, even if @gfp_flags specifies an atomic - * allocation! + * FGP_ACCESSED: the page will be marked accessed + * FGP_LOCK: Page is return locked + * FGP_CREAT: If page is not present then a new page is allocated using + * @gfp_mask and added to the page cache and the VM's LRU + * list. The page is returned locked and with an increased + * refcount. Otherwise, %NULL is returned. * - * find_or_create_page() returns the desired page's address, or zero on - * memory exhaustion. + * If FGP_LOCK or FGP_CREAT are specified then the function may sleep even + * if the GFP flags specified for FGP_CREAT are atomic. + * + * If there is a page cache page, it is returned with an increased refcount. 
*/ -struct page *find_or_create_page(struct address_space *mapping, - pgoff_t index, gfp_t gfp_mask) +struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset, + int fgp_flags, gfp_t gfp_mask) { struct page *page; - int err; + repeat: - page = find_lock_page(mapping, index); - if (!page) { + page = find_get_entry(mapping, offset); + if (radix_tree_exceptional_entry(page)) + page = NULL; + if (!page) + goto no_page; + + if (fgp_flags & FGP_LOCK) { + if (fgp_flags & FGP_NOWAIT) { + if (!trylock_page(page)) { + page_cache_release(page); + return NULL; + } + } else { + lock_page(page); + } + + /* Has the page been truncated? */ + if (unlikely(page->mapping != mapping)) { + unlock_page(page); + page_cache_release(page); + goto repeat; + } + VM_BUG_ON(page->index != offset); + } + + if (page && (fgp_flags & FGP_ACCESSED)) + mark_page_accessed(page); + +no_page: + if (!page && (fgp_flags & FGP_CREAT)) { + int err; + if ((fgp_flags & FGP_WRITE) && mapping_cap_account_dirty(mapping)) + gfp_mask |= __GFP_WRITE; + if (fgp_flags & FGP_NOFS) + gfp_mask &= ~__GFP_FS; + page = __page_cache_alloc(gfp_mask); if (!page) return NULL; - /* - * We want a regular kernel memory (not highmem or DMA etc) - * allocation for the radix tree nodes, but we need to honour - * the context-specific requirements the caller has asked for. - * GFP_RECLAIM_MASK collects those requirements. 
- */ - err = add_to_page_cache_lru(page, mapping, index, - (gfp_mask & GFP_RECLAIM_MASK)); + + if (WARN_ON_ONCE(!(fgp_flags & FGP_LOCK))) + fgp_flags |= FGP_LOCK; + + /* Init accessed so avoid atomic mark_page_accessed later */ + if (fgp_flags & FGP_ACCESSED) + init_page_accessed(page); + + err = add_to_page_cache_lru(page, mapping, offset, + gfp_mask & GFP_RECLAIM_MASK); if (unlikely(err)) { page_cache_release(page); page = NULL; @@ -809,9 +977,80 @@ repeat: goto repeat; } } + return page; } -EXPORT_SYMBOL(find_or_create_page); +EXPORT_SYMBOL(pagecache_get_page); + +/** + * find_get_entries - gang pagecache lookup + * @mapping: The address_space to search + * @start: The starting page cache index + * @nr_entries: The maximum number of entries + * @entries: Where the resulting entries are placed + * @indices: The cache indices corresponding to the entries in @entries + * + * find_get_entries() will search for and return a group of up to + * @nr_entries entries in the mapping. The entries are placed at + * @entries. find_get_entries() takes a reference against any actual + * pages it returns. + * + * The search returns a group of mapping-contiguous page cache entries + * with ascending indexes. There may be holes in the indices due to + * not-present pages. + * + * Any shadow entries of evicted pages are included in the returned + * array. + * + * find_get_entries() returns the number of pages and shadow entries + * which were found.
+ */ +unsigned find_get_entries(struct address_space *mapping, + pgoff_t start, unsigned int nr_entries, + struct page **entries, pgoff_t *indices) +{ + void **slot; + unsigned int ret = 0; + struct radix_tree_iter iter; + + if (!nr_entries) + return 0; + + rcu_read_lock(); +restart: + radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { + struct page *page; +repeat: + page = radix_tree_deref_slot(slot); + if (unlikely(!page)) + continue; + if (radix_tree_exception(page)) { + if (radix_tree_deref_retry(page)) + goto restart; + /* + * Otherwise, we must be storing a swap entry + * here as an exceptional entry: so return it + * without attempting to raise page count. + */ + goto export; + } + if (!page_cache_get_speculative(page)) + goto repeat; + + /* Has the page moved? */ + if (unlikely(page != *slot)) { + page_cache_release(page); + goto repeat; + } +export: + indices[ret] = iter.index; + entries[ret] = page; + if (++ret == nr_entries) + break; + } + rcu_read_unlock(); + return ret; +} /** * find_get_pages - gang pagecache lookup @@ -1031,39 +1270,6 @@ repeat: } EXPORT_SYMBOL(find_get_pages_tag); -/** - * grab_cache_page_nowait - returns locked page at given index in given cache - * @mapping: target address_space - * @index: the page index - * - * Same as grab_cache_page(), but do not wait if the page is unavailable. - * This is intended for speculative data generators, where the data can - * be regenerated if the page couldn't be grabbed. This routine should - * be safe to call while holding the lock for another page. - * - * Clear __GFP_FS when allocating the page to avoid recursion into the fs - * and deadlock against the caller's locked page. 
- */ -struct page * -grab_cache_page_nowait(struct address_space *mapping, pgoff_t index) -{ - struct page *page = find_get_page(mapping, index); - - if (page) { - if (trylock_page(page)) - return page; - page_cache_release(page); - return NULL; - } - page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~__GFP_FS); - if (page && add_to_page_cache_lru(page, mapping, index, GFP_NOFS)) { - page_cache_release(page); - page = NULL; - } - return page; -} -EXPORT_SYMBOL(grab_cache_page_nowait); - /* * CD/DVDs are error prone. When a medium error occurs, the driver may fail * a _large_ part of the i/o request. Imagine the worst scenario: @@ -1797,6 +2003,18 @@ int generic_file_readonly_mmap(struct file * file, struct vm_area_struct * vma) EXPORT_SYMBOL(generic_file_mmap); EXPORT_SYMBOL(generic_file_readonly_mmap); +static struct page *wait_on_page_read(struct page *page) +{ + if (!IS_ERR(page)) { + wait_on_page_locked(page); + if (!PageUptodate(page)) { + page_cache_release(page); + page = ERR_PTR(-EIO); + } + } + return page; +} + static struct page *__read_cache_page(struct address_space *mapping, pgoff_t index, int (*filler)(void *, struct page *), @@ -1823,6 +2041,8 @@ repeat: if (err < 0) { page_cache_release(page); page = ERR_PTR(err); + } else { + page = wait_on_page_read(page); } } return page; @@ -1859,6 +2079,10 @@ retry: if (err < 0) { page_cache_release(page); return ERR_PTR(err); + } else { + page = wait_on_page_read(page); + if (IS_ERR(page)) + return page; } out: mark_page_accessed(page); @@ -1866,40 +2090,25 @@ out: } /** - * read_cache_page_async - read into page cache, fill it if needed + * read_cache_page - read into page cache, fill it if needed * @mapping: the page's address_space * @index: the page index * @filler: function to perform the read * @data: first arg to filler(data, page) function, often left as NULL * - * Same as read_cache_page, but don't wait for page to become unlocked - * after submitting it to the filler. 
- * * Read into the page cache. If a page already exists, and PageUptodate() is - * not set, try to fill the page but don't wait for it to become unlocked. + * not set, try to fill the page and wait for it to become unlocked. * * If the page does not get brought uptodate, return -EIO. */ -struct page *read_cache_page_async(struct address_space *mapping, +struct page *read_cache_page(struct address_space *mapping, pgoff_t index, int (*filler)(void *, struct page *), void *data) { return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping)); } -EXPORT_SYMBOL(read_cache_page_async); - -static struct page *wait_on_page_read(struct page *page) -{ - if (!IS_ERR(page)) { - wait_on_page_locked(page); - if (!PageUptodate(page)) { - page_cache_release(page); - page = ERR_PTR(-EIO); - } - } - return page; -} +EXPORT_SYMBOL(read_cache_page); /** * read_cache_page_gfp - read into page cache, using specified page allocation flags. @@ -1918,31 +2127,10 @@ struct page *read_cache_page_gfp(struct address_space *mapping, { filler_t *filler = (filler_t *)mapping->a_ops->readpage; - return wait_on_page_read(do_read_cache_page(mapping, index, filler, NULL, gfp)); + return do_read_cache_page(mapping, index, filler, NULL, gfp); } EXPORT_SYMBOL(read_cache_page_gfp); -/** - * read_cache_page - read into page cache, fill it if needed - * @mapping: the page's address_space - * @index: the page index - * @filler: function to perform the read - * @data: first arg to filler(data, page) function, often left as NULL - * - * Read into the page cache. If a page already exists, and PageUptodate() is - * not set, try to fill the page then wait for it to become unlocked. - * - * If the page does not get brought uptodate, return -EIO. 
- */ -struct page *read_cache_page(struct address_space *mapping, - pgoff_t index, - int (*filler)(void *, struct page *), - void *data) -{ - return wait_on_page_read(read_cache_page_async(mapping, index, filler, data)); -} -EXPORT_SYMBOL(read_cache_page); - static size_t __iovec_copy_from_user_inatomic(char *vaddr, const struct iovec *iov, size_t base, size_t bytes) { @@ -1976,7 +2164,6 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, char *kaddr; size_t copied; - BUG_ON(!pagefault_disabled()); kaddr = kmap_atomic(page); if (likely(i->nr_segs == 1)) { int left; @@ -2186,7 +2373,6 @@ int pagecache_write_end(struct file *file, struct address_space *mapping, { const struct address_space_operations *aops = mapping->a_ops; - mark_page_accessed(page); return aops->write_end(file, mapping, pos, len, copied, page, fsdata); } EXPORT_SYMBOL(pagecache_write_end); @@ -2268,34 +2454,17 @@ EXPORT_SYMBOL(generic_file_direct_write); struct page *grab_cache_page_write_begin(struct address_space *mapping, pgoff_t index, unsigned flags) { - int status; - gfp_t gfp_mask; struct page *page; - gfp_t gfp_notmask = 0; + int fgp_flags = FGP_LOCK|FGP_ACCESSED|FGP_WRITE|FGP_CREAT; - gfp_mask = mapping_gfp_mask(mapping); - if (mapping_cap_account_dirty(mapping)) - gfp_mask |= __GFP_WRITE; if (flags & AOP_FLAG_NOFS) - gfp_notmask = __GFP_FS; -repeat: - page = find_lock_page(mapping, index); + fgp_flags |= FGP_NOFS; + + page = pagecache_get_page(mapping, index, fgp_flags, + mapping_gfp_mask(mapping)); if (page) - goto found; + wait_for_stable_page(page); - page = __page_cache_alloc(gfp_mask & ~gfp_notmask); - if (!page) - return NULL; - status = add_to_page_cache_lru(page, mapping, index, - GFP_KERNEL & ~gfp_notmask); - if (unlikely(status)) { - page_cache_release(page); - if (status == -EEXIST) - goto repeat; - return NULL; - } -found: - wait_for_stable_page(page); return page; } EXPORT_SYMBOL(grab_cache_page_write_begin); @@ -2344,18 +2513,15 @@ again: status = 
a_ops->write_begin(file, mapping, pos, bytes, flags, &page, &fsdata); - if (unlikely(status)) + if (unlikely(status < 0)) break; if (mapping_writably_mapped(mapping)) flush_dcache_page(page); - pagefault_disable(); copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes); - pagefault_enable(); flush_dcache_page(page); - mark_page_accessed(page); status = a_ops->write_end(file, mapping, pos, bytes, copied, page, fsdata); if (unlikely(status < 0)) @@ -2555,8 +2721,8 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (ret > 0) { ssize_t err; - err = generic_write_sync(file, pos, ret); - if (err < 0 && ret > 0) + err = generic_write_sync(file, iocb->ki_pos - ret, ret); + if (err < 0) ret = err; } return ret; diff --git a/mm/fremap.c b/mm/fremap.c index bbc4d66..34feba6 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -23,28 +23,44 @@ #include "internal.h" +static int mm_counter(struct page *page) +{ + return PageAnon(page) ? MM_ANONPAGES : MM_FILEPAGES; +} + static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { pte_t pte = *ptep; + struct page *page; + swp_entry_t entry; if (pte_present(pte)) { - struct page *page; - flush_cache_page(vma, addr, pte_pfn(pte)); pte = ptep_clear_flush(vma, addr, ptep); page = vm_normal_page(vma, addr, pte); if (page) { if (pte_dirty(pte)) set_page_dirty(page); + update_hiwater_rss(mm); + dec_mm_counter(mm, mm_counter(page)); page_remove_rmap(page); page_cache_release(page); + } + } else { /* zap_pte() is not called when pte_none() */ + if (!pte_file(pte)) { update_hiwater_rss(mm); - dec_mm_counter(mm, MM_FILEPAGES); + entry = pte_to_swp_entry(pte); + if (non_swap_entry(entry)) { + if (is_migration_entry(entry)) { + page = migration_entry_to_page(entry); + dec_mm_counter(mm, mm_counter(page)); + } + } else { + free_swap_and_cache(entry); + dec_mm_counter(mm, MM_SWAPENTS); + } } - } else { - if (!pte_file(pte)) - free_swap_and_cache(pte_to_swp_entry(pte)); 
pte_clear_not_present_full(mm, addr, ptep, 0); } } diff --git a/mm/frontswap.c b/mm/frontswap.c index 1b24bdc..f2a3571 100644 --- a/mm/frontswap.c +++ b/mm/frontswap.c @@ -244,8 +244,10 @@ int __frontswap_store(struct page *page) the (older) page from frontswap */ inc_frontswap_failed_stores(); - if (dup) + if (dup) { __frontswap_clear(sis, offset); + frontswap_ops->invalidate_page(type, offset); + } } if (frontswap_writethrough_enabled) /* report failure so swap also writes to swap device */ @@ -327,15 +329,12 @@ EXPORT_SYMBOL(__frontswap_invalidate_area); static unsigned long __frontswap_curr_pages(void) { - int type; unsigned long totalpages = 0; struct swap_info_struct *si = NULL; assert_spin_locked(&swap_lock); - for (type = swap_list.head; type >= 0; type = si->next) { - si = swap_info[type]; + plist_for_each_entry(si, &swap_active_head, list) totalpages += atomic_read(&si->frontswap_pages); - } return totalpages; } @@ -347,11 +346,9 @@ static int __frontswap_unuse_pages(unsigned long total, unsigned long *unused, int si_frontswap_pages; unsigned long total_pages_to_unuse = total; unsigned long pages = 0, pages_to_unuse = 0; - int type; assert_spin_locked(&swap_lock); - for (type = swap_list.head; type >= 0; type = si->next) { - si = swap_info[type]; + plist_for_each_entry(si, &swap_active_head, list) { si_frontswap_pages = atomic_read(&si->frontswap_pages); if (total_pages_to_unuse < si_frontswap_pages) { pages = pages_to_unuse = total_pages_to_unuse; @@ -366,7 +363,7 @@ static int __frontswap_unuse_pages(unsigned long total, unsigned long *unused, } vm_unacct_memory(pages); *unused = pages_to_unuse; - *swapid = type; + *swapid = si->type; ret = 0; break; } @@ -413,7 +410,7 @@ void frontswap_shrink(unsigned long target_pages) /* * we don't want to hold swap_lock while doing a very * lengthy try_to_unuse, but swap_list may change - * so restart scan from swap_list.head each time + * so restart scan from swap_active_head each time */ spin_lock(&swap_lock); ret 
= __frontswap_shrink(target_pages, &pages_to_unuse, &type); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 389973f..04535b6 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -192,7 +192,7 @@ retry: preempt_disable(); if (cmpxchg(&huge_zero_page, NULL, zero_page)) { preempt_enable(); - __free_page(zero_page); + __free_pages(zero_page, compound_order(zero_page)); goto retry; } @@ -224,7 +224,7 @@ static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink, if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) { struct page *zero_page = xchg(&huge_zero_page, NULL); BUG_ON(zero_page == NULL); - __free_page(zero_page); + __free_pages(zero_page, compound_order(zero_page)); return HPAGE_PMD_NR; } @@ -758,14 +758,6 @@ static inline struct page *alloc_hugepage_vma(int defrag, HPAGE_PMD_ORDER, vma, haddr, nd); } -#ifndef CONFIG_NUMA -static inline struct page *alloc_hugepage(int defrag) -{ - return alloc_pages(alloc_hugepage_gfpmask(defrag, 0), - HPAGE_PMD_ORDER); -} -#endif - static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd, struct page *zero_page) @@ -1549,15 +1541,22 @@ pmd_t *page_check_address_pmd(struct page *page, unsigned long address, enum page_check_address_pmd_flag flag) { + pgd_t *pgd; + pud_t *pud; pmd_t *pmd, *ret = NULL; if (address & ~HPAGE_PMD_MASK) goto out; - pmd = mm_find_pmd(mm, address); - if (!pmd) + pgd = pgd_offset(mm, address); + if (!pgd_present(*pgd)) + goto out; + pud = pud_offset(pgd, address); + if (!pud_present(*pud)) goto out; - if (pmd_none(*pmd)) + pmd = pmd_offset(pud, address); + + if (!pmd_present(*pmd)) goto out; if (pmd_page(*pmd) != page) goto out; @@ -1748,21 +1747,24 @@ static int __split_huge_page_map(struct page *page, if (pmd) { pgtable = pgtable_trans_huge_withdraw(mm, pmd); pmd_populate(mm, &_pmd, pgtable); + if (pmd_write(*pmd)) + BUG_ON(page_mapcount(page) != 1); haddr = address; for (i = 0; i < HPAGE_PMD_NR; i++, haddr 
+= PAGE_SIZE) { pte_t *pte, entry; BUG_ON(PageCompound(page+i)); + /* + * Note that pmd_numa is not transferred deliberately + * to avoid any possibility that pte_numa leaks to + * a PROT_NONE VMA by accident. + */ entry = mk_pte(page + i, vma->vm_page_prot); entry = maybe_mkwrite(pte_mkdirty(entry), vma); if (!pmd_write(*pmd)) entry = pte_wrprotect(entry); - else - BUG_ON(page_mapcount(page) != 1); if (!pmd_young(*pmd)) entry = pte_mkold(entry); - if (pmd_numa(*pmd)) - entry = pte_mknuma(entry); pte = pte_offset_map(&_pmd, haddr); BUG_ON(!pte_none(*pte)); set_pte_at(mm, haddr, pte, entry); @@ -2197,7 +2199,58 @@ static void khugepaged_alloc_sleep(void) msecs_to_jiffies(khugepaged_alloc_sleep_millisecs)); } +static int khugepaged_node_load[MAX_NUMNODES]; + +static bool khugepaged_scan_abort(int nid) +{ + int i; + + /* + * If zone_reclaim_mode is disabled, then no extra effort is made to + * allocate memory locally. + */ + if (!zone_reclaim_mode) + return false; + + /* If there is a count for this node already, it must be acceptable */ + if (khugepaged_node_load[nid]) + return false; + + for (i = 0; i < MAX_NUMNODES; i++) { + if (!khugepaged_node_load[i]) + continue; + if (node_distance(nid, i) > RECLAIM_DISTANCE) + return true; + } + return false; +} + #ifdef CONFIG_NUMA +static int khugepaged_find_target_node(void) +{ + static int last_khugepaged_target_node = NUMA_NO_NODE; + int nid, target_node = 0, max_value = 0; + + /* find first node with max normal pages hit */ + for (nid = 0; nid < MAX_NUMNODES; nid++) + if (khugepaged_node_load[nid] > max_value) { + max_value = khugepaged_node_load[nid]; + target_node = nid; + } + + /* do some balance if several nodes have the same hit record */ + if (target_node <= last_khugepaged_target_node) + for (nid = last_khugepaged_target_node + 1; nid < MAX_NUMNODES; + nid++) + if (max_value == khugepaged_node_load[nid]) { + target_node = nid; + break; + } + + last_khugepaged_target_node = target_node; + return target_node; +} + 
static bool khugepaged_prealloc_page(struct page **hpage, bool *wait) { if (IS_ERR(*hpage)) { @@ -2231,9 +2284,8 @@ static struct page * mmap_sem in read mode is good idea also to allow greater * scalability. */ - *hpage = alloc_hugepage_vma(khugepaged_defrag(), vma, address, - node, __GFP_OTHER_NODE); - + *hpage = alloc_pages_exact_node(node, alloc_hugepage_gfpmask( + khugepaged_defrag(), __GFP_OTHER_NODE), HPAGE_PMD_ORDER); /* * After allocating the hugepage, release the mmap_sem read lock in * preparation for taking it in write mode. @@ -2249,6 +2301,17 @@ static struct page return *hpage; } #else +static int khugepaged_find_target_node(void) +{ + return 0; +} + +static inline struct page *alloc_hugepage(int defrag) +{ + return alloc_pages(alloc_hugepage_gfpmask(defrag, 0), + HPAGE_PMD_ORDER); +} + static struct page *khugepaged_alloc_hugepage(bool *wait) { struct page *hpage; @@ -2352,8 +2415,6 @@ static void collapse_huge_page(struct mm_struct *mm, pmd = mm_find_pmd(mm, address); if (!pmd) goto out; - if (pmd_trans_huge(*pmd)) - goto out; anon_vma_lock_write(vma->anon_vma); @@ -2452,9 +2513,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, pmd = mm_find_pmd(mm, address); if (!pmd) goto out; - if (pmd_trans_huge(*pmd)) - goto out; + memset(khugepaged_node_load, 0, sizeof(khugepaged_node_load)); pte = pte_offset_map_lock(mm, pmd, address, &ptl); for (_address = address, _pte = pte; _pte < pte+HPAGE_PMD_NR; _pte++, _address += PAGE_SIZE) { @@ -2471,12 +2531,15 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, if (unlikely(!page)) goto out_unmap; /* - * Chose the node of the first page. This could - * be more sophisticated and look at more pages, - * but isn't for now. + * Record which node the original page is from and save this + * information to khugepaged_node_load[]. + * Khugepaged will allocate hugepage from the node that has the max + * hit record.
*/ - if (node == NUMA_NO_NODE) - node = page_to_nid(page); + node = page_to_nid(page); + if (khugepaged_scan_abort(node)) + goto out_unmap; + khugepaged_node_load[node]++; VM_BUG_ON(PageCompound(page)); if (!PageLRU(page) || PageLocked(page) || !PageAnon(page)) goto out_unmap; @@ -2491,9 +2554,11 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, ret = 1; out_unmap: pte_unmap_unlock(pte, ptl); - if (ret) + if (ret) { + node = khugepaged_find_target_node(); /* collapse_huge_page will return with the mmap_sem released */ collapse_huge_page(mm, address, hpage, vma, node); + } out: return ret; } @@ -2801,12 +2866,22 @@ void split_huge_page_pmd_mm(struct mm_struct *mm, unsigned long address, static void split_huge_page_address(struct mm_struct *mm, unsigned long address) { + pgd_t *pgd; + pud_t *pud; pmd_t *pmd; VM_BUG_ON(!(address & ~HPAGE_PMD_MASK)); - pmd = mm_find_pmd(mm, address); - if (!pmd) + pgd = pgd_offset(mm, address); + if (!pgd_present(*pgd)) + return; + + pud = pud_offset(pgd, address); + if (!pud_present(*pud)) + return; + + pmd = pmd_offset(pud, address); + if (!pmd_present(*pmd)) return; /* * Caller holds the mmap_sem write mode, so a huge pmd cannot diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f0a4ca4..c33d8a6 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -574,7 +574,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h, goto err; retry_cpuset: - cpuset_mems_cookie = get_mems_allowed(); + cpuset_mems_cookie = read_mems_allowed_begin(); zonelist = huge_zonelist(vma, address, htlb_alloc_mask(h), &mpol, &nodemask); @@ -596,7 +596,7 @@ retry_cpuset: } mpol_cond_put(mpol); - if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page)) + if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie))) goto retry_cpuset; return page; @@ -1177,6 +1177,7 @@ static void return_unused_surplus_pages(struct hstate *h, while (nr_pages--) { if (!free_pool_huge_page(h, &node_states[N_MEMORY], 1)) break; + cond_resched_lock(&hugetlb_lock); } } @@ 
-1552,6 +1553,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count, while (min_count < persistent_huge_pages(h)) { if (!free_pool_huge_page(h, nodes_allowed, 0)) break; + cond_resched_lock(&hugetlb_lock); } while (count < persistent_huge_pages(h)) { if (!adjust_pool_surplus(h, nodes_allowed, 1)) @@ -2112,6 +2114,9 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy, unsigned long tmp; int ret; + if (!hugepages_supported()) + return -ENOTSUPP; + tmp = h->max_huge_pages; if (write && h->order >= MAX_ORDER) @@ -2165,6 +2170,9 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write, unsigned long tmp; int ret; + if (!hugepages_supported()) + return -ENOTSUPP; + tmp = h->nr_overcommit_huge_pages; if (write && h->order >= MAX_ORDER) @@ -2190,6 +2198,8 @@ out: void hugetlb_report_meminfo(struct seq_file *m) { struct hstate *h = &default_hstate; + if (!hugepages_supported()) + return; seq_printf(m, "HugePages_Total: %5lu\n" "HugePages_Free: %5lu\n" @@ -2206,6 +2216,8 @@ void hugetlb_report_meminfo(struct seq_file *m) int hugetlb_report_node_meminfo(int nid, char *buf) { struct hstate *h = &default_hstate; + if (!hugepages_supported()) + return 0; return sprintf(buf, "Node %d HugePages_Total: %5u\n" "Node %d HugePages_Free: %5u\n" @@ -2220,6 +2232,9 @@ void hugetlb_show_meminfo(void) struct hstate *h; int nid; + if (!hugepages_supported()) + return; + for_each_node_state(nid, N_MEMORY) for_each_hstate(h) pr_info("Node %d hugepages_total=%u hugepages_free=%u hugepages_surp=%u hugepages_size=%lukB\n", @@ -2379,6 +2394,31 @@ static void set_huge_ptep_writable(struct vm_area_struct *vma, update_mmu_cache(vma, address, ptep); } +static int is_hugetlb_entry_migration(pte_t pte) +{ + swp_entry_t swp; + + if (huge_pte_none(pte) || pte_present(pte)) + return 0; + swp = pte_to_swp_entry(pte); + if (non_swap_entry(swp) && is_migration_entry(swp)) + return 1; + else + return 0; +} + +static int is_hugetlb_entry_hwpoisoned(pte_t 
pte) +{ + swp_entry_t swp; + + if (huge_pte_none(pte) || pte_present(pte)) + return 0; + swp = pte_to_swp_entry(pte); + if (non_swap_entry(swp) && is_hwpoison_entry(swp)) + return 1; + else + return 0; +} int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma) @@ -2406,7 +2446,24 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, spin_lock(&dst->page_table_lock); spin_lock_nested(&src->page_table_lock, SINGLE_DEPTH_NESTING); - if (!huge_pte_none(huge_ptep_get(src_pte))) { + entry = huge_ptep_get(src_pte); + if (huge_pte_none(entry)) { /* skip none entry */ + ; + } else if (unlikely(is_hugetlb_entry_migration(entry) || + is_hugetlb_entry_hwpoisoned(entry))) { + swp_entry_t swp_entry = pte_to_swp_entry(entry); + + if (is_write_migration_entry(swp_entry) && cow) { + /* + * COW mappings require pages in both + * parent and child to be set to read. + */ + make_migration_entry_read(&swp_entry); + entry = swp_entry_to_pte(swp_entry); + set_huge_pte_at(src, addr, src_pte, entry); + } + set_huge_pte_at(dst, addr, dst_pte, entry); + } else { if (cow) huge_ptep_set_wrprotect(src, addr, src_pte); entry = huge_ptep_get(src_pte); @@ -2424,32 +2481,6 @@ nomem: return -ENOMEM; } -static int is_hugetlb_entry_migration(pte_t pte) -{ - swp_entry_t swp; - - if (huge_pte_none(pte) || pte_present(pte)) - return 0; - swp = pte_to_swp_entry(pte); - if (non_swap_entry(swp) && is_migration_entry(swp)) - return 1; - else - return 0; -} - -static int is_hugetlb_entry_hwpoisoned(pte_t pte) -{ - swp_entry_t swp; - - if (huge_pte_none(pte) || pte_present(pte)) - return 0; - swp = pte_to_swp_entry(pte); - if (non_swap_entry(swp) && is_hwpoison_entry(swp)) - return 1; - else - return 0; -} - void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct page *ref_page) diff --git a/mm/internal.h b/mm/internal.h index 8b6cfd6..d610f7c 100644 --- a/mm/internal.h +++ 
b/mm/internal.h @@ -11,6 +11,7 @@ #ifndef __MM_INTERNAL_H #define __MM_INTERNAL_H +#include <linux/fs.h> #include <linux/mm.h> void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, @@ -21,6 +22,20 @@ static inline void set_page_count(struct page *page, int v) atomic_set(&page->_count, v); } +extern int __do_page_cache_readahead(struct address_space *mapping, + struct file *filp, pgoff_t offset, unsigned long nr_to_read, + unsigned long lookahead_size); + +/* + * Submit IO for the read-ahead request in file_ra_state. + */ +static inline unsigned long ra_submit(struct file_ra_state *ra, + struct address_space *mapping, struct file *filp) +{ + return __do_page_cache_readahead(mapping, filp, + ra->start, ra->size, ra->async_size); +} + /* * Turn a non-refcounted page (->_count == 0) into refcounted with * a count of one. @@ -120,7 +135,7 @@ struct compact_control { unsigned long nr_migratepages; /* Number of pages to migrate */ unsigned long free_pfn; /* isolate_freepages search base */ unsigned long migrate_pfn; /* isolate_migratepages search base */ - bool sync; /* Synchronous migration */ + enum migrate_mode mode; /* Async or sync migration mode */ bool ignore_skip_hint; /* Scan blocks even if marked skip */ bool finished_update_free; /* True when the zone cached pfns are * no longer being updated @@ -130,7 +145,10 @@ struct compact_control { int order; /* order a direct compactor needs */ int migratetype; /* MOVABLE, RECLAIMABLE etc */ struct zone *zone; - bool contended; /* True if a lock was contended */ + bool contended; /* True if a lock was contended, or + * need_resched() true during async + * compaction + */ }; unsigned long @@ -369,5 +387,6 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone, #define ALLOC_HIGH 0x20 /* __GFP_HIGH set */ #define ALLOC_CPUSET 0x40 /* check for correct cpuset */ #define ALLOC_CMA 0x80 /* allow allocations from CMA areas */ +#define ALLOC_FAIR 0x100 /* fair zone allocation */ #endif /* 
__MM_INTERNAL_H */ diff --git a/mm/kmemleak.c b/mm/kmemleak.c index e126b0e..31f01c5 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -753,7 +753,9 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp) } spin_lock_irqsave(&object->lock, flags); - if (ptr + size > object->pointer + object->size) { + if (size == SIZE_MAX) { + size = object->pointer + object->size - ptr; + } else if (ptr + size > object->pointer + object->size) { kmemleak_warn("Scan area larger than object 0x%08lx\n", ptr); dump_object_info(object); kmem_cache_free(scan_area_cache, area); @@ -945,7 +945,6 @@ static int replace_page(struct vm_area_struct *vma, struct page *page, pmd = mm_find_pmd(mm, addr); if (!pmd) goto out; - BUG_ON(pmd_trans_huge(*pmd)); mmun_start = addr; mmun_end = addr + PAGE_SIZE; diff --git a/mm/madvise.c b/mm/madvise.c index 539eeb9..a402f8f 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -195,7 +195,7 @@ static void force_shm_swapin_readahead(struct vm_area_struct *vma, for (; start < end; start += PAGE_SIZE) { index = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - page = find_get_page(mapping, index); + page = find_get_entry(mapping, index); if (!radix_tree_exceptional_entry(page)) { if (page) page_cache_release(page); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index bc16ebc..003521e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -250,6 +250,9 @@ struct mem_cgroup { /* vmpressure notifications */ struct vmpressure vmpressure; + /* css_online() has been completed */ + int initialized; + /* * the counter to account for mem+swap usage. */ @@ -1089,9 +1092,23 @@ skip_node: * skipping css reference should be safe. 
*/ if (next_css) { - if ((next_css == &root->css) || - ((next_css->flags & CSS_ONLINE) && css_tryget(next_css))) - return mem_cgroup_from_css(next_css); + struct mem_cgroup *memcg = mem_cgroup_from_css(next_css); + + if (next_css == &root->css) + return memcg; + + if (css_tryget(next_css)) { + if (memcg->initialized) { + /* + * Make sure the memcg is initialized: + * mem_cgroup_css_online() orders the + * initialization against setting the flag. + */ + smp_rmb(); + return memcg; + } + css_put(next_css); + } prev_css = next_css; goto skip_node; @@ -1820,13 +1837,18 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, break; }; points = oom_badness(task, memcg, NULL, totalpages); - if (points > chosen_points) { - if (chosen) - put_task_struct(chosen); - chosen = task; - chosen_points = points; - get_task_struct(chosen); - } + if (!points || points < chosen_points) + continue; + /* Prefer thread group leaders for display purposes */ + if (points == chosen_points && + thread_group_leader(chosen)) + continue; + + if (chosen) + put_task_struct(chosen); + chosen = task; + chosen_points = points; + get_task_struct(chosen); } css_task_iter_end(&it); } @@ -5643,8 +5665,12 @@ static int mem_cgroup_oom_notify_cb(struct mem_cgroup *memcg) { struct mem_cgroup_eventfd_list *ev; + spin_lock(&memcg_oom_lock); + list_for_each_entry(ev, &memcg->oom_notify, list) eventfd_signal(ev->eventfd, 1); + + spin_unlock(&memcg_oom_lock); return 0; } @@ -6322,6 +6348,16 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css) error = memcg_init_kmem(memcg, &mem_cgroup_subsys); mutex_unlock(&memcg_create_mutex); + + if (!error) { + /* + * Make sure the memcg is initialized: mem_cgroup_iter() + * orders reading memcg->initialized against its callers + * reading the memcg members.
+ */ + smp_wmb(); + memcg->initialized = 1; + } return error; } diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 56ad540..6b6c906 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -208,9 +208,9 @@ static int kill_proc(struct task_struct *t, unsigned long addr, int trapno, #endif si.si_addr_lsb = compound_order(compound_head(page)) + PAGE_SHIFT; - if ((flags & MF_ACTION_REQUIRED) && t == current) { + if ((flags & MF_ACTION_REQUIRED) && t->mm == current->mm) { si.si_code = BUS_MCEERR_AR; - ret = force_sig_info(SIGBUS, &si, t); + ret = force_sig_info(SIGBUS, &si, current); } else { /* * Don't use force here, it's convenient if the signal @@ -384,20 +384,51 @@ static void kill_procs(struct list_head *to_kill, int forcekill, int trapno, } } -static int task_early_kill(struct task_struct *tsk) +/* + * Find a dedicated thread which is supposed to handle SIGBUS(BUS_MCEERR_AO) + * on behalf of the thread group. Return task_struct of the (first found) + * dedicated thread if found, and return NULL otherwise. + * + * We already hold read_lock(&tasklist_lock) in the caller, so we don't + * have to call rcu_read_lock/unlock() in this function. + */ +static struct task_struct *find_early_kill_thread(struct task_struct *tsk) { + struct task_struct *t; + + for_each_thread(tsk, t) + if ((t->flags & PF_MCE_PROCESS) && (t->flags & PF_MCE_EARLY)) + return t; + return NULL; +} + +/* + * Determine whether a given process is "early kill" process which expects + * to be signaled when some page under the process is hwpoisoned. + * Return task_struct of the dedicated thread (main thread unless explicitly + * specified) if the process is "early kill," and otherwise returns NULL. 
+ */ +static struct task_struct *task_early_kill(struct task_struct *tsk, + int force_early) +{ + struct task_struct *t; if (!tsk->mm) - return 0; - if (tsk->flags & PF_MCE_PROCESS) - return !!(tsk->flags & PF_MCE_EARLY); - return sysctl_memory_failure_early_kill; + return NULL; + if (force_early) + return tsk; + t = find_early_kill_thread(tsk); + if (t) + return t; + if (sysctl_memory_failure_early_kill) + return tsk; + return NULL; } /* * Collect processes when the error hit an anonymous page. */ static void collect_procs_anon(struct page *page, struct list_head *to_kill, - struct to_kill **tkc) + struct to_kill **tkc, int force_early) { struct vm_area_struct *vma; struct task_struct *tsk; @@ -412,16 +443,17 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, read_lock(&tasklist_lock); for_each_process (tsk) { struct anon_vma_chain *vmac; + struct task_struct *t = task_early_kill(tsk, force_early); - if (!task_early_kill(tsk)) + if (!t) continue; anon_vma_interval_tree_foreach(vmac, &av->rb_root, pgoff, pgoff) { vma = vmac->vma; if (!page_mapped_in_vma(page, vma)) continue; - if (vma->vm_mm == tsk->mm) - add_to_kill(tsk, page, vma, to_kill, tkc); + if (vma->vm_mm == t->mm) + add_to_kill(t, page, vma, to_kill, tkc); } } read_unlock(&tasklist_lock); @@ -432,7 +464,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, * Collect processes when the error hit a file mapped page. 
*/ static void collect_procs_file(struct page *page, struct list_head *to_kill, - struct to_kill **tkc) + struct to_kill **tkc, int force_early) { struct vm_area_struct *vma; struct task_struct *tsk; @@ -442,10 +474,10 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, read_lock(&tasklist_lock); for_each_process(tsk) { pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + struct task_struct *t = task_early_kill(tsk, force_early); - if (!task_early_kill(tsk)) + if (!t) continue; - vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { /* @@ -455,8 +487,8 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, * Assume applications who requested early kill want * to be informed of all such data corruptions. */ - if (vma->vm_mm == tsk->mm) - add_to_kill(tsk, page, vma, to_kill, tkc); + if (vma->vm_mm == t->mm) + add_to_kill(t, page, vma, to_kill, tkc); } } read_unlock(&tasklist_lock); @@ -469,7 +501,8 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, * First preallocate one tokill structure outside the spin locks, * so that we can kill at least one process reasonably reliable. */ -static void collect_procs(struct page *page, struct list_head *tokill) +static void collect_procs(struct page *page, struct list_head *tokill, + int force_early) { struct to_kill *tk; @@ -480,9 +513,9 @@ static void collect_procs(struct page *page, struct list_head *tokill) if (!tk) return; if (PageAnon(page)) - collect_procs_anon(page, tokill, &tk); + collect_procs_anon(page, tokill, &tk, force_early); else - collect_procs_file(page, tokill, &tk); + collect_procs_file(page, tokill, &tk, force_early); kfree(tk); } @@ -967,7 +1000,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, * there's nothing that can be done. 
*/ if (kill) - collect_procs(ppage, &tokill); + collect_procs(ppage, &tokill, flags & MF_ACTION_REQUIRED); ret = try_to_unmap(ppage, ttu); if (ret != SWAP_SUCCESS) @@ -1085,15 +1118,16 @@ int memory_failure(unsigned long pfn, int trapno, int flags) return 0; } else if (PageHuge(hpage)) { /* - * Check "just unpoisoned", "filter hit", and - * "race with other subpage." + * Check "filter hit" and "race with other subpage." */ lock_page(hpage); - if (!PageHWPoison(hpage) - || (hwpoison_filter(p) && TestClearPageHWPoison(p)) - || (p != hpage && TestSetPageHWPoison(hpage))) { - atomic_long_sub(nr_pages, &num_poisoned_pages); - return 0; + if (PageHWPoison(hpage)) { + if ((hwpoison_filter(p) && TestClearPageHWPoison(p)) + || (p != hpage && TestSetPageHWPoison(hpage))) { + atomic_long_sub(nr_pages, &num_poisoned_pages); + unlock_page(hpage); + return 0; + } } set_page_hwpoison_huge_page(hpage); res = dequeue_hwpoisoned_huge_page(hpage); @@ -1156,6 +1190,8 @@ int memory_failure(unsigned long pfn, int trapno, int flags) */ if (!PageHWPoison(p)) { printk(KERN_ERR "MCE %#lx: just unpoisoned\n", pfn); + atomic_long_sub(nr_pages, &num_poisoned_pages); + put_page(hpage); res = 0; goto out; } @@ -1518,7 +1554,7 @@ static int soft_offline_huge_page(struct page *page, int flags) /* Keep page count to indicate a given hugepage is isolated. 
*/ list_move(&hpage->lru, &pagelist); - ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, + ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL, MIGRATE_SYNC, MR_MEMORY_FAILURE); if (ret) { pr_info("soft offline: %#lx: migration failed %d, type %lx\n", @@ -1599,7 +1635,7 @@ static int __soft_offline_page(struct page *page, int flags) inc_zone_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page)); list_add(&page->lru, &pagelist); - ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, + ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL, MIGRATE_SYNC, MR_MEMORY_FAILURE); if (ret) { putback_lru_pages(&pagelist); diff --git a/mm/memory.c b/mm/memory.c index 0dcdc84..068dc06 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -808,20 +808,20 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, if (!pte_file(pte)) { swp_entry_t entry = pte_to_swp_entry(pte); - if (swap_duplicate(entry) < 0) - return entry.val; - - /* make sure dst_mm is on swapoff's mmlist. */ - if (unlikely(list_empty(&dst_mm->mmlist))) { - spin_lock(&mmlist_lock); - if (list_empty(&dst_mm->mmlist)) - list_add(&dst_mm->mmlist, - &src_mm->mmlist); - spin_unlock(&mmlist_lock); - } - if (likely(!non_swap_entry(entry))) + if (likely(!non_swap_entry(entry))) { + if (swap_duplicate(entry) < 0) + return entry.val; + + /* make sure dst_mm is on swapoff's mmlist. 
*/ + if (unlikely(list_empty(&dst_mm->mmlist))) { + spin_lock(&mmlist_lock); + if (list_empty(&dst_mm->mmlist)) + list_add(&dst_mm->mmlist, + &src_mm->mmlist); + spin_unlock(&mmlist_lock); + } rss[MM_SWAPENTS]++; - else if (is_migration_entry(entry)) { + } else if (is_migration_entry(entry)) { page = migration_entry_to_page(entry); if (PageAnon(page)) @@ -878,7 +878,7 @@ out_set_pte: return 0; } -int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, +static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma, unsigned long addr, unsigned long end) { @@ -1929,12 +1929,17 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, unsigned long address, unsigned int fault_flags) { struct vm_area_struct *vma; + vm_flags_t vm_flags; int ret; vma = find_extend_vma(mm, address); if (!vma || address < vma->vm_start) return -EFAULT; + vm_flags = (fault_flags & FAULT_FLAG_WRITE) ? VM_WRITE : VM_READ; + if (!(vm_flags & vma->vm_flags)) + return -EFAULT; + ret = handle_mm_fault(mm, vma, address, fault_flags); if (ret & VM_FAULT_ERROR) { if (ret & VM_FAULT_OOM) @@ -3189,7 +3194,7 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo if (prev && prev->vm_end == address) return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM; - expand_downwards(vma, address - PAGE_SIZE); + return expand_downwards(vma, address - PAGE_SIZE); } if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) { struct vm_area_struct *next = vma->vm_next; @@ -3198,7 +3203,7 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo if (next && next->vm_start == address + PAGE_SIZE) return next->vm_flags & VM_GROWSUP ? 
0 : -ENOMEM; - expand_upwards(vma, address + PAGE_SIZE); + return expand_upwards(vma, address + PAGE_SIZE); } return 0; } @@ -3693,7 +3698,7 @@ static int handle_pte_fault(struct mm_struct *mm, pte_t entry; spinlock_t *ptl; - entry = *pte; + entry = ACCESS_ONCE(*pte); if (!pte_present(entry)) { if (pte_none(entry)) { if (vma->vm_ops) { diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index ed85fe3..d317305 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1321,7 +1321,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) * alloc_migrate_target should be improooooved!! * migrate_pages returns # of failed pages. */ - ret = migrate_pages(&source, alloc_migrate_target, 0, + ret = migrate_pages(&source, alloc_migrate_target, NULL, 0, MIGRATE_SYNC, MR_MEMORY_HOTPLUG); if (ret) putback_movable_pages(&source); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 927a69c..3650036 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -525,9 +525,13 @@ static void queue_pages_hugetlb_pmd_range(struct vm_area_struct *vma, #ifdef CONFIG_HUGETLB_PAGE int nid; struct page *page; + pte_t entry; spin_lock(&vma->vm_mm->page_table_lock); - page = pte_page(huge_ptep_get((pte_t *)pmd)); + entry = huge_ptep_get((pte_t *)pmd); + if (!pte_present(entry)) + goto unlock; + page = pte_page(entry); nid = page_to_nid(page); if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT)) goto unlock; @@ -649,19 +653,18 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma, * @nodes and @flags,) it's isolated and queued to the pagelist which is * passed via @private.) 
*/ -static struct vm_area_struct * +static int queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end, const nodemask_t *nodes, unsigned long flags, void *private) { - int err; - struct vm_area_struct *first, *vma, *prev; - + int err = 0; + struct vm_area_struct *vma, *prev; - first = find_vma(mm, start); - if (!first) - return ERR_PTR(-EFAULT); + vma = find_vma(mm, start); + if (!vma) + return -EFAULT; prev = NULL; - for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) { + for (; vma && vma->vm_start < end; vma = vma->vm_next) { unsigned long endvma = vma->vm_end; if (endvma > end) @@ -671,9 +674,9 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end, if (!(flags & MPOL_MF_DISCONTIG_OK)) { if (!vma->vm_next && vma->vm_end < end) - return ERR_PTR(-EFAULT); + return -EFAULT; if (prev && prev->vm_end < vma->vm_start) - return ERR_PTR(-EFAULT); + return -EFAULT; } if (flags & MPOL_MF_LAZY) { @@ -687,15 +690,13 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end, err = queue_pages_pgd_range(vma, start, endvma, nodes, flags, private); - if (err) { - first = ERR_PTR(err); + if (err) break; - } } next: prev = vma; } - return first; + return err; } /* @@ -1059,7 +1060,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest, flags | MPOL_MF_DISCONTIG_OK, &pagelist); if (!list_empty(&pagelist)) { - err = migrate_pages(&pagelist, new_node_page, dest, + err = migrate_pages(&pagelist, new_node_page, NULL, dest, MIGRATE_SYNC, MR_SYSCALL); if (err) putback_movable_pages(&pagelist); @@ -1180,30 +1181,31 @@ out: /* * Allocate a new page for page migration based on vma policy. - * Start assuming that page is mapped by vma pointed to by @private. + * Start by assuming the page is mapped by the same vma as contains @start. * Search forward from there, if not. 
N.B., this assumes that the * list of pages handed to migrate_pages()--which is how we get here-- * is in virtual address order. */ -static struct page *new_vma_page(struct page *page, unsigned long private, int **x) +static struct page *new_page(struct page *page, unsigned long start, int **x) { - struct vm_area_struct *vma = (struct vm_area_struct *)private; + struct vm_area_struct *vma; unsigned long uninitialized_var(address); + vma = find_vma(current->mm, start); while (vma) { address = page_address_in_vma(page, vma); if (address != -EFAULT) break; vma = vma->vm_next; } - /* - * queue_pages_range() confirms that @page belongs to some vma, - * so vma shouldn't be NULL. - */ - BUG_ON(!vma); - if (PageHuge(page)) + if (PageHuge(page)) { + BUG_ON(!vma); return alloc_huge_page_noerr(vma, address, 1); + } + /* + * if !vma, alloc_page_vma() will use task or system default policy + */ return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); } #else @@ -1219,7 +1221,7 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, return -ENOSYS; } -static struct page *new_vma_page(struct page *page, unsigned long private, int **x) +static struct page *new_page(struct page *page, unsigned long start, int **x) { return NULL; } @@ -1229,7 +1231,6 @@ static long do_mbind(unsigned long start, unsigned long len, unsigned short mode, unsigned short mode_flags, nodemask_t *nmask, unsigned long flags) { - struct vm_area_struct *vma; struct mm_struct *mm = current->mm; struct mempolicy *new; unsigned long end; @@ -1295,11 +1296,9 @@ static long do_mbind(unsigned long start, unsigned long len, if (err) goto mpol_out; - vma = queue_pages_range(mm, start, end, nmask, + err = queue_pages_range(mm, start, end, nmask, flags | MPOL_MF_INVERT, &pagelist); - - err = PTR_ERR(vma); /* maybe ... 
*/ - if (!IS_ERR(vma)) + if (!err) err = mbind_range(mm, start, end, new); if (!err) { @@ -1307,9 +1306,8 @@ static long do_mbind(unsigned long start, unsigned long len, if (!list_empty(&pagelist)) { WARN_ON_ONCE(flags & MPOL_MF_LAZY); - nr_failed = migrate_pages(&pagelist, new_vma_page, - (unsigned long)vma, - MIGRATE_SYNC, MR_MEMPOLICY_MBIND); + nr_failed = migrate_pages(&pagelist, new_page, NULL, + start, MIGRATE_SYNC, MR_MEMPOLICY_MBIND); if (nr_failed) putback_movable_pages(&pagelist); } @@ -1875,7 +1873,7 @@ int node_random(const nodemask_t *maskp) * If the effective policy is 'BIND, returns a pointer to the mempolicy's * @nodemask for filtering the zonelist. * - * Must be protected by get_mems_allowed() + * Must be protected by read_mems_allowed_begin() */ struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol, @@ -2039,7 +2037,7 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, retry_cpuset: pol = get_vma_policy(current, vma, addr); - cpuset_mems_cookie = get_mems_allowed(); + cpuset_mems_cookie = read_mems_allowed_begin(); if (unlikely(pol->mode == MPOL_INTERLEAVE)) { unsigned nid; @@ -2047,7 +2045,7 @@ retry_cpuset: nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order); mpol_cond_put(pol); page = alloc_page_interleave(gfp, order, nid); - if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page)) + if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie))) goto retry_cpuset; return page; @@ -2057,7 +2055,7 @@ retry_cpuset: policy_nodemask(gfp, pol)); if (unlikely(mpol_needs_cond_ref(pol))) __mpol_put(pol); - if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page)) + if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie))) goto retry_cpuset; return page; } @@ -2091,7 +2089,7 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order) pol = &default_policy; retry_cpuset: - cpuset_mems_cookie = get_mems_allowed(); + cpuset_mems_cookie = 
read_mems_allowed_begin(); /* * No reference counting needed for current->mempolicy @@ -2104,7 +2102,7 @@ retry_cpuset: policy_zonelist(gfp, pol, numa_node_id()), policy_nodemask(gfp, pol)); - if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page)) + if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie))) goto retry_cpuset; return page; @@ -2148,7 +2146,6 @@ struct mempolicy *__mpol_dup(struct mempolicy *old) } else *new = *old; - rcu_read_lock(); if (current_cpuset_is_being_rebound()) { nodemask_t mems = cpuset_mems_allowed(current); if (new->flags & MPOL_F_REBINDING) @@ -2156,7 +2153,6 @@ struct mempolicy *__mpol_dup(struct mempolicy *old) else mpol_rebind_policy(new, &mems, MPOL_REBIND_ONCE); } - rcu_read_unlock(); atomic_set(&new->refcnt, 1); return new; } diff --git a/mm/migrate.c b/mm/migrate.c index e3cf71d..fac5fa0 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -136,8 +136,6 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma, pmd = mm_find_pmd(mm, addr); if (!pmd) goto out; - if (pmd_trans_huge(*pmd)) - goto out; ptep = pte_offset_map(pmd, addr); @@ -164,8 +162,11 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma, pte = pte_mkold(mk_pte(new, vma->vm_page_prot)); if (pte_swp_soft_dirty(*ptep)) pte = pte_mksoft_dirty(pte); + + /* Recheck VMA as permissions can change since migration started */ if (is_write_migration_entry(entry)) - pte = pte_mkwrite(pte); + pte = maybe_mkwrite(pte, vma); + #ifdef CONFIG_HUGETLB_PAGE if (PageHuge(new)) { pte = pte_mkhuge(pte); @@ -867,8 +868,9 @@ out: * Obtain the lock on page, remove all ptes and migrate the page * to the newly allocated page in newpage. 
*/ -static int unmap_and_move(new_page_t get_new_page, unsigned long private, - struct page *page, int force, enum migrate_mode mode) +static int unmap_and_move(new_page_t get_new_page, free_page_t put_new_page, + unsigned long private, struct page *page, int force, + enum migrate_mode mode) { int rc = 0; int *result = NULL; @@ -912,11 +914,18 @@ out: page_is_file_cache(page)); putback_lru_page(page); } + /* - * Move the new page to the LRU. If migration was not successful - * then this will free the page. + * If migration was not successful and there's a freeing callback, use + * it. Otherwise, putback_lru_page() will drop the reference grabbed + * during isolation. */ - putback_lru_page(newpage); + if (rc != MIGRATEPAGE_SUCCESS && put_new_page) { + ClearPageSwapBacked(newpage); + put_new_page(newpage, private); + } else + putback_lru_page(newpage); + if (result) { if (rc) *result = rc; @@ -945,8 +954,9 @@ out: * will wait in the page fault for migration to complete. */ static int unmap_and_move_huge_page(new_page_t get_new_page, - unsigned long private, struct page *hpage, - int force, enum migrate_mode mode) + free_page_t put_new_page, unsigned long private, + struct page *hpage, int force, + enum migrate_mode mode) { int rc = 0; int *result = NULL; @@ -982,20 +992,30 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, if (!page_mapped(hpage)) rc = move_to_new_page(new_hpage, hpage, 1, mode); - if (rc) + if (rc != MIGRATEPAGE_SUCCESS) remove_migration_ptes(hpage, hpage); if (anon_vma) put_anon_vma(anon_vma); - if (!rc) + if (rc == MIGRATEPAGE_SUCCESS) hugetlb_cgroup_migrate(hpage, new_hpage); unlock_page(hpage); out: if (rc != -EAGAIN) putback_active_hugepage(hpage); - put_page(new_hpage); + + /* + * If migration was not successful and there's a freeing callback, use + * it. Otherwise, put_page() will drop the reference grabbed during + * isolation. 
+ */ + if (rc != MIGRATEPAGE_SUCCESS && put_new_page) + put_new_page(new_hpage, private); + else + put_page(new_hpage); + if (result) { if (rc) *result = rc; @@ -1012,6 +1032,8 @@ out: * @from: The list of pages to be migrated. * @get_new_page: The function used to allocate free pages to be used * as the target of the page migration. + * @put_new_page: The function used to free target pages if migration + * fails, or NULL if no special handling is necessary. * @private: Private data to be passed on to get_new_page() * @mode: The migration mode that specifies the constraints for * page migration, if any. @@ -1025,7 +1047,8 @@ out: * Returns the number of pages that were not migrated, or an error code. */ int migrate_pages(struct list_head *from, new_page_t get_new_page, - unsigned long private, enum migrate_mode mode, int reason) + free_page_t put_new_page, unsigned long private, + enum migrate_mode mode, int reason) { int retry = 1; int nr_failed = 0; @@ -1047,10 +1070,11 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, if (PageHuge(page)) rc = unmap_and_move_huge_page(get_new_page, - private, page, pass > 2, mode); + put_new_page, private, page, + pass > 2, mode); else - rc = unmap_and_move(get_new_page, private, - page, pass > 2, mode); + rc = unmap_and_move(get_new_page, put_new_page, + private, page, pass > 2, mode); switch(rc) { case -ENOMEM: @@ -1194,7 +1218,7 @@ set_status: err = 0; if (!list_empty(&pagelist)) { - err = migrate_pages(&pagelist, new_page_node, + err = migrate_pages(&pagelist, new_page_node, NULL, (unsigned long)pm, MIGRATE_SYNC, MR_SYSCALL); if (err) putback_movable_pages(&pagelist); @@ -1643,7 +1667,8 @@ int migrate_misplaced_page(struct page *page, int node) list_add(&page->lru, &migratepages); nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_page, - node, MIGRATE_ASYNC, MR_NUMA_MISPLACED); + NULL, node, MIGRATE_ASYNC, + MR_NUMA_MISPLACED); if (nr_remaining) { putback_lru_pages(&migratepages); isolated = 
0; diff --git a/mm/mincore.c b/mm/mincore.c index da2be56..06cb810 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -70,13 +70,21 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff) * any other file mapping (ie. marked !present and faulted in with * tmpfs's .fault). So swapped out tmpfs mappings are tested here. */ - page = find_get_page(mapping, pgoff); #ifdef CONFIG_SWAP - /* shmem/tmpfs may return swap: account for swapcache page too. */ - if (radix_tree_exceptional_entry(page)) { - swp_entry_t swap = radix_to_swp_entry(page); - page = find_get_page(swap_address_space(swap), swap.val); - } + if (shmem_mapping(mapping)) { + page = find_get_entry(mapping, pgoff); + /* + * shmem/tmpfs may return swap: account for swapcache + * page too. + */ + if (radix_tree_exceptional_entry(page)) { + swp_entry_t swp = radix_to_swp_entry(page); + page = find_get_page(swap_address_space(swp), swp.val); + } + } else + page = find_get_page(mapping, pgoff); +#else + page = find_get_page(mapping, pgoff); #endif if (page) { present = PageUptodate(page); @@ -79,6 +79,7 @@ void clear_page_mlock(struct page *page) */ void mlock_vma_page(struct page *page) { + /* Serialize with page migration */ BUG_ON(!PageLocked(page)); if (!TestSetPageMlocked(page)) { @@ -153,6 +154,7 @@ unsigned int munlock_vma_page(struct page *page) { unsigned int nr_pages; + /* For try_to_munlock() and to serialize with page migration */ BUG_ON(!PageLocked(page)); if (TestClearPageMlocked(page)) { @@ -10,6 +10,7 @@ #include <linux/slab.h> #include <linux/backing-dev.h> #include <linux/mm.h> +#include <linux/vmacache.h> #include <linux/shm.h> #include <linux/mman.h> #include <linux/pagemap.h> @@ -682,8 +683,9 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma, prev->vm_next = next = vma->vm_next; if (next) next->vm_prev = prev; - if (mm->mmap_cache == vma) - mm->mmap_cache = prev; + + /* Kill the cache */ + vmacache_invalidate(mm); } /* @@ -744,8 +746,11 @@ again: 
remove_next = 1 + (end > next->vm_end); * shrinking vma had, to cover any anon pages imported. */ if (exporter && exporter->anon_vma && !importer->anon_vma) { - if (anon_vma_clone(importer, exporter)) - return -ENOMEM; + int error; + + error = anon_vma_clone(importer, exporter); + if (error) + return error; importer->anon_vma = exporter->anon_vma; } } @@ -1980,34 +1985,33 @@ EXPORT_SYMBOL(get_unmapped_area); /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) { - struct vm_area_struct *vma = NULL; + struct rb_node *rb_node; + struct vm_area_struct *vma; /* Check the cache first. */ - /* (Cache hit rate is typically around 35%.) */ - vma = ACCESS_ONCE(mm->mmap_cache); - if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) { - struct rb_node *rb_node; + vma = vmacache_find(mm, addr); + if (likely(vma)) + return vma; - rb_node = mm->mm_rb.rb_node; - vma = NULL; + rb_node = mm->mm_rb.rb_node; + vma = NULL; - while (rb_node) { - struct vm_area_struct *vma_tmp; - - vma_tmp = rb_entry(rb_node, - struct vm_area_struct, vm_rb); - - if (vma_tmp->vm_end > addr) { - vma = vma_tmp; - if (vma_tmp->vm_start <= addr) - break; - rb_node = rb_node->rb_left; - } else - rb_node = rb_node->rb_right; - } - if (vma) - mm->mmap_cache = vma; + while (rb_node) { + struct vm_area_struct *tmp; + + tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb); + + if (tmp->vm_end > addr) { + vma = tmp; + if (tmp->vm_start <= addr) + break; + rb_node = rb_node->rb_left; + } else + rb_node = rb_node->rb_right; } + + if (vma) + vmacache_update(addr, vma); return vma; } @@ -2045,14 +2049,17 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns { struct mm_struct *mm = vma->vm_mm; struct rlimit *rlim = current->signal->rlim; - unsigned long new_start; + unsigned long new_start, actual_size; /* address space limit tests */ if (!may_expand_vm(mm, grow)) return -ENOMEM; /* Stack 
limit test */ - if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur)) + actual_size = size; + if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN))) + actual_size -= PAGE_SIZE; + if (actual_size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur)) return -ENOMEM; /* mlock limit tests */ @@ -2379,7 +2386,9 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma, } else mm->highest_vm_end = prev ? prev->vm_end : 0; tail_vma->vm_next = NULL; - mm->mmap_cache = NULL; /* Kill the cache. */ + + /* Kill the cache */ + vmacache_invalidate(mm); } /* @@ -2416,7 +2425,8 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma, if (err) goto out_free_vma; - if (anon_vma_clone(new, vma)) + err = anon_vma_clone(new, vma); + if (err) goto out_free_mpol; if (new->vm_file) diff --git a/mm/mremap.c b/mm/mremap.c index 0843feb..05f1180 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -194,10 +194,17 @@ unsigned long move_page_tables(struct vm_area_struct *vma, break; if (pmd_trans_huge(*old_pmd)) { int err = 0; - if (extent == HPAGE_PMD_SIZE) + if (extent == HPAGE_PMD_SIZE) { + VM_BUG_ON(vma->vm_file || !vma->anon_vma); + /* See comment in move_ptes() */ + if (need_rmap_locks) + anon_vma_lock_write(vma->anon_vma); err = move_huge_pmd(vma, new_vma, old_addr, new_addr, old_end, old_pmd, new_pmd); + if (need_rmap_locks) + anon_vma_unlock_write(vma->anon_vma); + } if (err > 0) { need_flush = true; continue; @@ -15,6 +15,7 @@ #include <linux/export.h> #include <linux/mm.h> +#include <linux/vmacache.h> #include <linux/mman.h> #include <linux/swap.h> #include <linux/file.h> @@ -767,16 +768,23 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma) */ static void delete_vma_from_mm(struct vm_area_struct *vma) { + int i; struct address_space *mapping; struct mm_struct *mm = vma->vm_mm; + struct task_struct *curr = current; kenter("%p", vma); protect_vma(vma, 0); mm->map_count--; - if (mm->mmap_cache == vma) - mm->mmap_cache = NULL; + 
for (i = 0; i < VMACACHE_SIZE; i++) { + /* if the vma is cached, invalidate the entire cache */ + if (curr->vmacache[i] == vma) { + vmacache_invalidate(curr->mm); + break; + } + } /* remove the VMA from the mapping */ if (vma->vm_file) { @@ -824,8 +832,8 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) struct vm_area_struct *vma; /* check the cache first */ - vma = ACCESS_ONCE(mm->mmap_cache); - if (vma && vma->vm_start <= addr && vma->vm_end > addr) + vma = vmacache_find(mm, addr); + if (likely(vma)) return vma; /* trawl the list (there may be multiple mappings in which addr @@ -834,7 +842,7 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) if (vma->vm_start > addr) return NULL; if (vma->vm_end > addr) { - mm->mmap_cache = vma; + vmacache_update(addr, vma); return vma; } } @@ -873,8 +881,8 @@ static struct vm_area_struct *find_vma_exact(struct mm_struct *mm, unsigned long end = addr + len; /* check the cache first */ - vma = mm->mmap_cache; - if (vma && vma->vm_start == addr && vma->vm_end == end) + vma = vmacache_find_exact(mm, addr, end); + if (vma) return vma; /* trawl the list (there may be multiple mappings in which addr @@ -885,7 +893,7 @@ static struct vm_area_struct *find_vma_exact(struct mm_struct *mm, if (vma->vm_start > addr) return NULL; if (vma->vm_end == end) { - mm->mmap_cache = vma; + vmacache_update(addr, vma); return vma; } } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index e73f01c..712a0f8 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -47,19 +47,21 @@ static DEFINE_SPINLOCK(zone_scan_lock); #ifdef CONFIG_NUMA /** * has_intersects_mems_allowed() - check task eligiblity for kill - * @tsk: task struct of which task to consider + * @start: task struct of which task to consider * @mask: nodemask passed to page allocator for mempolicy ooms * * Task eligibility is determined by whether or not a candidate task, @tsk, * shares the same mempolicy nodes as current if it is bound by such a policy * 
and whether or not it has the same set of allowed cpuset nodes. */ -static bool has_intersects_mems_allowed(struct task_struct *tsk, +static bool has_intersects_mems_allowed(struct task_struct *start, const nodemask_t *mask) { - struct task_struct *start = tsk; + struct task_struct *tsk; + bool ret = false; - do { + rcu_read_lock(); + for_each_thread(start, tsk) { if (mask) { /* * If this is a mempolicy constrained oom, tsk's @@ -67,19 +69,20 @@ static bool has_intersects_mems_allowed(struct task_struct *tsk, * mempolicy intersects current, otherwise it may be * needlessly killed. */ - if (mempolicy_nodemask_intersects(tsk, mask)) - return true; + ret = mempolicy_nodemask_intersects(tsk, mask); } else { /* * This is not a mempolicy constrained oom, so only * check the mems of tsk's cpuset. */ - if (cpuset_mems_allowed_intersects(current, tsk)) - return true; + ret = cpuset_mems_allowed_intersects(current, tsk); } - } while_each_thread(start, tsk); + if (ret) + break; + } + rcu_read_unlock(); - return false; + return ret; } #else static bool has_intersects_mems_allowed(struct task_struct *tsk, @@ -97,16 +100,21 @@ static bool has_intersects_mems_allowed(struct task_struct *tsk, */ struct task_struct *find_lock_task_mm(struct task_struct *p) { - struct task_struct *t = p; + struct task_struct *t; - do { + rcu_read_lock(); + + for_each_thread(p, t) { task_lock(t); if (likely(t->mm)) - return t; + goto found; task_unlock(t); - } while_each_thread(p, t); + } + t = NULL; +found: + rcu_read_unlock(); - return NULL; + return t; } /* return true if the task is not adequate as candidate victim task. 
*/ @@ -301,7 +309,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, unsigned long chosen_points = 0; rcu_read_lock(); - do_each_thread(g, p) { + for_each_process_thread(g, p) { unsigned int points; switch (oom_scan_process_thread(p, totalpages, nodemask, @@ -319,11 +327,15 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, break; }; points = oom_badness(p, NULL, nodemask, totalpages); - if (points > chosen_points) { - chosen = p; - chosen_points = points; - } - } while_each_thread(g, p); + if (!points || points < chosen_points) + continue; + /* Prefer thread group leaders for display purposes */ + if (points == chosen_points && thread_group_leader(chosen)) + continue; + + chosen = p; + chosen_points = points; + } if (chosen) get_task_struct(chosen); rcu_read_unlock(); @@ -394,6 +406,23 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, dump_tasks(memcg, nodemask); } +/* + * Number of OOM killer invocations (including memcg OOM killer). + * Primarily used by PM freezer to check for potential races with + * OOM killed frozen task. + */ +static atomic_t oom_kills = ATOMIC_INIT(0); + +int oom_kills_count(void) +{ + return atomic_read(&oom_kills); +} + +void note_oom_kill(void) +{ + atomic_inc(&oom_kills); +} + #define K(x) ((x) << (PAGE_SHIFT-10)) /* * Must be called while holding a reference to p, which will be released upon @@ -406,7 +435,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, { struct task_struct *victim = p; struct task_struct *child; - struct task_struct *t = p; + struct task_struct *t; struct mm_struct *mm; unsigned int victim_points = 0; static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL, @@ -437,7 +466,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, * still freeing memory. 
*/ read_lock(&tasklist_lock); - do { + for_each_thread(p, t) { list_for_each_entry(child, &t->children, sibling) { unsigned int child_points; @@ -455,13 +484,11 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, get_task_struct(victim); } } - } while_each_thread(p, t); + } read_unlock(&tasklist_lock); - rcu_read_lock(); p = find_lock_task_mm(victim); if (!p) { - rcu_read_unlock(); put_task_struct(victim); return; } else if (victim != p) { @@ -487,6 +514,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, * That thread will now get access to memory reserves since it has a * pending fatal signal. */ + rcu_read_lock(); for_each_process(p) if (p->mm == mm && !same_thread_group(p, victim) && !(p->flags & PF_KTHREAD)) { diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 7106cb1..9f45f87 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -593,14 +593,14 @@ unsigned long bdi_dirty_limit(struct backing_dev_info *bdi, unsigned long dirty) * (5) the closer to setpoint, the smaller |df/dx| (and the reverse) * => fast response on large errors; small oscillation near setpoint */ -static inline long long pos_ratio_polynom(unsigned long setpoint, +static long long pos_ratio_polynom(unsigned long setpoint, unsigned long dirty, unsigned long limit) { long long pos_ratio; long x; - x = div_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT, + x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT, limit - setpoint + 1); pos_ratio = x; pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT; @@ -842,7 +842,7 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi, x_intercept = bdi_setpoint + span; if (bdi_dirty < x_intercept - span / 4) { - pos_ratio = div_u64(pos_ratio * (x_intercept - bdi_dirty), + pos_ratio = div64_u64(pos_ratio * (x_intercept - bdi_dirty), x_intercept - bdi_setpoint + 1); } else pos_ratio /= 4; @@ -1324,9 +1324,9 @@ static inline void bdi_dirty_limits(struct 
backing_dev_info *bdi, *bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh); if (bdi_bg_thresh) - *bdi_bg_thresh = div_u64((u64)*bdi_thresh * - background_thresh, - dirty_thresh); + *bdi_bg_thresh = dirty_thresh ? div_u64((u64)*bdi_thresh * + background_thresh, + dirty_thresh) : 0; /* * In order to avoid the stacked BDI deadlock we need @@ -2398,7 +2398,7 @@ int test_clear_page_writeback(struct page *page) return ret; } -int test_set_page_writeback(struct page *page) +int __test_set_page_writeback(struct page *page, bool keep_write) { struct address_space *mapping = page_mapping(page); int ret; @@ -2423,9 +2423,10 @@ int test_set_page_writeback(struct page *page) radix_tree_tag_clear(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); - radix_tree_tag_clear(&mapping->page_tree, - page_index(page), - PAGECACHE_TAG_TOWRITE); + if (!keep_write) + radix_tree_tag_clear(&mapping->page_tree, + page_index(page), + PAGECACHE_TAG_TOWRITE); spin_unlock_irqrestore(&mapping->tree_lock, flags); } else { ret = TestSetPageWriteback(page); @@ -2436,7 +2437,7 @@ int test_set_page_writeback(struct page *page) return ret; } -EXPORT_SYMBOL(test_set_page_writeback); +EXPORT_SYMBOL(__test_set_page_writeback); /* * Return true if any of the pages in the mapping are marked with the diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 36c40eb..4cf25ba 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -70,6 +70,7 @@ /* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */ static DEFINE_MUTEX(pcp_batch_high_lock); +#define MIN_PERCPU_PAGELIST_FRACTION (8) #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID DEFINE_PER_CPU(int, numa_node); @@ -417,7 +418,8 @@ static int destroy_compound_page(struct page *page, unsigned long order) return bad; } -static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags) +static inline void prep_zero_page(struct page *page, unsigned int order, + gfp_t gfp_flags) { int i; @@ -461,7 +463,7 @@ static inline void 
set_page_guard_flag(struct page *page) { } static inline void clear_page_guard_flag(struct page *page) { } #endif -static inline void set_page_order(struct page *page, int order) +static inline void set_page_order(struct page *page, unsigned int order) { set_page_private(page, order); __SetPageBuddy(page); @@ -512,21 +514,31 @@ __find_buddy_index(unsigned long page_idx, unsigned int order) * For recording page's order, we use page_private(page). */ static inline int page_is_buddy(struct page *page, struct page *buddy, - int order) + unsigned int order) { if (!pfn_valid_within(page_to_pfn(buddy))) return 0; - if (page_zone_id(page) != page_zone_id(buddy)) - return 0; - if (page_is_guard(buddy) && page_order(buddy) == order) { VM_BUG_ON(page_count(buddy) != 0); + + if (page_zone_id(page) != page_zone_id(buddy)) + return 0; + return 1; } if (PageBuddy(buddy) && page_order(buddy) == order) { VM_BUG_ON(page_count(buddy) != 0); + + /* + * zone check is done late to avoid uselessly + * calculating zone/node ids for pages that could + * never merge. 
+ */ + if (page_zone_id(page) != page_zone_id(buddy)) + return 0; + return 1; } return 0; @@ -558,6 +570,7 @@ static inline int page_is_buddy(struct page *page, struct page *buddy, */ static inline void __free_one_page(struct page *page, + unsigned long pfn, struct zone *zone, unsigned int order, int migratetype) { @@ -574,7 +587,7 @@ static inline void __free_one_page(struct page *page, VM_BUG_ON(migratetype == -1); - page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1); + page_idx = pfn & ((1 << MAX_ORDER) - 1); VM_BUG_ON(page_idx & ((1 << order) - 1)); VM_BUG_ON(bad_range(zone, page)); @@ -662,10 +675,13 @@ static void free_pcppages_bulk(struct zone *zone, int count, struct list_head *list) { int to_free = count; + unsigned long nr_scanned; unsigned long flags; spin_lock_irqsave(&zone->lock, flags); - zone->pages_scanned = 0; + nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED); + if (nr_scanned) + __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned); while (!list_empty(list)) { struct page *page = list_first_entry(list, struct page, lru); @@ -676,7 +692,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, mt = get_freepage_migratetype(page); /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */ - __free_one_page(page, zone, 0, mt); + __free_one_page(page, page_to_pfn(page), zone, 0, mt); trace_mm_page_pcpu_drain(page, 0, mt); if (likely(!is_migrate_isolate_page(page))) { __mod_zone_page_state(zone, NR_FREE_PAGES, 1); @@ -732,15 +748,20 @@ static void isolate_pcp_pages(int to_free, struct per_cpu_pages *src, } } -static void free_one_page(struct zone *zone, struct page *page, int order, +static void free_one_page(struct zone *zone, + struct page *page, unsigned long pfn, + unsigned int order, int migratetype) { + unsigned long nr_scanned; unsigned long flags; spin_lock_irqsave(&zone->lock, flags); - zone->pages_scanned = 0; + nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED); + if (nr_scanned) + __mod_zone_page_state(zone, 
NR_PAGES_SCANNED, -nr_scanned); - __free_one_page(page, zone, order, migratetype); + __free_one_page(page, pfn, zone, order, migratetype); if (unlikely(!is_migrate_isolate(migratetype))) __mod_zone_freepage_state(zone, 1 << order, migratetype); spin_unlock_irqrestore(&zone->lock, flags); @@ -777,15 +798,16 @@ static void __free_pages_ok(struct page *page, unsigned int order) { unsigned long flags; int migratetype; + unsigned long pfn = page_to_pfn(page); if (!free_pages_prepare(page, order)) return; + migratetype = get_pfnblock_migratetype(page, pfn); local_lock_irqsave(pa_lock, flags); __count_vm_events(PGFREE, 1 << order); - migratetype = get_pageblock_migratetype(page); set_freepage_migratetype(page, migratetype); - free_one_page(page_zone(page), page, order, migratetype); + free_one_page(page_zone(page), page, pfn, order, migratetype); local_unlock_irqrestore(pa_lock, flags); } @@ -821,9 +843,21 @@ void __init init_cma_reserved_pageblock(struct page *page) set_page_count(p, 0); } while (++p, --i); - set_page_refcounted(page); set_pageblock_migratetype(page, MIGRATE_CMA); - __free_pages(page, pageblock_order); + + if (pageblock_order >= MAX_ORDER) { + i = pageblock_nr_pages; + p = page; + do { + set_page_refcounted(p); + __free_pages(p, MAX_ORDER - 1); + p += MAX_ORDER_NR_PAGES; + } while (i -= MAX_ORDER_NR_PAGES); + } else { + set_page_refcounted(page); + __free_pages(page, pageblock_order); + } + adjust_managed_page_count(page, pageblock_nr_pages); } #endif @@ -893,7 +927,7 @@ static inline int check_new_page(struct page *page) return 0; } -static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) +static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags) { int i; @@ -942,6 +976,7 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, rmv_page_order(page); area->nr_free--; expand(zone, page, order, current_order, area, migratetype); + set_freepage_migratetype(page, migratetype); return page; } @@ -1066,6 
+1101,12 @@ static int try_to_steal_freepages(struct zone *zone, struct page *page, { int current_order = page_order(page); + /* + * When borrowing from MIGRATE_CMA, we need to release the excess + * buddy pages to CMA itself. We also ensure the freepage_migratetype + * is set to CMA so it is returned to the correct freelist in case + * the page ends up being not actually allocated from the pcp lists. + */ if (is_migrate_cma(fallback_type)) return fallback_type; @@ -1097,16 +1138,17 @@ static int try_to_steal_freepages(struct zone *zone, struct page *page, /* Remove an element from the buddy allocator from the fallback list */ static inline struct page * -__rmqueue_fallback(struct zone *zone, int order, int start_migratetype) +__rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype) { struct free_area *area; - int current_order; + unsigned int current_order; struct page *page; int migratetype, new_type, i; /* Find the largest possible block of pages in the other list */ - for (current_order = MAX_ORDER-1; current_order >= order; - --current_order) { + for (current_order = MAX_ORDER-1; + current_order >= order && current_order <= MAX_ORDER-1; + --current_order) { for (i = 0;; i++) { migratetype = fallbacks[start_migratetype][i]; @@ -1130,21 +1172,17 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) list_del(&page->lru); rmv_page_order(page); - /* - * Borrow the excess buddy pages as well, irrespective - * of whether we stole freepages, or took ownership of - * the pageblock or not. - * - * Exception: When borrowing from MIGRATE_CMA, release - * the excess buddy pages to CMA itself. - */ expand(zone, page, order, current_order, area, - is_migrate_cma(migratetype) - ? migratetype : start_migratetype); + new_type); + /* The freepage_migratetype may differ from pageblock's + * migratetype depending on the decisions in + * try_to_steal_freepages. This is OK as long as it does + * not differ for MIGRATE_CMA type. 
+ */ + set_freepage_migratetype(page, new_type); - trace_mm_page_alloc_extfrag(page, order, - current_order, start_migratetype, migratetype, - new_type == start_migratetype); + trace_mm_page_alloc_extfrag(page, order, current_order, + start_migratetype, migratetype, new_type); return page; } @@ -1190,9 +1228,9 @@ retry_reserve: */ static int rmqueue_bulk(struct zone *zone, unsigned int order, unsigned long count, struct list_head *list, - int migratetype, int cold) + int migratetype, bool cold) { - int mt = migratetype, i; + int i; spin_lock(&zone->lock); for (i = 0; i < count; ++i) { @@ -1209,18 +1247,12 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, * merge IO requests if the physical pages are ordered * properly. */ - if (likely(cold == 0)) + if (likely(!cold)) list_add(&page->lru, list); else list_add_tail(&page->lru, list); - if (IS_ENABLED(CONFIG_CMA)) { - mt = get_pageblock_migratetype(page); - if (!is_migrate_cma(mt) && !is_migrate_isolate(mt)) - mt = migratetype; - } - set_freepage_migratetype(page, mt); list = &page->lru; - if (is_migrate_cma(mt)) + if (is_migrate_cma(get_freepage_migratetype(page))) __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, -(1 << order)); } @@ -1258,15 +1290,6 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) local_unlock_irqrestore(pa_lock, flags); free_pcppages_bulk(zone, to_drain, &dst); } -static bool gfp_thisnode_allocation(gfp_t gfp_mask) -{ - return (gfp_mask & GFP_THISNODE) == GFP_THISNODE; -} -#else -static bool gfp_thisnode_allocation(gfp_t gfp_mask) -{ - return false; -} #endif /* @@ -1365,7 +1388,7 @@ void mark_free_pages(struct zone *zone) { unsigned long pfn, max_zone_pfn; unsigned long flags; - int order, t; + unsigned int order, t; struct list_head *curr; if (zone_is_empty(zone)) @@ -1397,19 +1420,20 @@ void mark_free_pages(struct zone *zone) /* * Free a 0-order page - * cold == 1 ? free a cold page : free a hot page + * cold == true ? 
free a cold page : free a hot page */ -void free_hot_cold_page(struct page *page, int cold) +void free_hot_cold_page(struct page *page, bool cold) { struct zone *zone = page_zone(page); struct per_cpu_pages *pcp; unsigned long flags; + unsigned long pfn = page_to_pfn(page); int migratetype; if (!free_pages_prepare(page, 0)) return; - migratetype = get_pageblock_migratetype(page); + migratetype = get_pfnblock_migratetype(page, pfn); set_freepage_migratetype(page, migratetype); local_lock_irqsave(pa_lock, flags); __count_vm_event(PGFREE); @@ -1423,17 +1447,17 @@ void free_hot_cold_page(struct page *page, int cold) */ if (migratetype >= MIGRATE_PCPTYPES) { if (unlikely(is_migrate_isolate(migratetype))) { - free_one_page(zone, page, 0, migratetype); + free_one_page(zone, page, pfn, 0, migratetype); goto out; } migratetype = MIGRATE_MOVABLE; } pcp = &this_cpu_ptr(zone->pageset)->pcp; - if (cold) - list_add_tail(&page->lru, &pcp->lists[migratetype]); - else + if (!cold) list_add(&page->lru, &pcp->lists[migratetype]); + else + list_add_tail(&page->lru, &pcp->lists[migratetype]); pcp->count++; if (pcp->count >= pcp->high) { unsigned long batch = ACCESS_ONCE(pcp->batch); @@ -1453,7 +1477,7 @@ out: /* * Free a list of 0-order pages */ -void free_hot_cold_page_list(struct list_head *list, int cold) +void free_hot_cold_page_list(struct list_head *list, bool cold) { struct page *page, *next; @@ -1565,12 +1589,12 @@ int split_free_page(struct page *page) */ static inline struct page *buffered_rmqueue(struct zone *preferred_zone, - struct zone *zone, int order, gfp_t gfp_flags, - int migratetype) + struct zone *zone, unsigned int order, + gfp_t gfp_flags, int migratetype) { unsigned long flags; struct page *page; - int cold = !!(gfp_flags & __GFP_COLD); + bool cold = ((gfp_flags & __GFP_COLD) != 0); again: if (likely(order == 0)) { @@ -1616,16 +1640,14 @@ again: goto failed; } __mod_zone_freepage_state(zone, -(1 << order), - get_pageblock_migratetype(page)); + 
get_freepage_migratetype(page)); spin_unlock(&zone->lock); } - /* - * NOTE: GFP_THISNODE allocations do not partake in the kswapd - * aging protocol, so they can't be fair. - */ - if (!gfp_thisnode_allocation(gfp_flags)) - __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order)); + __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order)); + if (atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]) <= 0 && + !zone_is_fair_depleted(zone)) + zone_set_flag(zone, ZONE_FAIR_DEPLETED); __count_zone_vm_events(PGALLOC, zone, 1 << order); zone_statistics(preferred_zone, zone, gfp_flags); @@ -1722,12 +1744,12 @@ static inline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) * Return true if free pages are above 'mark'. This takes into account the order * of the allocation. */ -static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark, - int classzone_idx, int alloc_flags, long free_pages) +static bool __zone_watermark_ok(struct zone *z, unsigned int order, + unsigned long mark, int classzone_idx, int alloc_flags, + long free_pages) { /* free_pages my go negative - that's OK */ long min = mark; - long lowmem_reserve = z->lowmem_reserve[classzone_idx]; int o; long free_cma = 0; @@ -1742,7 +1764,7 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark, free_cma = zone_page_state(z, NR_FREE_CMA_PAGES); #endif - if (free_pages - free_cma <= min + lowmem_reserve) + if (free_pages - free_cma <= min + z->lowmem_reserve[classzone_idx]) return false; for (o = 0; o < order; o++) { /* At the next order, this order's pages become unavailable */ @@ -1757,15 +1779,15 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark, return true; } -bool zone_watermark_ok(struct zone *z, int order, unsigned long mark, +bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, int classzone_idx, int alloc_flags) { return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags, 
zone_page_state(z, NR_FREE_PAGES)); } -bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark, - int classzone_idx, int alloc_flags) +bool zone_watermark_ok_safe(struct zone *z, unsigned int order, + unsigned long mark, int classzone_idx, int alloc_flags) { long free_pages = zone_page_state(z, NR_FREE_PAGES); @@ -1907,7 +1929,7 @@ static void __paginginit init_zone_allows_reclaim(int nid) { int i; - for_each_online_node(i) + for_each_node_state(i, N_MEMORY) if (node_distance(nid, i) <= RECLAIM_DISTANCE) node_set(i, NODE_DATA(nid)->reclaim_nodes); else @@ -1950,6 +1972,18 @@ static inline void init_zone_allows_reclaim(int nid) } #endif /* CONFIG_NUMA */ +static void reset_alloc_batches(struct zone *preferred_zone) +{ + struct zone *zone = preferred_zone->zone_pgdat->node_zones; + + do { + mod_zone_page_state(zone, NR_ALLOC_BATCH, + high_wmark_pages(zone) - low_wmark_pages(zone) - + atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH])); + zone_clear_flag(zone, ZONE_FAIR_DEPLETED); + } while (zone++ != preferred_zone); +} + /* * get_page_from_freelist goes through the zonelist trying to allocate * a page. 
@@ -1957,18 +1991,22 @@ static inline void init_zone_allows_reclaim(int nid) static struct page * get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order, struct zonelist *zonelist, int high_zoneidx, int alloc_flags, - struct zone *preferred_zone, int migratetype) + struct zone *preferred_zone, int classzone_idx, int migratetype) { struct zoneref *z; struct page *page = NULL; - int classzone_idx; struct zone *zone; nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */ int zlc_active = 0; /* set if using zonelist_cache */ int did_zlc_setup = 0; /* just call zlc_setup() one time */ + bool consider_zone_dirty = (alloc_flags & ALLOC_WMARK_LOW) && + (gfp_mask & __GFP_WRITE); + int nr_fair_skipped = 0; + bool zonelist_rescan; - classzone_idx = zone_idx(preferred_zone); zonelist_scan: + zonelist_rescan = false; + /* * Scan zonelist, looking for a zone with enough free. * See also __cpuset_node_allowed_softwall() comment in kernel/cpuset.c. @@ -1980,34 +2018,23 @@ zonelist_scan: if (IS_ENABLED(CONFIG_NUMA) && zlc_active && !zlc_zone_worth_trying(zonelist, z, allowednodes)) continue; - if ((alloc_flags & ALLOC_CPUSET) && + if (cpusets_enabled() && + (alloc_flags & ALLOC_CPUSET) && !cpuset_zone_allowed_softwall(zone, gfp_mask)) continue; - BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK); - if (unlikely(alloc_flags & ALLOC_NO_WATERMARKS)) - goto try_this_zone; /* * Distribute pages in proportion to the individual * zone size to ensure fair page aging. The zone a * page was allocated in should have no effect on the * time the page has in memory before being reclaimed. - * - * Try to stay in local zones in the fastpath. If - * that fails, the slowpath is entered, which will do - * another pass starting with the local zones, but - * ultimately fall back to remote zones that do not - * partake in the fairness round-robin cycle of this - * zonelist. 
- * - * NOTE: GFP_THISNODE allocations do not partake in - * the kswapd aging protocol, so they can't be fair. */ - if ((alloc_flags & ALLOC_WMARK_LOW) && - !gfp_thisnode_allocation(gfp_mask)) { - if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0) - continue; + if (alloc_flags & ALLOC_FAIR) { if (!zone_local(preferred_zone, zone)) + break; + if (zone_is_fair_depleted(zone)) { + nr_fair_skipped++; continue; + } } /* * When allocating a page cache page for writing, we @@ -2035,15 +2062,19 @@ zonelist_scan: * will require awareness of zones in the * dirty-throttling and the flusher threads. */ - if ((alloc_flags & ALLOC_WMARK_LOW) && - (gfp_mask & __GFP_WRITE) && !zone_dirty_ok(zone)) - goto this_zone_full; + if (consider_zone_dirty && !zone_dirty_ok(zone)) + continue; mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK]; if (!zone_watermark_ok(zone, order, mark, classzone_idx, alloc_flags)) { int ret; + /* Checked here to keep the fast path fast */ + BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK); + if (alloc_flags & ALLOC_NO_WATERMARKS) + goto try_this_zone; + if (IS_ENABLED(CONFIG_NUMA) && !did_zlc_setup && nr_online_nodes > 1) { /* @@ -2105,17 +2136,11 @@ try_this_zone: if (page) break; this_zone_full: - if (IS_ENABLED(CONFIG_NUMA)) + if (IS_ENABLED(CONFIG_NUMA) && zlc_active) zlc_mark_zone_full(zonelist, z); } - if (unlikely(IS_ENABLED(CONFIG_NUMA) && page == NULL && zlc_active)) { - /* Disable zlc cache for second zonelist scan */ - zlc_active = 0; - goto zonelist_scan; - } - - if (page) + if (page) { /* * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was * necessary to allocate the page. The expectation is @@ -2124,8 +2149,37 @@ this_zone_full: * for !PFMEMALLOC purposes. */ page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS); + return page; + } - return page; + /* + * The first pass makes sure allocations are spread fairly within the + * local node. 
However, the local node might have free pages left + * after the fairness batches are exhausted, and remote zones haven't + * even been considered yet. Try once more without fairness, and + * include remote zones now, before entering the slowpath and waking + * kswapd: prefer spilling to a remote zone over swapping locally. + */ + if (alloc_flags & ALLOC_FAIR) { + alloc_flags &= ~ALLOC_FAIR; + if (nr_fair_skipped) { + zonelist_rescan = true; + reset_alloc_batches(preferred_zone); + } + if (nr_online_nodes > 1) + zonelist_rescan = true; + } + + if (unlikely(IS_ENABLED(CONFIG_NUMA) && zlc_active)) { + /* Disable zlc cache for second zonelist scan */ + zlc_active = 0; + zonelist_rescan = true; + } + + if (zonelist_rescan) + goto zonelist_scan; + + return NULL; } /* @@ -2241,7 +2295,7 @@ static inline struct page * __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, enum zone_type high_zoneidx, nodemask_t *nodemask, struct zone *preferred_zone, - int migratetype) + int classzone_idx, int migratetype) { struct page *page; @@ -2252,6 +2306,14 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, } /* + * PM-freezer should be notified that there might be an OOM killer on + * its way to kill and wake somebody up. This is too early and we might + * end up not killing anything but false positives are acceptable. + * See freeze_processes. + */ + note_oom_kill(); + + /* * Go through the zonelist yet one more time, keep very high watermark * here, this is only to catch a parallel oom killing, we must fail if * we're still under heavy pressure. 
@@ -2259,7 +2321,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order, zonelist, high_zoneidx, ALLOC_WMARK_HIGH|ALLOC_CPUSET, - preferred_zone, migratetype); + preferred_zone, classzone_idx, migratetype); if (page) goto out; @@ -2294,7 +2356,7 @@ static struct page * __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, enum zone_type high_zoneidx, nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, - int migratetype, bool sync_migration, + int classzone_idx, int migratetype, enum migrate_mode mode, bool *contended_compaction, bool *deferred_compaction, unsigned long *did_some_progress) { @@ -2308,7 +2370,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, current->flags |= PF_MEMALLOC; *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask, - nodemask, sync_migration, + nodemask, mode, contended_compaction); current->flags &= ~PF_MEMALLOC; @@ -2322,13 +2384,10 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist, high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS, - preferred_zone, migratetype); + preferred_zone, classzone_idx, migratetype); if (page) { preferred_zone->compact_blockskip_flush = false; - preferred_zone->compact_considered = 0; - preferred_zone->compact_defer_shift = 0; - if (order >= preferred_zone->compact_order_failed) - preferred_zone->compact_order_failed = order + 1; + compaction_defer_reset(preferred_zone, order, true); count_vm_event(COMPACTSUCCESS); return page; } @@ -2344,7 +2403,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, * As async compaction considers a subset of pageblocks, only * defer if the failure was a sync compaction failure. 
*/ - if (sync_migration) + if (mode != MIGRATE_ASYNC) defer_compaction(preferred_zone, order); cond_resched(); @@ -2357,9 +2416,9 @@ static inline struct page * __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, enum zone_type high_zoneidx, nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, - int migratetype, bool sync_migration, - bool *contended_compaction, bool *deferred_compaction, - unsigned long *did_some_progress) + int classzone_idx, int migratetype, + enum migrate_mode mode, bool *contended_compaction, + bool *deferred_compaction, unsigned long *did_some_progress) { return NULL; } @@ -2398,7 +2457,7 @@ static inline struct page * __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, enum zone_type high_zoneidx, nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, - int migratetype, unsigned long *did_some_progress) + int classzone_idx, int migratetype, unsigned long *did_some_progress) { struct page *page = NULL; bool drained = false; @@ -2416,7 +2475,8 @@ retry: page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist, high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS, - preferred_zone, migratetype); + preferred_zone, classzone_idx, + migratetype); /* * If an allocation failed after direct reclaim, it could be because @@ -2439,14 +2499,14 @@ static inline struct page * __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, enum zone_type high_zoneidx, nodemask_t *nodemask, struct zone *preferred_zone, - int migratetype) + int classzone_idx, int migratetype) { struct page *page; do { page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist, high_zoneidx, ALLOC_NO_WATERMARKS, - preferred_zone, migratetype); + preferred_zone, classzone_idx, migratetype); if (!page && gfp_mask & __GFP_NOFAIL) wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50); @@ -2455,7 +2515,7 @@ 
__alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order, return page; } -static void prepare_slowpath(gfp_t gfp_mask, unsigned int order, +static void wake_all_kswapds(unsigned int order, struct zonelist *zonelist, enum zone_type high_zoneidx, struct zone *preferred_zone) @@ -2463,29 +2523,15 @@ static void prepare_slowpath(gfp_t gfp_mask, unsigned int order, struct zoneref *z; struct zone *zone; - for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { - if (!(gfp_mask & __GFP_NO_KSWAPD)) - wakeup_kswapd(zone, order, zone_idx(preferred_zone)); - /* - * Only reset the batches of zones that were actually - * considered in the fast path, we don't want to - * thrash fairness information for zones that are not - * actually part of this zonelist's round-robin cycle. - */ - if (!zone_local(preferred_zone, zone)) - continue; - mod_zone_page_state(zone, NR_ALLOC_BATCH, - high_wmark_pages(zone) - - low_wmark_pages(zone) - - zone_page_state(zone, NR_ALLOC_BATCH)); - } + for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) + wakeup_kswapd(zone, order, zone_idx(preferred_zone)); } static inline int gfp_to_alloc_flags(gfp_t gfp_mask) { int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET; - const gfp_t wait = gfp_mask & __GFP_WAIT; + const bool atomic = !(gfp_mask & (__GFP_WAIT | __GFP_NO_KSWAPD)); /* __GFP_HIGH is assumed to be the same as ALLOC_HIGH to save a branch. */ BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH); @@ -2494,20 +2540,20 @@ gfp_to_alloc_flags(gfp_t gfp_mask) * The caller may dip into page reserves a bit more if the caller * cannot run direct reclaim, or if the caller has realtime scheduling * policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will - * set both ALLOC_HARDER (!wait) and ALLOC_HIGH (__GFP_HIGH). + * set both ALLOC_HARDER (atomic == true) and ALLOC_HIGH (__GFP_HIGH). 
*/ alloc_flags |= (__force int) (gfp_mask & __GFP_HIGH); - if (!wait) { + if (atomic) { /* - * Not worth trying to allocate harder for - * __GFP_NOMEMALLOC even if it can't schedule. + * Not worth trying to allocate harder for __GFP_NOMEMALLOC even + * if it can't schedule. */ - if (!(gfp_mask & __GFP_NOMEMALLOC)) + if (!(gfp_mask & __GFP_NOMEMALLOC)) alloc_flags |= ALLOC_HARDER; /* - * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc. - * See also cpuset_zone_allowed() comment in kernel/cpuset.c. + * Ignore cpuset mems for GFP_ATOMIC rather than fail, see the + * comment for __cpuset_node_allowed_softwall(). */ alloc_flags &= ~ALLOC_CPUSET; } else if (unlikely(rt_task(current)) && !in_interrupt()) @@ -2539,14 +2585,14 @@ static inline struct page * __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, enum zone_type high_zoneidx, nodemask_t *nodemask, struct zone *preferred_zone, - int migratetype) + int classzone_idx, int migratetype) { const gfp_t wait = gfp_mask & __GFP_WAIT; struct page *page = NULL; int alloc_flags; unsigned long pages_reclaimed = 0; unsigned long did_some_progress; - bool sync_migration = false; + enum migrate_mode migration_mode = MIGRATE_ASYNC; bool deferred_compaction = false; bool contended_compaction = false; @@ -2569,12 +2615,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, * allowed per node queues are empty and that nodes are * over allocated. */ - if (gfp_thisnode_allocation(gfp_mask)) + if (IS_ENABLED(CONFIG_NUMA) && + (gfp_mask & GFP_THISNODE) == GFP_THISNODE) goto nopage; restart: - prepare_slowpath(gfp_mask, order, zonelist, - high_zoneidx, preferred_zone); + if (!(gfp_mask & __GFP_NO_KSWAPD)) + wake_all_kswapds(order, zonelist, high_zoneidx, preferred_zone); /* * OK, we're below the kswapd watermark and have kicked background @@ -2587,15 +2634,19 @@ restart: * Find the true preferred zone if the allocation is unconstrained by * cpusets. 
*/ - if (!(alloc_flags & ALLOC_CPUSET) && !nodemask) - first_zones_zonelist(zonelist, high_zoneidx, NULL, - &preferred_zone); + if (!(alloc_flags & ALLOC_CPUSET) && !nodemask) { + struct zoneref *preferred_zoneref; + preferred_zoneref = first_zones_zonelist(zonelist, high_zoneidx, + NULL, + &preferred_zone); + classzone_idx = zonelist_zone_idx(preferred_zoneref); + } rebalance: /* This is the last chance, in general, before the goto nopage. */ page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist, high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS, - preferred_zone, migratetype); + preferred_zone, classzone_idx, migratetype); if (page) goto got_pg; @@ -2610,7 +2661,7 @@ rebalance: page = __alloc_pages_high_priority(gfp_mask, order, zonelist, high_zoneidx, nodemask, - preferred_zone, migratetype); + preferred_zone, classzone_idx, migratetype); if (page) { goto got_pg; } @@ -2632,17 +2683,16 @@ rebalance: * Try direct compaction. The first pass is asynchronous. Subsequent * attempts after direct reclaim are synchronous */ - page = __alloc_pages_direct_compact(gfp_mask, order, - zonelist, high_zoneidx, - nodemask, - alloc_flags, preferred_zone, - migratetype, sync_migration, - &contended_compaction, + page = __alloc_pages_direct_compact(gfp_mask, order, zonelist, + high_zoneidx, nodemask, alloc_flags, + preferred_zone, + classzone_idx, migratetype, + migration_mode, &contended_compaction, &deferred_compaction, &did_some_progress); if (page) goto got_pg; - sync_migration = true; + migration_mode = MIGRATE_SYNC_LIGHT; /* * If compaction is deferred for high-order allocations, it is because @@ -2659,7 +2709,8 @@ rebalance: zonelist, high_zoneidx, nodemask, alloc_flags, preferred_zone, - migratetype, &did_some_progress); + classzone_idx, migratetype, + &did_some_progress); if (page) goto got_pg; @@ -2678,7 +2729,7 @@ rebalance: page = __alloc_pages_may_oom(gfp_mask, order, zonelist, high_zoneidx, nodemask, preferred_zone, - migratetype); + classzone_idx, 
migratetype); if (page) goto got_pg; @@ -2717,12 +2768,11 @@ rebalance: * direct reclaim and reclaim/compaction depends on compaction * being called after reclaim so call directly if necessary */ - page = __alloc_pages_direct_compact(gfp_mask, order, - zonelist, high_zoneidx, - nodemask, - alloc_flags, preferred_zone, - migratetype, sync_migration, - &contended_compaction, + page = __alloc_pages_direct_compact(gfp_mask, order, zonelist, + high_zoneidx, nodemask, alloc_flags, + preferred_zone, + classzone_idx, migratetype, + migration_mode, &contended_compaction, &deferred_compaction, &did_some_progress); if (page) @@ -2748,11 +2798,13 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, { enum zone_type high_zoneidx = gfp_zone(gfp_mask); struct zone *preferred_zone; + struct zoneref *preferred_zoneref; struct page *page = NULL; int migratetype = allocflags_to_migratetype(gfp_mask); unsigned int cpuset_mems_cookie; - int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET; + int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR; struct mem_cgroup *memcg = NULL; + int classzone_idx; gfp_mask &= gfp_allowed_mask; @@ -2779,14 +2831,15 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, return NULL; retry_cpuset: - cpuset_mems_cookie = get_mems_allowed(); + cpuset_mems_cookie = read_mems_allowed_begin(); /* The preferred zone is used for statistics later */ - first_zones_zonelist(zonelist, high_zoneidx, + preferred_zoneref = first_zones_zonelist(zonelist, high_zoneidx, nodemask ? 
: &cpuset_current_mems_allowed, &preferred_zone); if (!preferred_zone) goto out; + classzone_idx = zonelist_zone_idx(preferred_zoneref); #ifdef CONFIG_CMA if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE) @@ -2795,7 +2848,7 @@ retry_cpuset: /* First allocation attempt */ page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order, zonelist, high_zoneidx, alloc_flags, - preferred_zone, migratetype); + preferred_zone, classzone_idx, migratetype); if (unlikely(!page)) { /* * Runtime PM, block IO and its error handling path @@ -2805,7 +2858,7 @@ retry_cpuset: gfp_mask = memalloc_noio_flags(gfp_mask); page = __alloc_pages_slowpath(gfp_mask, order, zonelist, high_zoneidx, nodemask, - preferred_zone, migratetype); + preferred_zone, classzone_idx, migratetype); } trace_mm_page_alloc(page, order, gfp_mask, migratetype); @@ -2817,7 +2870,7 @@ out: * the mask is being updated. If a page allocation is about to fail, * check if the cpuset changed during allocation and if so, retry. 
*/ - if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page)) + if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie))) goto retry_cpuset; memcg_kmem_commit_charge(page, memcg, order); @@ -2856,7 +2909,7 @@ void __free_pages(struct page *page, unsigned int order) { if (put_page_testzero(page)) { if (order == 0) - free_hot_cold_page(page, 0); + free_hot_cold_page(page, false); else __free_pages_ok(page, order); } @@ -3085,9 +3138,9 @@ bool skip_free_areas_node(unsigned int flags, int nid) goto out; do { - cpuset_mems_cookie = get_mems_allowed(); + cpuset_mems_cookie = read_mems_allowed_begin(); ret = !node_isset(nid, cpuset_current_mems_allowed); - } while (!put_mems_allowed(cpuset_mems_cookie)); + } while (read_mems_allowed_retry(cpuset_mems_cookie)); out: return ret; } @@ -3240,12 +3293,12 @@ void show_free_areas(unsigned int filter) K(zone_page_state(zone, NR_BOUNCE)), K(zone_page_state(zone, NR_FREE_CMA_PAGES)), K(zone_page_state(zone, NR_WRITEBACK_TEMP)), - zone->pages_scanned, + K(zone_page_state(zone, NR_PAGES_SCANNED)), (!zone_reclaimable(zone) ? "yes" : "no") ); printk("lowmem_reserve[]:"); for (i = 0; i < MAX_NR_ZONES; i++) - printk(" %lu", zone->lowmem_reserve[i]); + printk(" %ld", zone->lowmem_reserve[i]); printk("\n"); } @@ -3985,6 +4038,7 @@ static void setup_zone_migrate_reserve(struct zone *zone) struct page *page; unsigned long block_migratetype; int reserve; + int old_reserve; /* * Get the start pfn, end pfn and the number of blocks to reserve @@ -4006,6 +4060,12 @@ static void setup_zone_migrate_reserve(struct zone *zone) * future allocation of hugepages at runtime. 
*/ reserve = min(2, reserve); + old_reserve = zone->nr_migrate_reserve_block; + + /* When memory hot-add, we almost always need to do nothing */ + if (reserve == old_reserve) + return; + zone->nr_migrate_reserve_block = reserve; for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) { if (!pfn_valid(pfn)) @@ -4043,6 +4103,12 @@ static void setup_zone_migrate_reserve(struct zone *zone) reserve--; continue; } + } else if (!old_reserve) { + /* + * At boot time we don't need to scan the whole zone + * for turning off MIGRATE_RESERVE. + */ + break; } /* @@ -4122,7 +4188,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, static void __meminit zone_init_free_lists(struct zone *zone) { - int order, t; + unsigned int order, t; for_each_migratetype_order(order, t) { INIT_LIST_HEAD(&zone->free_area[order].free_list[t]); zone->free_area[order].nr_free = 0; @@ -4134,7 +4200,7 @@ static void __meminit zone_init_free_lists(struct zone *zone) memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY) #endif -static int __meminit zone_batchsize(struct zone *zone) +static int zone_batchsize(struct zone *zone) { #ifdef CONFIG_MMU int batch; @@ -4250,8 +4316,8 @@ static void pageset_set_high(struct per_cpu_pageset *p, pageset_update(&p->pcp, high, batch); } -static void __meminit pageset_set_high_and_batch(struct zone *zone, - struct per_cpu_pageset *pcp) +static void pageset_set_high_and_batch(struct zone *zone, + struct per_cpu_pageset *pcp) { if (percpu_pagelist_fraction) pageset_set_high(pcp, @@ -4945,7 +5011,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size, pgdat->node_id = nid; pgdat->node_start_pfn = node_start_pfn; - init_zone_allows_reclaim(nid); + if (node_state(nid, N_MEMORY)) + init_zone_allows_reclaim(nid); #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); #endif @@ -5535,7 +5602,7 @@ static void calculate_totalreserve_pages(void) 
for_each_online_pgdat(pgdat) { for (i = 0; i < MAX_NR_ZONES; i++) { struct zone *zone = pgdat->node_zones + i; - unsigned long max = 0; + long max = 0; /* Find valid and maximum lowmem_reserve in the zone */ for (j = i; j < MAX_NR_ZONES; j++) { @@ -5650,9 +5717,8 @@ static void __setup_per_zone_wmarks(void) zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1); __mod_zone_page_state(zone, NR_ALLOC_BATCH, - high_wmark_pages(zone) - - low_wmark_pages(zone) - - zone_page_state(zone, NR_ALLOC_BATCH)); + high_wmark_pages(zone) - low_wmark_pages(zone) - + atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH])); setup_zone_migrate_reserve(zone); spin_unlock_irqrestore(&zone->lock, flags); @@ -5777,7 +5843,12 @@ module_init(init_per_zone_wmark_min) int min_free_kbytes_sysctl_handler(ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos) { - proc_dointvec(table, write, buffer, length, ppos); + int rc; + + rc = proc_dointvec_minmax(table, write, buffer, length, ppos); + if (rc) + return rc; + if (write) { user_min_free_kbytes = min_free_kbytes; setup_per_zone_wmarks(); @@ -5845,23 +5916,38 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos) { struct zone *zone; - unsigned int cpu; + int old_percpu_pagelist_fraction; int ret; + mutex_lock(&pcp_batch_high_lock); + old_percpu_pagelist_fraction = percpu_pagelist_fraction; + ret = proc_dointvec_minmax(table, write, buffer, length, ppos); - if (!write || (ret < 0)) - return ret; + if (!write || ret < 0) + goto out; + + /* Sanity checking to avoid pcp imbalance */ + if (percpu_pagelist_fraction && + percpu_pagelist_fraction < MIN_PERCPU_PAGELIST_FRACTION) { + percpu_pagelist_fraction = old_percpu_pagelist_fraction; + ret = -EINVAL; + goto out; + } + + /* No change? 
*/ + if (percpu_pagelist_fraction == old_percpu_pagelist_fraction) + goto out; - mutex_lock(&pcp_batch_high_lock); for_each_populated_zone(zone) { - unsigned long high; - high = zone->managed_pages / percpu_pagelist_fraction; + unsigned int cpu; + for_each_possible_cpu(cpu) - pageset_set_high(per_cpu_ptr(zone->pageset, cpu), - high); + pageset_set_high_and_batch(zone, + per_cpu_ptr(zone->pageset, cpu)); } +out: mutex_unlock(&pcp_batch_high_lock); - return 0; + return ret; } int hashdist = HASHDIST_DEFAULT; @@ -6004,53 +6090,64 @@ static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn) * @end_bitidx: The last bit of interest * returns pageblock_bits flags */ -unsigned long get_pageblock_flags_group(struct page *page, - int start_bitidx, int end_bitidx) +unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn, + unsigned long end_bitidx, + unsigned long mask) { struct zone *zone; unsigned long *bitmap; - unsigned long pfn, bitidx; - unsigned long flags = 0; - unsigned long value = 1; + unsigned long bitidx, word_bitidx; + unsigned long word; zone = page_zone(page); - pfn = page_to_pfn(page); bitmap = get_pageblock_bitmap(zone, pfn); bitidx = pfn_to_bitidx(zone, pfn); + word_bitidx = bitidx / BITS_PER_LONG; + bitidx &= (BITS_PER_LONG-1); - for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1) - if (test_bit(bitidx + start_bitidx, bitmap)) - flags |= value; - - return flags; + word = bitmap[word_bitidx]; + bitidx += end_bitidx; + return (word >> (BITS_PER_LONG - bitidx - 1)) & mask; } /** - * set_pageblock_flags_group - Set the requested group of flags for a pageblock_nr_pages block of pages + * set_pfnblock_flags_mask - Set the requested group of flags for a pageblock_nr_pages block of pages * @page: The page within the block of interest * @start_bitidx: The first bit of interest * @end_bitidx: The last bit of interest * @flags: The flags to set */ -void set_pageblock_flags_group(struct page *page, unsigned long flags, - int 
start_bitidx, int end_bitidx) +void set_pfnblock_flags_mask(struct page *page, unsigned long flags, + unsigned long pfn, + unsigned long end_bitidx, + unsigned long mask) { struct zone *zone; unsigned long *bitmap; - unsigned long pfn, bitidx; - unsigned long value = 1; + unsigned long bitidx, word_bitidx; + unsigned long old_word, word; + + BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4); zone = page_zone(page); - pfn = page_to_pfn(page); bitmap = get_pageblock_bitmap(zone, pfn); bitidx = pfn_to_bitidx(zone, pfn); + word_bitidx = bitidx / BITS_PER_LONG; + bitidx &= (BITS_PER_LONG-1); + VM_BUG_ON(!zone_spans_pfn(zone, pfn)); - for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1) - if (flags & value) - __set_bit(bitidx + start_bitidx, bitmap); - else - __clear_bit(bitidx + start_bitidx, bitmap); + bitidx += end_bitidx; + mask <<= (BITS_PER_LONG - bitidx - 1); + flags <<= (BITS_PER_LONG - bitidx - 1); + + word = ACCESS_ONCE(bitmap[word_bitidx]); + for (;;) { + old_word = cmpxchg(&bitmap[word_bitidx], word, (word & ~mask) | flags); + if (word == old_word) + break; + word = old_word; + } } /* @@ -6210,7 +6307,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc, cc->nr_migratepages -= nr_reclaimed; ret = migrate_pages(&cc->migratepages, alloc_migrate_target, - 0, MIGRATE_SYNC, MR_CMA); + NULL, 0, cc->mode, MR_CMA); } if (ret < 0) { putback_movable_pages(&cc->migratepages); @@ -6249,7 +6346,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, .nr_migratepages = 0, .order = -1, .zone = page_zone(pfn_to_page(start)), - .sync = true, + .mode = MIGRATE_SYNC, .ignore_skip_hint = true, }; INIT_LIST_HEAD(&cc.migratepages); @@ -6404,7 +6501,7 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) { struct page *page; struct zone *zone; - int order, i; + unsigned int order, i; unsigned long pfn; unsigned long flags; /* find the first valid pfn */ @@ -6456,7 +6553,7 @@ bool is_free_buddy_page(struct page *page) struct zone 
*zone = page_zone(page); unsigned long pfn = page_to_pfn(page); unsigned long flags; - int order; + unsigned int order; spin_lock_irqsave(&zone->lock, flags); for (order = 0; order < MAX_ORDER; order++) { diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index 98caeee..384518e 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -181,6 +181,7 @@ static void free_page_cgroup(void *addr) sizeof(struct page_cgroup) * PAGES_PER_SECTION; BUG_ON(PageReserved(page)); + kmemleak_free(addr); free_pages_exact(addr, table_size); } } diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c index 3707c71..5110816 100644 --- a/mm/percpu-vm.c +++ b/mm/percpu-vm.c @@ -108,7 +108,7 @@ static int pcpu_alloc_pages(struct pcpu_chunk *chunk, int page_start, int page_end) { const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD; - unsigned int cpu; + unsigned int cpu, tcpu; int i; for_each_possible_cpu(cpu) { @@ -116,14 +116,23 @@ static int pcpu_alloc_pages(struct pcpu_chunk *chunk, struct page **pagep = &pages[pcpu_page_idx(cpu, i)]; *pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0); - if (!*pagep) { - pcpu_free_pages(chunk, pages, populated, - page_start, page_end); - return -ENOMEM; - } + if (!*pagep) + goto err; } } return 0; + +err: + while (--i >= page_start) + __free_page(pages[pcpu_page_idx(cpu, i)]); + + for_each_possible_cpu(tcpu) { + if (tcpu == cpu) + break; + for (i = page_start; i < page_end; i++) + __free_page(pages[pcpu_page_idx(tcpu, i)]); + } + return -ENOMEM; } /** @@ -263,6 +272,7 @@ err: __pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start), page_end - page_start); } + pcpu_post_unmap_tlb_flush(chunk, page_start, page_end); return err; } diff --git a/mm/percpu.c b/mm/percpu.c index 8c8e08f..25e2ea5 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -612,7 +612,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void) chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0])); if (!chunk->map) { - kfree(chunk); + pcpu_mem_free(chunk, pcpu_chunk_struct_size); 
return NULL; } diff --git a/mm/readahead.c b/mm/readahead.c index e4ed041..0f35e98 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -8,9 +8,7 @@ */ #include <linux/kernel.h> -#include <linux/fs.h> #include <linux/gfp.h> -#include <linux/mm.h> #include <linux/export.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> @@ -20,6 +18,8 @@ #include <linux/syscalls.h> #include <linux/file.h> +#include "internal.h" + /* * Initialise a struct file's readahead state. Assumes that the caller has * memset *ra to zero. @@ -149,8 +149,7 @@ out: * * Returns the number of pages requested, or the maximum amount of I/O allowed. */ -static int -__do_page_cache_readahead(struct address_space *mapping, struct file *filp, +int __do_page_cache_readahead(struct address_space *mapping, struct file *filp, pgoff_t offset, unsigned long nr_to_read, unsigned long lookahead_size) { @@ -179,7 +178,7 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp, rcu_read_lock(); page = radix_tree_lookup(&mapping->page_tree, page_offset); rcu_read_unlock(); - if (page) + if (page && !radix_tree_exceptional_entry(page)) continue; page = page_cache_alloc_readahead(mapping); @@ -237,28 +236,14 @@ int force_page_cache_readahead(struct address_space *mapping, struct file *filp, return ret; } +#define MAX_READAHEAD ((512*4096)/PAGE_CACHE_SIZE) /* * Given a desired number of PAGE_CACHE_SIZE readahead pages, return a * sensible upper limit. */ unsigned long max_sane_readahead(unsigned long nr) { - return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE_FILE) - + node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2); -} - -/* - * Submit IO for the read-ahead request in file_ra_state. 
- */ -unsigned long ra_submit(struct file_ra_state *ra, - struct address_space *mapping, struct file *filp) -{ - int actual; - - actual = __do_page_cache_readahead(mapping, filp, - ra->start, ra->size, ra->async_size); - - return actual; + return min(nr, MAX_READAHEAD); } /* @@ -351,7 +336,7 @@ static pgoff_t count_history_pages(struct address_space *mapping, pgoff_t head; rcu_read_lock(); - head = radix_tree_prev_hole(&mapping->page_tree, offset - 1, max); + head = page_cache_prev_hole(mapping, offset - 1, max); rcu_read_unlock(); return offset - 1 - head; @@ -401,6 +386,7 @@ ondemand_readahead(struct address_space *mapping, unsigned long req_size) { unsigned long max = max_sane_readahead(ra->ra_pages); + pgoff_t prev_offset; /* * start of file @@ -430,7 +416,7 @@ ondemand_readahead(struct address_space *mapping, pgoff_t start; rcu_read_lock(); - start = radix_tree_next_hole(&mapping->page_tree, offset+1,max); + start = page_cache_next_hole(mapping, offset + 1, max); rcu_read_unlock(); if (!start || start - offset > max) @@ -452,8 +438,11 @@ ondemand_readahead(struct address_space *mapping, /* * sequential cache miss + * trivial case: (offset - prev_offset) == 1 + * unaligned reads: (offset - prev_offset) == 0 */ - if (offset - (ra->prev_pos >> PAGE_CACHE_SHIFT) <= 1UL) + prev_offset = (unsigned long long)ra->prev_pos >> PAGE_CACHE_SHIFT; + if (offset - prev_offset <= 1UL) goto initial_readahead; /* @@ -103,6 +103,7 @@ static inline void anon_vma_free(struct anon_vma *anon_vma) * LOCK should suffice since the actual taking of the lock must * happen _before_ what follows. */ + might_sleep(); if (rwsem_is_locked(&anon_vma->root->rwsem)) { anon_vma_lock_write(anon_vma); anon_vma_unlock_write(anon_vma); @@ -273,6 +274,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma) { struct anon_vma_chain *avc; struct anon_vma *anon_vma; + int error; /* Don't bother if the parent process has no anon_vma here. 
*/ if (!pvma->anon_vma) @@ -282,8 +284,9 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma) * First, attach the new VMA to the parent VMA's anon_vmas, * so rmap can find non-COWed pages in child processes. */ - if (anon_vma_clone(vma, pvma)) - return -ENOMEM; + error = anon_vma_clone(vma, pvma); + if (error) + return error; /* Then add our own anon_vma. */ anon_vma = anon_vma_alloc(); @@ -426,8 +429,9 @@ struct anon_vma *page_get_anon_vma(struct page *page) * above cannot corrupt). */ if (!page_mapped(page)) { + rcu_read_unlock(); put_anon_vma(anon_vma); - anon_vma = NULL; + return NULL; } out: rcu_read_unlock(); @@ -477,9 +481,9 @@ struct anon_vma *page_lock_anon_vma_read(struct page *page) } if (!page_mapped(page)) { + rcu_read_unlock(); put_anon_vma(anon_vma); - anon_vma = NULL; - goto out; + return NULL; } /* we pinned the anon_vma, its safe to sleep */ @@ -567,6 +571,7 @@ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address) pgd_t *pgd; pud_t *pud; pmd_t *pmd = NULL; + pmd_t pmde; pgd = pgd_offset(mm, address); if (!pgd_present(*pgd)) @@ -577,7 +582,13 @@ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address) goto out; pmd = pmd_offset(pud, address); - if (!pmd_present(*pmd)) + /* + * Some THP functions use the sequence pmdp_clear_flush(), set_pmd_at() + * without holding anon_vma lock for write. So when looking for a + * genuine pmde (in which to find pte), test present and !THP together. 
+ */ + pmde = ACCESS_ONCE(*pmd); + if (!pmd_present(pmde) || pmd_trans_huge(pmde)) pmd = NULL; out: return pmd; @@ -613,9 +624,6 @@ pte_t *__page_check_address(struct page *page, struct mm_struct *mm, if (!pmd) return NULL; - if (pmd_trans_huge(*pmd)) - return NULL; - pte = pte_offset_map(pmd, address); /* Make a quick check before getting the lock */ if (!sync && !pte_present(*pte)) { @@ -1392,9 +1400,19 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount, BUG_ON(!page || PageAnon(page)); if (locked_vma) { - mlock_vma_page(page); /* no-op if already mlocked */ - if (page == check_page) + if (page == check_page) { + /* we know we have check_page locked */ + mlock_vma_page(page); ret = SWAP_MLOCK; + } else if (trylock_page(page)) { + /* + * If we can lock the page, perform mlock. + * Otherwise leave the page alone, it will be + * eventually encountered again later. + */ + mlock_vma_page(page); + unlock_page(page); + } continue; /* don't unmap */ } @@ -1671,10 +1689,9 @@ void __put_anon_vma(struct anon_vma *anon_vma) { struct anon_vma *root = anon_vma->root; + anon_vma_free(anon_vma); if (root != anon_vma && atomic_dec_and_test(&root->refcount)) anon_vma_free(root); - - anon_vma_free(anon_vma); } #ifdef CONFIG_MIGRATION @@ -80,11 +80,12 @@ static struct vfsmount *shm_mnt; #define SHORT_SYMLINK_LEN 128 /* - * shmem_fallocate and shmem_writepage communicate via inode->i_private - * (with i_mutex making sure that it has only one user at a time): - * we would prefer not to enlarge the shmem inode just for that. + * shmem_fallocate communicates with shmem_fault or shmem_writepage via + * inode->i_private (with i_mutex making sure that it has only one user at + * a time): we would prefer not to enlarge the shmem inode just for that. 
*/ struct shmem_falloc { + wait_queue_head_t *waitq; /* faults into hole wait for punch to end */ pgoff_t start; /* start of range currently being fallocated */ pgoff_t next; /* the next page offset to be fallocated */ pgoff_t nr_falloced; /* how many new pages have been fallocated */ @@ -242,19 +243,17 @@ static int shmem_radix_tree_replace(struct address_space *mapping, pgoff_t index, void *expected, void *replacement) { void **pslot; - void *item = NULL; + void *item; VM_BUG_ON(!expected); + VM_BUG_ON(!replacement); pslot = radix_tree_lookup_slot(&mapping->page_tree, index); - if (pslot) - item = radix_tree_deref_slot_protected(pslot, - &mapping->tree_lock); + if (!pslot) + return -ENOENT; + item = radix_tree_deref_slot_protected(pslot, &mapping->tree_lock); if (item != expected) return -ENOENT; - if (replacement) - radix_tree_replace_slot(pslot, replacement); - else - radix_tree_delete(&mapping->page_tree, index); + radix_tree_replace_slot(pslot, replacement); return 0; } @@ -331,84 +330,20 @@ static void shmem_delete_from_page_cache(struct page *page, void *radswap) } /* - * Like find_get_pages, but collecting swap entries as well as pages. - */ -static unsigned shmem_find_get_pages_and_swap(struct address_space *mapping, - pgoff_t start, unsigned int nr_pages, - struct page **pages, pgoff_t *indices) -{ - void **slot; - unsigned int ret = 0; - struct radix_tree_iter iter; - - if (!nr_pages) - return 0; - - rcu_read_lock(); -restart: - radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { - struct page *page; -repeat: - page = radix_tree_deref_slot(slot); - if (unlikely(!page)) - continue; - if (radix_tree_exception(page)) { - if (radix_tree_deref_retry(page)) - goto restart; - /* - * Otherwise, we must be storing a swap entry - * here as an exceptional entry: so return it - * without attempting to raise page count. - */ - goto export; - } - if (!page_cache_get_speculative(page)) - goto repeat; - - /* Has the page moved? 
*/ - if (unlikely(page != *slot)) { - page_cache_release(page); - goto repeat; - } -export: - indices[ret] = iter.index; - pages[ret] = page; - if (++ret == nr_pages) - break; - } - rcu_read_unlock(); - return ret; -} - -/* * Remove swap entry from radix tree, free the swap and its page cache. */ static int shmem_free_swap(struct address_space *mapping, pgoff_t index, void *radswap) { - int error; + void *old; spin_lock_irq(&mapping->tree_lock); - error = shmem_radix_tree_replace(mapping, index, radswap, NULL); + old = radix_tree_delete_item(&mapping->page_tree, index, radswap); spin_unlock_irq(&mapping->tree_lock); - if (!error) - free_swap_and_cache(radix_to_swp_entry(radswap)); - return error; -} - -/* - * Pagevec may contain swap entries, so shuffle up pages before releasing. - */ -static void shmem_deswap_pagevec(struct pagevec *pvec) -{ - int i, j; - - for (i = 0, j = 0; i < pagevec_count(pvec); i++) { - struct page *page = pvec->pages[i]; - if (!radix_tree_exceptional_entry(page)) - pvec->pages[j++] = page; - } - pvec->nr = j; + if (old != radswap) + return -ENOENT; + free_swap_and_cache(radix_to_swp_entry(radswap)); + return 0; } /* @@ -429,12 +364,12 @@ void shmem_unlock_mapping(struct address_space *mapping) * Avoid pagevec_lookup(): find_get_pages() returns 0 as if it * has finished, if it hits a row of PAGEVEC_SIZE swap entries. 
*/ - pvec.nr = shmem_find_get_pages_and_swap(mapping, index, - PAGEVEC_SIZE, pvec.pages, indices); + pvec.nr = find_get_entries(mapping, index, + PAGEVEC_SIZE, pvec.pages, indices); if (!pvec.nr) break; index = indices[pvec.nr - 1] + 1; - shmem_deswap_pagevec(&pvec); + pagevec_remove_exceptionals(&pvec); check_move_unevictable_pages(pvec.pages, pvec.nr); pagevec_release(&pvec); cond_resched(); @@ -466,9 +401,9 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, pagevec_init(&pvec, 0); index = start; while (index < end) { - pvec.nr = shmem_find_get_pages_and_swap(mapping, index, - min(end - index, (pgoff_t)PAGEVEC_SIZE), - pvec.pages, indices); + pvec.nr = find_get_entries(mapping, index, + min(end - index, (pgoff_t)PAGEVEC_SIZE), + pvec.pages, indices); if (!pvec.nr) break; mem_cgroup_uncharge_start(); @@ -497,7 +432,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, } unlock_page(page); } - shmem_deswap_pagevec(&pvec); + pagevec_remove_exceptionals(&pvec); pagevec_release(&pvec); mem_cgroup_uncharge_end(); cond_resched(); @@ -533,22 +468,20 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, return; index = start; - for ( ; ; ) { + while (index < end) { cond_resched(); - pvec.nr = shmem_find_get_pages_and_swap(mapping, index, + + pvec.nr = find_get_entries(mapping, index, min(end - index, (pgoff_t)PAGEVEC_SIZE), - pvec.pages, indices); + pvec.pages, indices); if (!pvec.nr) { - if (index == start || unfalloc) + /* If all gone or hole-punch or unfalloc, we're done */ + if (index == start || end != -1) break; + /* But if truncating, restart to make sure all gone */ index = start; continue; } - if ((index == start || unfalloc) && indices[0] >= end) { - shmem_deswap_pagevec(&pvec); - pagevec_release(&pvec); - break; - } mem_cgroup_uncharge_start(); for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; @@ -560,8 +493,12 @@ static void 
shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, if (radix_tree_exceptional_entry(page)) { if (unfalloc) continue; - nr_swaps_freed += !shmem_free_swap(mapping, - index, page); + if (shmem_free_swap(mapping, index, page)) { + /* Swap was replaced by page: retry */ + index--; + break; + } + nr_swaps_freed++; continue; } @@ -570,11 +507,16 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, if (page->mapping == mapping) { VM_BUG_ON(PageWriteback(page)); truncate_inode_page(mapping, page); + } else { + /* Page was replaced by swap: retry */ + unlock_page(page); + index--; + break; } } unlock_page(page); } - shmem_deswap_pagevec(&pvec); + pagevec_remove_exceptionals(&pvec); pagevec_release(&pvec); mem_cgroup_uncharge_end(); index++; @@ -826,6 +768,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) spin_lock(&inode->i_lock); shmem_falloc = inode->i_private; if (shmem_falloc && + !shmem_falloc->waitq && index >= shmem_falloc->start && index < shmem_falloc->next) shmem_falloc->nr_unswapped++; @@ -1082,7 +1025,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, return -EFBIG; repeat: swap.val = 0; - page = find_lock_page(mapping, index); + page = find_lock_entry(mapping, index); if (radix_tree_exceptional_entry(page)) { swap = radix_to_swp_entry(page); page = NULL; @@ -1094,6 +1037,9 @@ repeat: goto failed; } + if (page && sgp == SGP_WRITE) + mark_page_accessed(page); + /* fallocated page? 
*/ if (page && !PageUptodate(page)) { if (sgp != SGP_READ) @@ -1175,6 +1121,9 @@ repeat: shmem_recalc_inode(inode); spin_unlock(&info->lock); + if (sgp == SGP_WRITE) + mark_page_accessed(page); + delete_from_swap_cache(page); set_page_dirty(page); swap_free(swap); @@ -1199,8 +1148,11 @@ repeat: goto decused; } - SetPageSwapBacked(page); + __SetPageSwapBacked(page); __set_page_locked(page); + if (sgp == SGP_WRITE) + init_page_accessed(page); + error = mem_cgroup_cache_charge(page, current->mm, gfp & GFP_RECLAIM_MASK); if (error) @@ -1300,6 +1252,64 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) int error; int ret = VM_FAULT_LOCKED; + /* + * Trinity finds that probing a hole which tmpfs is punching can + * prevent the hole-punch from ever completing: which in turn + * locks writers out with its hold on i_mutex. So refrain from + * faulting pages into the hole while it's being punched. Although + * shmem_undo_range() does remove the additions, it may be unable to + * keep up, as each new page needs its own unmap_mapping_range() call, + * and the i_mmap tree grows ever slower to scan if new vmas are added. + * + * It does not matter if we sometimes reach this check just before the + * hole-punch begins, so that one fault then races with the punch: + * we just need to make racing faults a rare case. + * + * The implementation below would be much simpler if we just used a + * standard mutex or completion: but we cannot take i_mutex in fault, + * and bloating every shmem inode for this unlikely case would be sad. 
+ */ + if (unlikely(inode->i_private)) { + struct shmem_falloc *shmem_falloc; + + spin_lock(&inode->i_lock); + shmem_falloc = inode->i_private; + if (shmem_falloc && + shmem_falloc->waitq && + vmf->pgoff >= shmem_falloc->start && + vmf->pgoff < shmem_falloc->next) { + wait_queue_head_t *shmem_falloc_waitq; + DEFINE_WAIT(shmem_fault_wait); + + ret = VM_FAULT_NOPAGE; + if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) && + !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) { + /* It's polite to up mmap_sem if we can */ + up_read(&vma->vm_mm->mmap_sem); + ret = VM_FAULT_RETRY; + } + + shmem_falloc_waitq = shmem_falloc->waitq; + prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait, + TASK_UNINTERRUPTIBLE); + spin_unlock(&inode->i_lock); + schedule(); + + /* + * shmem_falloc_waitq points into the shmem_fallocate() + * stack of the hole-punching task: shmem_falloc_waitq + * is usually invalid by the time we reach here, but + * finish_wait() does not dereference it in that case; + * though i_lock needed lest racing with wake_up_all(). + */ + spin_lock(&inode->i_lock); + finish_wait(shmem_falloc_waitq, &shmem_fault_wait); + spin_unlock(&inode->i_lock); + return ret; + } + spin_unlock(&inode->i_lock); + } + error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret); if (error) return ((error == -ENOMEM) ? 
VM_FAULT_OOM : VM_FAULT_SIGBUS); @@ -1419,6 +1429,11 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode return inode; } +bool shmem_mapping(struct address_space *mapping) +{ + return mapping->backing_dev_info == &shmem_backing_dev_info; +} + #ifdef CONFIG_TMPFS static const struct inode_operations shmem_symlink_inode_operations; static const struct inode_operations shmem_short_symlink_operations; @@ -1731,7 +1746,7 @@ static pgoff_t shmem_seek_hole_data(struct address_space *mapping, pagevec_init(&pvec, 0); pvec.nr = 1; /* start small: we may be there already */ while (!done) { - pvec.nr = shmem_find_get_pages_and_swap(mapping, index, + pvec.nr = find_get_entries(mapping, index, pvec.nr, pvec.pages, indices); if (!pvec.nr) { if (whence == SEEK_DATA) @@ -1758,7 +1773,7 @@ static pgoff_t shmem_seek_hole_data(struct address_space *mapping, break; } } - shmem_deswap_pagevec(&pvec); + pagevec_remove_exceptionals(&pvec); pagevec_release(&pvec); pvec.nr = PAGEVEC_SIZE; cond_resched(); @@ -1819,12 +1834,25 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, struct address_space *mapping = file->f_mapping; loff_t unmap_start = round_up(offset, PAGE_SIZE); loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1; + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq); + + shmem_falloc.waitq = &shmem_falloc_waitq; + shmem_falloc.start = unmap_start >> PAGE_SHIFT; + shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT; + spin_lock(&inode->i_lock); + inode->i_private = &shmem_falloc; + spin_unlock(&inode->i_lock); if ((u64)unmap_end > (u64)unmap_start) unmap_mapping_range(mapping, unmap_start, 1 + unmap_end - unmap_start, 0); shmem_truncate_range(inode, offset, offset + len - 1); /* No need to unmap again: hole-punching leaves COWed pages */ + + spin_lock(&inode->i_lock); + inode->i_private = NULL; + wake_up_all(&shmem_falloc_waitq); + spin_unlock(&inode->i_lock); error = 0; goto out; } @@ -1842,6 +1870,7 @@ static long 
shmem_fallocate(struct file *file, int mode, loff_t offset, goto out; } + shmem_falloc.waitq = NULL; shmem_falloc.start = start; shmem_falloc.next = start; shmem_falloc.nr_falloced = 0; @@ -2077,8 +2106,10 @@ static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct if (new_dentry->d_inode) { (void) shmem_unlink(new_dir, new_dentry); - if (they_are_dirs) + if (they_are_dirs) { + drop_nlink(new_dentry->d_inode); drop_nlink(old_dir); + } } else if (they_are_dirs) { drop_nlink(old_dir); inc_nlink(new_dir); @@ -930,7 +930,8 @@ static void *__ac_put_obj(struct kmem_cache *cachep, struct array_cache *ac, { if (unlikely(pfmemalloc_active)) { /* Some pfmemalloc slabs exist, check if this is one */ - struct page *page = virt_to_head_page(objp); + struct slab *slabp = virt_to_slab(objp); + struct page *page = virt_to_head_page(slabp->s_mem); if (PageSlabPfmemalloc(page)) set_obj_pfmemalloc(&objp); } @@ -1776,7 +1777,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) __SetPageSlab(page + i); if (page->pfmemalloc) - SetPageSlabPfmemalloc(page + i); + SetPageSlabPfmemalloc(page); } memcg_bind_pages(cachep, cachep->gfporder); @@ -1809,9 +1810,10 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) else sub_zone_page_state(page_zone(page), NR_SLAB_UNRECLAIMABLE, nr_freed); + + __ClearPageSlabPfmemalloc(page); while (i--) { BUG_ON(!PageSlab(page)); - __ClearPageSlabPfmemalloc(page); __ClearPageSlab(page); page++; } @@ -2220,7 +2222,8 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) int __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) { - size_t left_over, slab_size, ralign; + size_t left_over, slab_size; + size_t ralign = BYTES_PER_WORD; gfp_t gfp; int err; size_t size = cachep->size; @@ -2253,14 +2256,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) size &= ~(BYTES_PER_WORD - 1); } - /* - * Redzoning and user store require word 
alignment or possibly larger. - * Note this will be overridden by architecture or caller mandated - * alignment if either is greater than BYTES_PER_WORD. - */ - if (flags & SLAB_STORE_USER) - ralign = BYTES_PER_WORD; - if (flags & SLAB_RED_ZONE) { ralign = REDZONE_ALIGN; /* If redzoning, ensure that the second redzone is suitably @@ -3220,7 +3215,7 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK); retry_cpuset: - cpuset_mems_cookie = get_mems_allowed(); + cpuset_mems_cookie = read_mems_allowed_begin(); zonelist = node_zonelist(slab_node(), flags); retry: @@ -3276,7 +3271,7 @@ retry: } } - if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !obj)) + if (unlikely(!obj && read_mems_allowed_retry(cpuset_mems_cookie))) goto retry_cpuset; return obj; } diff --git a/mm/slab_common.c b/mm/slab_common.c index e2e98af..97e5f5e 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -56,7 +56,7 @@ static int kmem_cache_sanity_check(struct mem_cgroup *memcg, const char *name, continue; } -#if !defined(CONFIG_SLUB) || !defined(CONFIG_SLUB_DEBUG_ON) +#if !defined(CONFIG_SLUB) /* * For simplicity, we won't check this in the list of memcg * caches. We have control over memcg naming, and if there @@ -1664,7 +1664,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags, return NULL; do { - cpuset_mems_cookie = get_mems_allowed(); + cpuset_mems_cookie = read_mems_allowed_begin(); zonelist = node_zonelist(slab_node(), flags); for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { struct kmem_cache_node *n; @@ -1676,19 +1676,17 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags, object = get_partial_node(s, n, c, flags); if (object) { /* - * Return the object even if - * put_mems_allowed indicated that - * the cpuset mems_allowed was - * updated in parallel. It's a - * harmless race between the alloc - * and the cpuset update. 
+ * Don't check read_mems_allowed_retry() + * here - if mems_allowed was updated in + * parallel, that was a harmless race + * between allocation and the cpuset + * update */ - put_mems_allowed(cpuset_mems_cookie); return object; } } } - } while (!put_mems_allowed(cpuset_mems_cookie)); + } while (read_mems_allowed_retry(cpuset_mems_cookie)); #endif return NULL; } @@ -72,7 +72,7 @@ static void __page_cache_release(struct page *page) static void __put_single_page(struct page *page) { __page_cache_release(page); - free_hot_cold_page(page, 0); + free_hot_cold_page(page, false); } static void __put_compound_page(struct page *page) @@ -441,7 +441,7 @@ static void __activate_page(struct page *page, struct lruvec *lruvec, SetPageActive(page); lru += LRU_ACTIVE; add_page_to_lru_list(page, lruvec, lru); - trace_mm_lru_activate(page, page_to_pfn(page)); + trace_mm_lru_activate(page); __count_vm_event(PGACTIVATE); update_page_reclaim_stat(lruvec, file, 1); @@ -554,12 +554,17 @@ void mark_page_accessed(struct page *page) EXPORT_SYMBOL(mark_page_accessed); /* - * Queue the page for addition to the LRU via pagevec. The decision on whether - * to add the page to the [in]active [file|anon] list is deferred until the - * pagevec is drained. This gives a chance for the caller of __lru_cache_add() - * have the page added to the active list using mark_page_accessed(). 
+ * Used to mark_page_accessed(page) that is not visible yet and when it is + * still safe to use non-atomic ops */ -void __lru_cache_add(struct page *page) +void init_page_accessed(struct page *page) +{ + if (!PageReferenced(page)) + __SetPageReferenced(page); +} +EXPORT_SYMBOL(init_page_accessed); + +static void __lru_cache_add(struct page *page) { struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec); @@ -569,11 +574,34 @@ void __lru_cache_add(struct page *page) pagevec_add(pvec, page); put_locked_var(swapvec_lock, lru_add_pvec); } -EXPORT_SYMBOL(__lru_cache_add); + +/** + * lru_cache_add: add a page to the page lists + * @page: the page to add + */ +void lru_cache_add_anon(struct page *page) +{ + if (PageActive(page)) + ClearPageActive(page); + __lru_cache_add(page); +} + +void lru_cache_add_file(struct page *page) +{ + if (PageActive(page)) + ClearPageActive(page); + __lru_cache_add(page); +} +EXPORT_SYMBOL(lru_cache_add_file); /** * lru_cache_add - add a page to a page list * @page: the page to be added to the LRU. + * + * Queue the page for addition to the LRU via pagevec. The decision on whether + * to add the page to the [in]active [file|anon] list is deferred until the + * pagevec is drained. This gives a chance for the caller of lru_cache_add() + * have the page added to the active list using mark_page_accessed(). */ void lru_cache_add(struct page *page) { @@ -785,7 +813,7 @@ void lru_add_drain_all(void) * grabbed the page via the LRU. If it did, give up: shrink_inactive_list() * will free it. 
*/ -void release_pages(struct page **pages, int nr, int cold) +void release_pages(struct page **pages, int nr, bool cold) { int i; LIST_HEAD(pages_to_free); @@ -826,7 +854,7 @@ void release_pages(struct page **pages, int nr, int cold) } /* Clear Active bit in case of parallel mark_page_accessed */ - ClearPageActive(page); + __ClearPageActive(page); list_add(&page->lru, &pages_to_free); } @@ -908,7 +936,7 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec, SetPageLRU(page); add_page_to_lru_list(page, lruvec, lru); update_page_reclaim_stat(lruvec, file, active); - trace_mm_lru_insertion(page, page_to_pfn(page), lru, trace_pagemap_flags(page)); + trace_mm_lru_insertion(page, lru); } /* @@ -922,6 +950,57 @@ void __pagevec_lru_add(struct pagevec *pvec) EXPORT_SYMBOL(__pagevec_lru_add); /** + * pagevec_lookup_entries - gang pagecache lookup + * @pvec: Where the resulting entries are placed + * @mapping: The address_space to search + * @start: The starting entry index + * @nr_entries: The maximum number of entries + * @indices: The cache indices corresponding to the entries in @pvec + * + * pagevec_lookup_entries() will search for and return a group of up + * to @nr_entries pages and shadow entries in the mapping. All + * entries are placed in @pvec. pagevec_lookup_entries() takes a + * reference against actual pages in @pvec. + * + * The search returns a group of mapping-contiguous entries with + * ascending indexes. There may be holes in the indices due to + * not-present entries. + * + * pagevec_lookup_entries() returns the number of entries which were + * found. 
+ */ +unsigned pagevec_lookup_entries(struct pagevec *pvec, + struct address_space *mapping, + pgoff_t start, unsigned nr_pages, + pgoff_t *indices) +{ + pvec->nr = find_get_entries(mapping, start, nr_pages, + pvec->pages, indices); + return pagevec_count(pvec); +} + +/** + * pagevec_remove_exceptionals - pagevec exceptionals pruning + * @pvec: The pagevec to prune + * + * pagevec_lookup_entries() fills both pages and exceptional radix + * tree entries into the pagevec. This function prunes all + * exceptionals from @pvec without leaving holes, so that it can be + * passed on to page-only pagevec operations. + */ +void pagevec_remove_exceptionals(struct pagevec *pvec) +{ + int i, j; + + for (i = 0, j = 0; i < pagevec_count(pvec); i++) { + struct page *page = pvec->pages[i]; + if (!radix_tree_exceptional_entry(page)) + pvec->pages[j++] = page; + } + pvec->nr = j; +} + +/** * pagevec_lookup - gang pagecache lookup * @pvec: Where the resulting pages are placed * @mapping: The address_space to search diff --git a/mm/swap_state.c b/mm/swap_state.c index e6f15f8..4079edf 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -63,6 +63,8 @@ unsigned long total_swapcache_pages(void) return ret; } +static atomic_t swapin_readahead_hits = ATOMIC_INIT(4); + void show_swap_cache_info(void) { printk("%lu pages in swap cache\n", total_swapcache_pages()); @@ -268,7 +270,7 @@ void free_pages_and_swap_cache(struct page **pages, int nr) for (i = 0; i < todo; i++) free_swap_cache(pagep[i]); - release_pages(pagep, todo, 0); + release_pages(pagep, todo, false); pagep += todo; nr -= todo; } @@ -286,8 +288,11 @@ struct page * lookup_swap_cache(swp_entry_t entry) page = find_get_page(swap_address_space(entry), entry.val); - if (page) + if (page) { INC_CACHE_INFO(find_success); + if (TestClearPageReadahead(page)) + atomic_inc(&swapin_readahead_hits); + } INC_CACHE_INFO(find_total); return page; @@ -389,6 +394,50 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, return 
found_page; } +static unsigned long swapin_nr_pages(unsigned long offset) +{ + static unsigned long prev_offset; + unsigned int pages, max_pages, last_ra; + static atomic_t last_readahead_pages; + + max_pages = 1 << ACCESS_ONCE(page_cluster); + if (max_pages <= 1) + return 1; + + /* + * This heuristic has been found to work well on both sequential and + * random loads, swapping to hard disk or to SSD: please don't ask + * what the "+ 2" means, it just happens to work well, that's all. + */ + pages = atomic_xchg(&swapin_readahead_hits, 0) + 2; + if (pages == 2) { + /* + * We can have no readahead hits to judge by: but must not get + * stuck here forever, so check for an adjacent offset instead + * (and don't even bother to check whether swap type is same). + */ + if (offset != prev_offset + 1 && offset != prev_offset - 1) + pages = 1; + prev_offset = offset; + } else { + unsigned int roundup = 4; + while (roundup < pages) + roundup <<= 1; + pages = roundup; + } + + if (pages > max_pages) + pages = max_pages; + + /* Don't shrink readahead too fast */ + last_ra = atomic_read(&last_readahead_pages) / 2; + if (pages < last_ra) + pages = last_ra; + atomic_set(&last_readahead_pages, pages); + + return pages; +} + /** * swapin_readahead - swap in pages in hope we need them soon * @entry: swap entry of this memory @@ -412,11 +461,16 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask, struct vm_area_struct *vma, unsigned long addr) { struct page *page; - unsigned long offset = swp_offset(entry); + unsigned long entry_offset = swp_offset(entry); + unsigned long offset = entry_offset; unsigned long start_offset, end_offset; - unsigned long mask = (1UL << page_cluster) - 1; + unsigned long mask; struct blk_plug plug; + mask = swapin_nr_pages(offset) - 1; + if (!mask) + goto skip; + /* Read a page_cluster sized and aligned cluster around offset. 
*/ start_offset = offset & ~mask; end_offset = offset | mask; @@ -430,10 +484,13 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask, gfp_mask, vma, addr); if (!page) continue; + if (offset != entry_offset) + SetPageReadahead(page); page_cache_release(page); } blk_finish_plug(&plug); lru_add_drain(); /* Push any new pages onto the LRU now */ +skip: return read_swap_cache_async(entry, gfp_mask, vma, addr); } diff --git a/mm/swapfile.c b/mm/swapfile.c index 0ec2eaf..660b9c0 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -51,14 +51,32 @@ atomic_long_t nr_swap_pages; /* protected with swap_lock. reading in vm_swap_full() doesn't need lock */ long total_swap_pages; static int least_priority; -static atomic_t highest_priority_index = ATOMIC_INIT(-1); static const char Bad_file[] = "Bad swap file entry "; static const char Unused_file[] = "Unused swap file entry "; static const char Bad_offset[] = "Bad swap offset entry "; static const char Unused_offset[] = "Unused swap offset entry "; -struct swap_list_t swap_list = {-1, -1}; +/* + * all active swap_info_structs + * protected with swap_lock, and ordered by priority. + */ +PLIST_HEAD(swap_active_head); + +/* + * all available (active, not full) swap_info_structs + * protected with swap_avail_lock, ordered by priority. + * This is used by get_swap_page() instead of swap_active_head + * because swap_active_head includes all swap_info_structs, + * but get_swap_page() doesn't need to look at full ones. + * This uses its own lock instead of swap_lock because when a + * swap_info_struct changes between not-full/full, it needs to + * add/remove itself to/from this list, but the swap_info_struct->lock + * is held and the locking order requires swap_lock to be taken + * before any swap_info_struct->lock. 
+ */ +static PLIST_HEAD(swap_avail_head); +static DEFINE_SPINLOCK(swap_avail_lock); struct swap_info_struct *swap_info[MAX_SWAPFILES]; @@ -591,6 +609,9 @@ checks: if (si->inuse_pages == si->pages) { si->lowest_bit = si->max; si->highest_bit = 0; + spin_lock(&swap_avail_lock); + plist_del(&si->avail_list, &swap_avail_head); + spin_unlock(&swap_avail_lock); } si->swap_map[offset] = usage; inc_cluster_info_page(si, si->cluster_info, offset); @@ -639,71 +660,65 @@ no_page: swp_entry_t get_swap_page(void) { - struct swap_info_struct *si; + struct swap_info_struct *si, *next; pgoff_t offset; - int type, next; - int wrapped = 0; - int hp_index; - spin_lock(&swap_lock); if (atomic_long_read(&nr_swap_pages) <= 0) goto noswap; atomic_long_dec(&nr_swap_pages); - for (type = swap_list.next; type >= 0 && wrapped < 2; type = next) { - hp_index = atomic_xchg(&highest_priority_index, -1); - /* - * highest_priority_index records current highest priority swap - * type which just frees swap entries. If its priority is - * higher than that of swap_list.next swap type, we use it. It - * isn't protected by swap_lock, so it can be an invalid value - * if the corresponding swap type is swapoff. We double check - * the flags here. It's even possible the swap type is swapoff - * and swapon again and its priority is changed. In such rare - * case, low prority swap type might be used, but eventually - * high priority swap will be used after several rounds of - * swap. 
- */ - if (hp_index != -1 && hp_index != type && - swap_info[type]->prio < swap_info[hp_index]->prio && - (swap_info[hp_index]->flags & SWP_WRITEOK)) { - type = hp_index; - swap_list.next = type; - } - - si = swap_info[type]; - next = si->next; - if (next < 0 || - (!wrapped && si->prio != swap_info[next]->prio)) { - next = swap_list.head; - wrapped++; - } + spin_lock(&swap_avail_lock); +start_over: + plist_for_each_entry_safe(si, next, &swap_avail_head, avail_list) { + /* requeue si to after same-priority siblings */ + plist_requeue(&si->avail_list, &swap_avail_head); + spin_unlock(&swap_avail_lock); spin_lock(&si->lock); - if (!si->highest_bit) { + if (!si->highest_bit || !(si->flags & SWP_WRITEOK)) { + spin_lock(&swap_avail_lock); + if (plist_node_empty(&si->avail_list)) { + spin_unlock(&si->lock); + goto nextsi; + } + WARN(!si->highest_bit, + "swap_info %d in list but !highest_bit\n", + si->type); + WARN(!(si->flags & SWP_WRITEOK), + "swap_info %d in list but !SWP_WRITEOK\n", + si->type); + plist_del(&si->avail_list, &swap_avail_head); spin_unlock(&si->lock); - continue; + goto nextsi; } - if (!(si->flags & SWP_WRITEOK)) { - spin_unlock(&si->lock); - continue; - } - - swap_list.next = next; - spin_unlock(&swap_lock); /* This is called for allocating swap entry for cache */ offset = scan_swap_map(si, SWAP_HAS_CACHE); spin_unlock(&si->lock); if (offset) - return swp_entry(type, offset); - spin_lock(&swap_lock); - next = swap_list.next; + return swp_entry(si->type, offset); + pr_debug("scan_swap_map of si %d failed to find offset\n", + si->type); + spin_lock(&swap_avail_lock); +nextsi: + /* + * if we got here, it's likely that si was almost full before, + * and since scan_swap_map() can drop the si->lock, multiple + * callers probably all tried to get a page from the same si + * and it filled up before we could get one; or, the si filled + * up between us dropping swap_avail_lock and taking si->lock. 
+ * Since we dropped the swap_avail_lock, the swap_avail_head + * list may have been modified; so if next is still in the + * swap_avail_head list then try it, otherwise start over. + */ + if (plist_node_empty(&next->avail_list)) + goto start_over; } + spin_unlock(&swap_avail_lock); + atomic_long_inc(&nr_swap_pages); noswap: - spin_unlock(&swap_lock); return (swp_entry_t) {0}; } @@ -765,27 +780,6 @@ out: return NULL; } -/* - * This swap type frees swap entry, check if it is the highest priority swap - * type which just frees swap entry. get_swap_page() uses - * highest_priority_index to search highest priority swap type. The - * swap_info_struct.lock can't protect us if there are multiple swap types - * active, so we use atomic_cmpxchg. - */ -static void set_highest_priority_index(int type) -{ - int old_hp_index, new_hp_index; - - do { - old_hp_index = atomic_read(&highest_priority_index); - if (old_hp_index != -1 && - swap_info[old_hp_index]->prio >= swap_info[type]->prio) - break; - new_hp_index = type; - } while (atomic_cmpxchg(&highest_priority_index, - old_hp_index, new_hp_index) != old_hp_index); -} - static unsigned char swap_entry_free(struct swap_info_struct *p, swp_entry_t entry, unsigned char usage) { @@ -827,9 +821,18 @@ static unsigned char swap_entry_free(struct swap_info_struct *p, dec_cluster_info_page(p, p->cluster_info, offset); if (offset < p->lowest_bit) p->lowest_bit = offset; - if (offset > p->highest_bit) + if (offset > p->highest_bit) { + bool was_full = !p->highest_bit; p->highest_bit = offset; - set_highest_priority_index(p->type); + if (was_full && (p->flags & SWP_WRITEOK)) { + spin_lock(&swap_avail_lock); + WARN_ON(!plist_node_empty(&p->avail_list)); + if (plist_node_empty(&p->avail_list)) + plist_add(&p->avail_list, + &swap_avail_head); + spin_unlock(&swap_avail_lock); + } + } atomic_long_inc(&nr_swap_pages); p->inuse_pages--; frontswap_invalidate_page(p->type, offset); @@ -1764,30 +1767,37 @@ static void _enable_swap_info(struct 
swap_info_struct *p, int prio, unsigned char *swap_map, struct swap_cluster_info *cluster_info) { - int i, prev; - if (prio >= 0) p->prio = prio; else p->prio = --least_priority; + /* + * the plist prio is negated because plist ordering is + * low-to-high, while swap ordering is high-to-low + */ + p->list.prio = -p->prio; + p->avail_list.prio = -p->prio; p->swap_map = swap_map; p->cluster_info = cluster_info; p->flags |= SWP_WRITEOK; atomic_long_add(p->pages, &nr_swap_pages); total_swap_pages += p->pages; - /* insert swap space into swap_list: */ - prev = -1; - for (i = swap_list.head; i >= 0; i = swap_info[i]->next) { - if (p->prio >= swap_info[i]->prio) - break; - prev = i; - } - p->next = i; - if (prev < 0) - swap_list.head = swap_list.next = p->type; - else - swap_info[prev]->next = p->type; + assert_spin_locked(&swap_lock); + /* + * both lists are plists, and thus priority ordered. + * swap_active_head needs to be priority ordered for swapoff(), + * which on removal of any swap_info_struct with an auto-assigned + * (i.e. negative) priority increments the auto-assigned priority + * of any lower-priority swap_info_structs. + * swap_avail_head needs to be priority ordered for get_swap_page(), + * which allocates swap pages from the highest available priority + * swap_info_struct. 
+ */ + plist_add(&p->list, &swap_active_head); + spin_lock(&swap_avail_lock); + plist_add(&p->avail_list, &swap_avail_head); + spin_unlock(&swap_avail_lock); } static void enable_swap_info(struct swap_info_struct *p, int prio, @@ -1822,8 +1832,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) struct address_space *mapping; struct inode *inode; struct filename *pathname; - int i, type, prev; - int err; + int err, found = 0; unsigned int old_block_size; if (!capable(CAP_SYS_ADMIN)) @@ -1841,17 +1850,16 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) goto out; mapping = victim->f_mapping; - prev = -1; spin_lock(&swap_lock); - for (type = swap_list.head; type >= 0; type = swap_info[type]->next) { - p = swap_info[type]; + plist_for_each_entry(p, &swap_active_head, list) { if (p->flags & SWP_WRITEOK) { - if (p->swap_file->f_mapping == mapping) + if (p->swap_file->f_mapping == mapping) { + found = 1; break; + } } - prev = type; } - if (type < 0) { + if (!found) { err = -EINVAL; spin_unlock(&swap_lock); goto out_dput; @@ -1863,20 +1871,21 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) spin_unlock(&swap_lock); goto out_dput; } - if (prev < 0) - swap_list.head = p->next; - else - swap_info[prev]->next = p->next; - if (type == swap_list.next) { - /* just pick something that's safe... 
*/ - swap_list.next = swap_list.head; - } + spin_lock(&swap_avail_lock); + plist_del(&p->avail_list, &swap_avail_head); + spin_unlock(&swap_avail_lock); spin_lock(&p->lock); if (p->prio < 0) { - for (i = p->next; i >= 0; i = swap_info[i]->next) - swap_info[i]->prio = p->prio--; + struct swap_info_struct *si = p; + + plist_for_each_entry_continue(si, &swap_active_head, list) { + si->prio++; + si->list.prio--; + si->avail_list.prio--; + } least_priority++; } + plist_del(&p->list, &swap_active_head); atomic_long_sub(p->pages, &nr_swap_pages); total_swap_pages -= p->pages; p->flags &= ~SWP_WRITEOK; @@ -1884,7 +1893,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) spin_unlock(&swap_lock); set_current_oom_origin(); - err = try_to_unuse(type, false, 0); /* force all pages to be unused */ + err = try_to_unuse(p->type, false, 0); /* force unuse all pages */ clear_current_oom_origin(); if (err) { @@ -1926,7 +1935,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) frontswap_map_set(p, NULL); spin_unlock(&p->lock); spin_unlock(&swap_lock); - frontswap_invalidate_area(type); + frontswap_invalidate_area(p->type); mutex_unlock(&swapon_mutex); free_percpu(p->percpu_cluster); p->percpu_cluster = NULL; @@ -1934,7 +1943,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) vfree(cluster_info); vfree(frontswap_map); /* Destroy swap account informatin */ - swap_cgroup_swapoff(type); + swap_cgroup_swapoff(p->type); inode = mapping->host; if (S_ISBLK(inode->i_mode)) { @@ -2141,8 +2150,9 @@ static struct swap_info_struct *alloc_swap_info(void) */ } INIT_LIST_HEAD(&p->first_swap_extent.list); + plist_node_init(&p->list, 0); + plist_node_init(&p->avail_list, 0); p->flags = SWP_USED; - p->next = -1; spin_unlock(&swap_lock); spin_lock_init(&p->lock); diff --git a/mm/truncate.c b/mm/truncate.c index 353b683..827ad8d 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -20,8 +20,25 @@ #include <linux/buffer_head.h> /* grr. 
try_to_release_page, do_invalidatepage */ #include <linux/cleancache.h> +#include <linux/rmap.h> #include "internal.h" +static void clear_exceptional_entry(struct address_space *mapping, + pgoff_t index, void *entry) +{ + /* Handled by shmem itself */ + if (shmem_mapping(mapping)) + return; + + spin_lock_irq(&mapping->tree_lock); + /* + * Regular page slots are stabilized by the page lock even + * without the tree itself locked. These unlocked entries + * need verification under the tree lock. + */ + radix_tree_delete_item(&mapping->page_tree, index, entry); + spin_unlock_irq(&mapping->tree_lock); +} /** * do_invalidatepage - invalidate part or all of a page @@ -208,6 +225,7 @@ void truncate_inode_pages_range(struct address_space *mapping, unsigned int partial_start; /* inclusive */ unsigned int partial_end; /* exclusive */ struct pagevec pvec; + pgoff_t indices[PAGEVEC_SIZE]; pgoff_t index; int i; @@ -238,17 +256,23 @@ void truncate_inode_pages_range(struct address_space *mapping, pagevec_init(&pvec, 0); index = start; - while (index < end && pagevec_lookup(&pvec, mapping, index, - min(end - index, (pgoff_t)PAGEVEC_SIZE))) { + while (index < end && pagevec_lookup_entries(&pvec, mapping, index, + min(end - index, (pgoff_t)PAGEVEC_SIZE), + indices)) { mem_cgroup_uncharge_start(); for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; /* We rely upon deletion not changing page->index */ - index = page->index; + index = indices[i]; if (index >= end) break; + if (radix_tree_exceptional_entry(page)) { + clear_exceptional_entry(mapping, index, page); + continue; + } + if (!trylock_page(page)) continue; WARN_ON(page->index != index); @@ -259,6 +283,7 @@ void truncate_inode_pages_range(struct address_space *mapping, truncate_inode_page(mapping, page); unlock_page(page); } + pagevec_remove_exceptionals(&pvec); pagevec_release(&pvec); mem_cgroup_uncharge_end(); cond_resched(); @@ -307,14 +332,16 @@ void truncate_inode_pages_range(struct address_space 
*mapping, index = start; for ( ; ; ) { cond_resched(); - if (!pagevec_lookup(&pvec, mapping, index, - min(end - index, (pgoff_t)PAGEVEC_SIZE))) { + if (!pagevec_lookup_entries(&pvec, mapping, index, + min(end - index, (pgoff_t)PAGEVEC_SIZE), + indices)) { if (index == start) break; index = start; continue; } - if (index == start && pvec.pages[0]->index >= end) { + if (index == start && indices[0] >= end) { + pagevec_remove_exceptionals(&pvec); pagevec_release(&pvec); break; } @@ -323,16 +350,22 @@ void truncate_inode_pages_range(struct address_space *mapping, struct page *page = pvec.pages[i]; /* We rely upon deletion not changing page->index */ - index = page->index; + index = indices[i]; if (index >= end) break; + if (radix_tree_exceptional_entry(page)) { + clear_exceptional_entry(mapping, index, page); + continue; + } + lock_page(page); WARN_ON(page->index != index); wait_on_page_writeback(page); truncate_inode_page(mapping, page); unlock_page(page); } + pagevec_remove_exceptionals(&pvec); pagevec_release(&pvec); mem_cgroup_uncharge_end(); index++; @@ -375,6 +408,7 @@ EXPORT_SYMBOL(truncate_inode_pages); unsigned long invalidate_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t end) { + pgoff_t indices[PAGEVEC_SIZE]; struct pagevec pvec; pgoff_t index = start; unsigned long ret; @@ -390,17 +424,23 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, */ pagevec_init(&pvec, 0); - while (index <= end && pagevec_lookup(&pvec, mapping, index, - min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { + while (index <= end && pagevec_lookup_entries(&pvec, mapping, index, + min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, + indices)) { mem_cgroup_uncharge_start(); for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; /* We rely upon deletion not changing page->index */ - index = page->index; + index = indices[i]; if (index > end) break; + if (radix_tree_exceptional_entry(page)) { + 
clear_exceptional_entry(mapping, index, page); + continue; + } + if (!trylock_page(page)) continue; WARN_ON(page->index != index); @@ -414,6 +454,7 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, deactivate_page(page); count += ret; } + pagevec_remove_exceptionals(&pvec); pagevec_release(&pvec); mem_cgroup_uncharge_end(); cond_resched(); @@ -481,6 +522,7 @@ static int do_launder_page(struct address_space *mapping, struct page *page) int invalidate_inode_pages2_range(struct address_space *mapping, pgoff_t start, pgoff_t end) { + pgoff_t indices[PAGEVEC_SIZE]; struct pagevec pvec; pgoff_t index; int i; @@ -491,17 +533,23 @@ int invalidate_inode_pages2_range(struct address_space *mapping, cleancache_invalidate_inode(mapping); pagevec_init(&pvec, 0); index = start; - while (index <= end && pagevec_lookup(&pvec, mapping, index, - min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { + while (index <= end && pagevec_lookup_entries(&pvec, mapping, index, + min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, + indices)) { mem_cgroup_uncharge_start(); for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; /* We rely upon deletion not changing page->index */ - index = page->index; + index = indices[i]; if (index > end) break; + if (radix_tree_exceptional_entry(page)) { + clear_exceptional_entry(mapping, index, page); + continue; + } + lock_page(page); WARN_ON(page->index != index); if (page->mapping != mapping) { @@ -539,6 +587,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping, ret = ret2; unlock_page(page); } + pagevec_remove_exceptionals(&pvec); pagevec_release(&pvec); mem_cgroup_uncharge_end(); cond_resched(); @@ -613,12 +662,67 @@ EXPORT_SYMBOL(truncate_pagecache); */ void truncate_setsize(struct inode *inode, loff_t newsize) { + loff_t oldsize = inode->i_size; + i_size_write(inode, newsize); + if (newsize > oldsize) + pagecache_isize_extended(inode, oldsize, newsize); truncate_pagecache(inode, newsize); } 
EXPORT_SYMBOL(truncate_setsize); /** + * pagecache_isize_extended - update pagecache after extension of i_size + * @inode: inode for which i_size was extended + * @from: original inode size + * @to: new inode size + * + * Handle extension of inode size either caused by extending truncate or by + * write starting after current i_size. We mark the page straddling current + * i_size RO so that page_mkwrite() is called on the nearest write access to + * the page. This way filesystem can be sure that page_mkwrite() is called on + * the page before user writes to the page via mmap after the i_size has been + * changed. + * + * The function must be called after i_size is updated so that page fault + * coming after we unlock the page will already see the new i_size. + * The function must be called while we still hold i_mutex - this not only + * makes sure i_size is stable but also that userspace cannot observe new + * i_size value before we are prepared to store mmap writes at new inode size. + */ +void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to) +{ + int bsize = 1 << inode->i_blkbits; + loff_t rounded_from; + struct page *page; + pgoff_t index; + + WARN_ON(to > inode->i_size); + + if (from >= to || bsize == PAGE_CACHE_SIZE) + return; + /* Page straddling @from will not have any hole block created? */ + rounded_from = round_up(from, bsize); + if (to <= rounded_from || !(rounded_from & (PAGE_CACHE_SIZE - 1))) + return; + + index = from >> PAGE_CACHE_SHIFT; + page = find_lock_page(inode->i_mapping, index); + /* Page not cached? Nothing to do */ + if (!page) + return; + /* + * See clear_page_dirty_for_io() for details why set_page_dirty() + * is needed. 
+ */ + if (page_mkclean(page)) + set_page_dirty(page); + unlock_page(page); + page_cache_release(page); +} +EXPORT_SYMBOL(pagecache_isize_extended); + +/** * truncate_pagecache_range - unmap and remove pagecache that is hole-punched * @inode: inode * @lstart: offset of beginning of hole @@ -272,17 +272,14 @@ pid_t vm_is_stack(struct task_struct *task, if (in_group) { struct task_struct *t; - rcu_read_lock(); - if (!pid_alive(task)) - goto done; - t = task; - do { + rcu_read_lock(); + for_each_thread(task, t) { if (vm_is_stack_for_task(t, vma)) { ret = t->pid; goto done; } - } while_each_thread(task, t); + } done: rcu_read_unlock(); } diff --git a/mm/vmacache.c b/mm/vmacache.c new file mode 100644 index 0000000..1037a3ba --- /dev/null +++ b/mm/vmacache.c @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2014 Davidlohr Bueso. + */ +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/vmacache.h> + +/* + * Flush vma caches for threads that share a given mm. + * + * The operation is safe because the caller holds the mmap_sem + * exclusively and other threads accessing the vma cache will + * have mmap_sem held at least for read, so no extra locking + * is required to maintain the vma cache. + */ +void vmacache_flush_all(struct mm_struct *mm) +{ + struct task_struct *g, *p; + + rcu_read_lock(); + for_each_process_thread(g, p) { + /* + * Only flush the vmacache pointers as the + * mm seqnum is already set and curr's will + * be set upon invalidation when the next + * lookup is done. + */ + if (mm == p->mm) + vmacache_flush(p); + } + rcu_read_unlock(); +} + +/* + * This task may be accessing a foreign mm via (for example) + * get_user_pages()->find_vma(). The vmacache is task-local and this + * task's vmacache pertains to a different mm (ie, its own). There is + * nothing we can do here. + * + * Also handle the case where a kernel thread has adopted this mm via use_mm(). + * That kernel thread's vmacache is not applicable to this mm. 
+ */ +static bool vmacache_valid_mm(struct mm_struct *mm) +{ + return current->mm == mm && !(current->flags & PF_KTHREAD); +} + +void vmacache_update(unsigned long addr, struct vm_area_struct *newvma) +{ + if (vmacache_valid_mm(newvma->vm_mm)) + current->vmacache[VMACACHE_HASH(addr)] = newvma; +} + +static bool vmacache_valid(struct mm_struct *mm) +{ + struct task_struct *curr; + + if (!vmacache_valid_mm(mm)) + return false; + + curr = current; + if (mm->vmacache_seqnum != curr->vmacache_seqnum) { + /* + * First attempt will always be invalid, initialize + * the new cache for this task here. + */ + curr->vmacache_seqnum = mm->vmacache_seqnum; + vmacache_flush(curr); + return false; + } + return true; +} + +struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr) +{ + int i; + + if (!vmacache_valid(mm)) + return NULL; + + for (i = 0; i < VMACACHE_SIZE; i++) { + struct vm_area_struct *vma = current->vmacache[i]; + + if (!vma) + continue; + if (WARN_ON_ONCE(vma->vm_mm != mm)) + break; + if (vma->vm_start <= addr && vma->vm_end > addr) + return vma; + } + + return NULL; +} + +#ifndef CONFIG_MMU +struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + int i; + + if (!vmacache_valid(mm)) + return NULL; + + for (i = 0; i < VMACACHE_SIZE; i++) { + struct vm_area_struct *vma = current->vmacache[i]; + + if (vma && vma->vm_start == start && vma->vm_end == end) + return vma; + } + + return NULL; +} +#endif diff --git a/mm/vmalloc.c b/mm/vmalloc.c index d64289d..83e33b8 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -359,6 +359,12 @@ static struct vmap_area *alloc_vmap_area(unsigned long size, if (unlikely(!va)) return ERR_PTR(-ENOMEM); + /* + * Only scan the relevant parts containing pointers to other objects + * to avoid false negatives. 
+ */ + kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask & GFP_RECLAIM_MASK); + retry: spin_lock(&vmap_area_lock); /* @@ -1649,11 +1655,11 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, clear_vm_uninitialized_flag(area); /* - * A ref_count = 3 is needed because the vm_struct and vmap_area - * structures allocated in the __get_vm_area_node() function contain - * references to the virtual address of the vmalloc'ed block. + * A ref_count = 2 is needed because vm_struct allocated in + * __get_vm_area_node() contains a reference to the virtual address of + * the vmalloc'ed block. */ - kmemleak_alloc(addr, real_size, 3, gfp_mask); + kmemleak_alloc(addr, real_size, 2, gfp_mask); return addr; @@ -2682,14 +2688,14 @@ void get_vmalloc_info(struct vmalloc_info *vmi) prev_end = VMALLOC_START; - spin_lock(&vmap_area_lock); + rcu_read_lock(); if (list_empty(&vmap_area_list)) { vmi->largest_chunk = VMALLOC_TOTAL; goto out; } - list_for_each_entry(va, &vmap_area_list, list) { + list_for_each_entry_rcu(va, &vmap_area_list, list) { unsigned long addr = va->va_start; /* @@ -2716,7 +2722,7 @@ void get_vmalloc_info(struct vmalloc_info *vmi) vmi->largest_chunk = VMALLOC_END - prev_end; out: - spin_unlock(&vmap_area_lock); + rcu_read_unlock(); } #endif diff --git a/mm/vmpressure.c b/mm/vmpressure.c index e0f6283..c98b14e 100644 --- a/mm/vmpressure.c +++ b/mm/vmpressure.c @@ -164,6 +164,7 @@ static void vmpressure_work_fn(struct work_struct *work) unsigned long scanned; unsigned long reclaimed; + spin_lock(&vmpr->sr_lock); /* * Several contexts might be calling vmpressure(), so it is * possible that the work was rescheduled again before the old @@ -172,11 +173,12 @@ static void vmpressure_work_fn(struct work_struct *work) * here. No need for any locks here since we don't care if * vmpr->reclaimed is in sync. 
*/ - if (!vmpr->scanned) + scanned = vmpr->scanned; + if (!scanned) { + spin_unlock(&vmpr->sr_lock); return; + } - spin_lock(&vmpr->sr_lock); - scanned = vmpr->scanned; reclaimed = vmpr->reclaimed; vmpr->scanned = 0; vmpr->reclaimed = 0; diff --git a/mm/vmscan.c b/mm/vmscan.c index 05e6095..ee8363f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -163,7 +163,8 @@ static unsigned long zone_reclaimable_pages(struct zone *zone) bool zone_reclaimable(struct zone *zone) { - return zone->pages_scanned < zone_reclaimable_pages(zone) * 6; + return zone_page_state(zone, NR_PAGES_SCANNED) < + zone_reclaimable_pages(zone) * 6; } static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru) @@ -224,15 +225,15 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker, unsigned long freed = 0; unsigned long long delta; long total_scan; - long max_pass; + long freeable; long nr; long new_nr; int nid = shrinkctl->nid; long batch_size = shrinker->batch ? shrinker->batch : SHRINK_BATCH; - max_pass = shrinker->count_objects(shrinker, shrinkctl); - if (max_pass == 0) + freeable = shrinker->count_objects(shrinker, shrinkctl); + if (freeable == 0) return 0; /* @@ -244,14 +245,14 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker, total_scan = nr; delta = (4 * nr_pages_scanned) / shrinker->seeks; - delta *= max_pass; + delta *= freeable; do_div(delta, lru_pages + 1); total_scan += delta; if (total_scan < 0) { printk(KERN_ERR "shrink_slab: %pF negative objects to delete nr=%ld\n", shrinker->scan_objects, total_scan); - total_scan = max_pass; + total_scan = freeable; } /* @@ -260,38 +261,55 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker, * shrinkers to return -1 all the time. This results in a large * nr being built up so when a shrink that can do some work * comes along it empties the entire cache due to nr >>> - * max_pass. This is bad for sustaining a working set in + * freeable. 
This is bad for sustaining a working set in * memory. * * Hence only allow the shrinker to scan the entire cache when * a large delta change is calculated directly. */ - if (delta < max_pass / 4) - total_scan = min(total_scan, max_pass / 2); + if (delta < freeable / 4) + total_scan = min(total_scan, freeable / 2); /* * Avoid risking looping forever due to too large nr value: * never try to free more than twice the estimate number of * freeable entries. */ - if (total_scan > max_pass * 2) - total_scan = max_pass * 2; + if (total_scan > freeable * 2) + total_scan = freeable * 2; trace_mm_shrink_slab_start(shrinker, shrinkctl, nr, nr_pages_scanned, lru_pages, - max_pass, delta, total_scan); + freeable, delta, total_scan); - while (total_scan >= batch_size) { + /* + * Normally, we should not scan less than batch_size objects in one + * pass to avoid too frequent shrinker calls, but if the slab has less + * than batch_size objects in total and we are really tight on memory, + * we will try to reclaim all available objects, otherwise we can end + * up failing allocations although there are plenty of reclaimable + * objects spread over several slabs with usage less than the + * batch_size. + * + * We detect the "tight on memory" situations by looking at the total + * number of objects we want to scan (total_scan). If it is greater + * than the total number of objects on slab (freeable), we must be + * scanning at high prio and therefore should try to reclaim as much as + * possible. 
+ */ + while (total_scan >= batch_size || + total_scan >= freeable) { unsigned long ret; + unsigned long nr_to_scan = min(batch_size, total_scan); - shrinkctl->nr_to_scan = batch_size; + shrinkctl->nr_to_scan = nr_to_scan; ret = shrinker->scan_objects(shrinker, shrinkctl); if (ret == SHRINK_STOP) break; freed += ret; - count_vm_events(SLABS_SCANNED, batch_size); - total_scan -= batch_size; + count_vm_events(SLABS_SCANNED, nr_to_scan); + total_scan -= nr_to_scan; cond_resched(); } @@ -352,16 +370,17 @@ unsigned long shrink_slab(struct shrink_control *shrinkctl, } list_for_each_entry(shrinker, &shrinker_list, list) { - for_each_node_mask(shrinkctl->nid, shrinkctl->nodes_to_scan) { - if (!node_online(shrinkctl->nid)) - continue; - - if (!(shrinker->flags & SHRINKER_NUMA_AWARE) && - (shrinkctl->nid != 0)) - break; - + if (!(shrinker->flags & SHRINKER_NUMA_AWARE)) { + shrinkctl->nid = 0; freed += shrink_slab_node(shrinkctl, shrinker, - nr_pages_scanned, lru_pages); + nr_pages_scanned, lru_pages); + continue; + } + + for_each_node_mask(shrinkctl->nid, shrinkctl->nodes_to_scan) { + if (node_online(shrinkctl->nid)) + freed += shrink_slab_node(shrinkctl, shrinker, + nr_pages_scanned, lru_pages); } } @@ -1089,7 +1108,7 @@ keep: VM_BUG_ON(PageLRU(page) || PageUnevictable(page)); } - free_hot_cold_page_list(&free_pages, 1); + free_hot_cold_page_list(&free_pages, true); list_splice(&ret_pages, page_list); count_vm_events(PGACTIVATE, pgactivate); @@ -1126,7 +1145,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone, TTU_UNMAP|TTU_IGNORE_ACCESS, &dummy1, &dummy2, &dummy3, &dummy4, &dummy5, true); list_splice(&clean_pages, page_list); - __mod_zone_page_state(zone, NR_ISOLATED_FILE, -ret); + mod_zone_page_state(zone, NR_ISOLATED_FILE, -ret); return ret; } @@ -1452,7 +1471,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken); if (global_reclaim(sc)) { - zone->pages_scanned += 
nr_scanned; + __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned); if (current_is_kswapd()) __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scanned); else @@ -1487,7 +1506,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, spin_unlock_irq(&zone->lru_lock); - free_hot_cold_page_list(&page_list, 1); + free_hot_cold_page_list(&page_list, true); /* * If reclaim is isolating dirty pages under writeback, it implies @@ -1522,19 +1541,18 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, * If dirty pages are scanned that are not queued for IO, it * implies that flushers are not keeping up. In this case, flag * the zone ZONE_TAIL_LRU_DIRTY and kswapd will start writing - * pages from reclaim context. It will forcibly stall in the - * next check. + * pages from reclaim context. */ if (nr_unqueued_dirty == nr_taken) zone_set_flag(zone, ZONE_TAIL_LRU_DIRTY); /* - * In addition, if kswapd scans pages marked marked for - * immediate reclaim and under writeback (nr_immediate), it - * implies that pages are cycling through the LRU faster than + * If kswapd scans pages marked for immediate + * reclaim and under writeback (nr_immediate), it implies + * that pages are cycling through the LRU faster than * they are written so also forcibly stall. 
*/ - if (nr_unqueued_dirty == nr_taken || nr_immediate) + if (nr_immediate) congestion_wait(BLK_RW_ASYNC, HZ/10); } @@ -1642,7 +1660,7 @@ static void shrink_active_list(unsigned long nr_to_scan, nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold, &nr_scanned, sc, isolate_mode, lru); if (global_reclaim(sc)) - zone->pages_scanned += nr_scanned; + __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned); reclaim_stat->recent_scanned[file] += nr_taken; @@ -1708,7 +1726,7 @@ static void shrink_active_list(unsigned long nr_to_scan, __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken); spin_unlock_irq(&zone->lru_lock); - free_hot_cold_page_list(&l_hold, 1); + free_hot_cold_page_list(&l_hold, true); } #ifdef CONFIG_SWAP @@ -1830,7 +1848,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, struct zone *zone = lruvec_zone(lruvec); unsigned long anon_prio, file_prio; enum scan_balance scan_balance; - unsigned long anon, file, free; + unsigned long anon, file; bool force_scan = false; unsigned long ap, fp; enum lru_list lru; @@ -1878,11 +1896,6 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, goto out; } - anon = get_lru_size(lruvec, LRU_ACTIVE_ANON) + - get_lru_size(lruvec, LRU_INACTIVE_ANON); - file = get_lru_size(lruvec, LRU_ACTIVE_FILE) + - get_lru_size(lruvec, LRU_INACTIVE_FILE); - /* * If it's foreseeable that reclaiming the file cache won't be * enough to get the zone back into a desirable shape, we have @@ -1890,8 +1903,14 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, * thrashing - remaining file pages alone. 
*/ if (global_reclaim(sc)) { - free = zone_page_state(zone, NR_FREE_PAGES); - if (unlikely(file + free <= high_wmark_pages(zone))) { + unsigned long zonefile; + unsigned long zonefree; + + zonefree = zone_page_state(zone, NR_FREE_PAGES); + zonefile = zone_page_state(zone, NR_ACTIVE_FILE) + + zone_page_state(zone, NR_INACTIVE_FILE); + + if (unlikely(zonefile + zonefree <= high_wmark_pages(zone))) { scan_balance = SCAN_ANON; goto out; } @@ -1926,6 +1945,12 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, * * anon in [0], file in [1] */ + + anon = get_lru_size(lruvec, LRU_ACTIVE_ANON) + + get_lru_size(lruvec, LRU_INACTIVE_ANON); + file = get_lru_size(lruvec, LRU_ACTIVE_FILE) + + get_lru_size(lruvec, LRU_INACTIVE_FILE); + spin_lock_irq(&zone->lru_lock); if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) { reclaim_stat->recent_scanned[0] /= 2; @@ -2001,13 +2026,27 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) unsigned long nr_reclaimed = 0; unsigned long nr_to_reclaim = sc->nr_to_reclaim; struct blk_plug plug; - bool scan_adjusted = false; + bool scan_adjusted; get_scan_count(lruvec, sc, nr); /* Record the original scan target for proportional adjustments later */ memcpy(targets, nr, sizeof(nr)); + /* + * Global reclaiming within direct reclaim at DEF_PRIORITY is a normal + * event that can occur when there is little memory pressure e.g. + * multiple streaming readers/writers. Hence, we do not abort scanning + * when the requested number of pages are reclaimed when scanning at + * DEF_PRIORITY on the assumption that the fact we are direct + * reclaiming implies that kswapd is not keeping up and it is best to + * do a batch of work at once. For memcg reclaim one check is made to + * abort proportional reclaim if either the file or anon lru has already + * dropped to zero at the first pass. 
+ */ + scan_adjusted = (global_reclaim(sc) && !current_is_kswapd() && + sc->priority == DEF_PRIORITY); + blk_start_plug(&plug); while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || nr[LRU_INACTIVE_FILE]) { @@ -2028,17 +2067,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) continue; /* - * For global direct reclaim, reclaim only the number of pages - * requested. Less care is taken to scan proportionally as it - * is more important to minimise direct reclaim stall latency - * than it is to properly age the LRU lists. - */ - if (global_reclaim(sc) && !current_is_kswapd()) - break; - - /* * For kswapd and memcg, reclaim at least the number of pages - * requested. Ensure that the anon and file LRUs shrink + * requested. Ensure that the anon and file LRUs are scanned * proportionally what was requested by get_scan_count(). We * stop reclaiming one LRU and reduce the amount scanning * proportional to the original scan target. @@ -2046,6 +2076,15 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) nr_file = nr[LRU_INACTIVE_FILE] + nr[LRU_ACTIVE_FILE]; nr_anon = nr[LRU_INACTIVE_ANON] + nr[LRU_ACTIVE_ANON]; + /* + * It's just vindictive to attack the larger once the smaller + * has gone to zero. And given the way we stop scanning the + * smaller below, this makes sure that we only make one nudge + * towards proportionality once we've got nr_to_reclaim. 
+ */ + if (!nr_file || !nr_anon) + break; + if (nr_file > nr_anon) { unsigned long scan_target = targets[LRU_INACTIVE_ANON] + targets[LRU_ACTIVE_ANON] + 1; @@ -2407,8 +2446,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, unsigned long lru_pages = 0; nodes_clear(shrink->nodes_to_scan); - for_each_zone_zonelist(zone, z, zonelist, - gfp_zone(sc->gfp_mask)) { + for_each_zone_zonelist_nodemask(zone, z, zonelist, + gfp_zone(sc->gfp_mask), sc->nodemask) { if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) continue; @@ -2484,10 +2523,17 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat) for (i = 0; i <= ZONE_NORMAL; i++) { zone = &pgdat->node_zones[i]; + if (!populated_zone(zone)) + continue; + pfmemalloc_reserve += min_wmark_pages(zone); free_pages += zone_page_state(zone, NR_FREE_PAGES); } + /* If there are no reserves (unexpected config) then do not throttle */ + if (!pfmemalloc_reserve) + return true; + wmark_ok = free_pages > pfmemalloc_reserve / 2; /* kswapd must be awake if processes are being throttled */ @@ -2512,9 +2558,9 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat) static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist, nodemask_t *nodemask) { + struct zoneref *z; struct zone *zone; - int high_zoneidx = gfp_zone(gfp_mask); - pg_data_t *pgdat; + pg_data_t *pgdat = NULL; /* * Kernel threads should not be throttled as they may be indirectly @@ -2533,10 +2579,34 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist, if (fatal_signal_pending(current)) goto out; - /* Check if the pfmemalloc reserves are ok */ - first_zones_zonelist(zonelist, high_zoneidx, NULL, &zone); - pgdat = zone->zone_pgdat; - if (pfmemalloc_watermark_ok(pgdat)) + /* + * Check if the pfmemalloc reserves are ok by finding the first node + * with a usable ZONE_NORMAL or lower zone. 
The expectation is that + * GFP_KERNEL will be required for allocating network buffers when + * swapping over the network so ZONE_HIGHMEM is unusable. + * + * Throttling is based on the first usable node and throttled processes + * wait on a queue until kswapd makes progress and wakes them. There + * is an affinity then between processes waking up and where reclaim + * progress has been made assuming the process wakes on the same node. + * More importantly, processes running on remote nodes will not compete + * for remote pfmemalloc reserves and processes on different nodes + * should make reasonable progress. + */ + for_each_zone_zonelist_nodemask(zone, z, zonelist, + gfp_mask, nodemask) { + if (zone_idx(zone) > ZONE_NORMAL) + continue; + + /* Throttle based on the first usable node */ + pgdat = zone->zone_pgdat; + if (pfmemalloc_watermark_ok(pgdat)) + goto out; + break; + } + + /* If no zone was usable by the allocation flags then do not throttle */ + if (!pgdat) goto out; /* Account for the throttling */ @@ -2798,18 +2868,20 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining, return false; /* - * There is a potential race between when kswapd checks its watermarks - * and a process gets throttled. There is also a potential race if - * processes get throttled, kswapd wakes, a large process exits therby - * balancing the zones that causes kswapd to miss a wakeup. If kswapd - * is going to sleep, no process should be sleeping on pfmemalloc_wait - * so wake them now if necessary. If necessary, processes will wake - * kswapd and get throttled again + * The throttled processes are normally woken up in balance_pgdat() as + * soon as pfmemalloc_watermark_ok() is true. But there is a potential + * race between when kswapd checks the watermarks and a process gets + * throttled. 
There is also a potential race if processes get + * throttled, kswapd wakes, a large process exits thereby balancing the + * zones, which causes kswapd to exit balance_pgdat() before reaching + * the wake up checks. If kswapd is going to sleep, no process should + * be sleeping on pfmemalloc_wait, so wake them now if necessary. If + * the wake up is premature, processes will wake kswapd and get + * throttled again. The difference from wake ups in balance_pgdat() is + * that here we are under prepare_to_wait(). */ - if (waitqueue_active(&pgdat->pfmemalloc_wait)) { - wake_up(&pgdat->pfmemalloc_wait); - return false; - } + if (waitqueue_active(&pgdat->pfmemalloc_wait)) + wake_up_all(&pgdat->pfmemalloc_wait); return pgdat_balanced(pgdat, order, classzone_idx); } @@ -3267,7 +3339,10 @@ static int kswapd(void *p) } } + tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD); current->reclaim_state = NULL; + lockdep_clear_current_reclaim_state(); + return 0; } diff --git a/mm/vmstat.c b/mm/vmstat.c index efea337..9f16fd3 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -200,7 +200,7 @@ void set_pgdat_percpu_threshold(pg_data_t *pgdat, continue; threshold = (*calculate_pressure)(zone); - for_each_possible_cpu(cpu) + for_each_online_cpu(cpu) per_cpu_ptr(zone->pageset, cpu)->stat_threshold = threshold; } @@ -767,6 +767,7 @@ const char * const vmstat_text[] = { "nr_shmem", "nr_dirtied", "nr_written", + "nr_pages_scanned", #ifdef CONFIG_NUMA "numa_hit", @@ -857,12 +858,14 @@ const char * const vmstat_text[] = { "thp_zero_page_alloc", "thp_zero_page_alloc_failed", #endif +#ifdef CONFIG_DEBUG_TLBFLUSH #ifdef CONFIG_SMP "nr_tlb_remote_flush", "nr_tlb_remote_flush_received", -#endif +#endif /* CONFIG_SMP */ "nr_tlb_local_flush_all", "nr_tlb_local_flush_one", +#endif /* CONFIG_DEBUG_TLBFLUSH */ #endif /* CONFIG_VM_EVENTS_COUNTERS */ }; @@ -1059,7 +1062,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, min_wmark_pages(zone), low_wmark_pages(zone), 
high_wmark_pages(zone), - zone->pages_scanned, + zone_page_state(zone, NR_PAGES_SCANNED), zone->spanned_pages, zone->present_pages, zone->managed_pages); @@ -1069,10 +1072,10 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, zone_page_state(zone, i)); seq_printf(m, - "\n protection: (%lu", + "\n protection: (%ld", zone->lowmem_reserve[0]); for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++) - seq_printf(m, ", %lu", zone->lowmem_reserve[i]); + seq_printf(m, ", %ld", zone->lowmem_reserve[i]); seq_printf(m, ")" "\n pagesets"); diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 6ee48aa..5d56e05 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -106,56 +106,6 @@ u16 vlan_dev_vlan_id(const struct net_device *dev) } EXPORT_SYMBOL(vlan_dev_vlan_id); -static struct sk_buff *vlan_reorder_header(struct sk_buff *skb) -{ - if (skb_cow(skb, skb_headroom(skb)) < 0) - return NULL; - memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); - skb->mac_header += VLAN_HLEN; - return skb; -} - -struct sk_buff *vlan_untag(struct sk_buff *skb) -{ - struct vlan_hdr *vhdr; - u16 vlan_tci; - - if (unlikely(vlan_tx_tag_present(skb))) { - /* vlan_tci is already set-up so leave this for another time */ - return skb; - } - - skb = skb_share_check(skb, GFP_ATOMIC); - if (unlikely(!skb)) - goto err_free; - - if (unlikely(!pskb_may_pull(skb, VLAN_HLEN))) - goto err_free; - - vhdr = (struct vlan_hdr *) skb->data; - vlan_tci = ntohs(vhdr->h_vlan_TCI); - __vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci); - - skb_pull_rcsum(skb, VLAN_HLEN); - vlan_set_encap_proto(skb, vhdr); - - skb = vlan_reorder_header(skb); - if (unlikely(!skb)) - goto err_free; - - skb_reset_network_header(skb); - skb_reset_transport_header(skb); - skb_reset_mac_len(skb); - - return skb; - -err_free: - kfree_skb(skb); - return NULL; -} -EXPORT_SYMBOL(vlan_untag); - - /* * vlan info and vid list */ diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c 
index d1537dc..0c21361 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -512,10 +512,48 @@ static void vlan_dev_change_rx_flags(struct net_device *dev, int change) } } +static int vlan_calculate_locking_subclass(struct net_device *real_dev) +{ + int subclass = 0; + + while (is_vlan_dev(real_dev)) { + subclass++; + real_dev = vlan_dev_priv(real_dev)->real_dev; + } + + return subclass; +} + +static void vlan_dev_mc_sync(struct net_device *to, struct net_device *from) +{ + int err = 0, subclass; + + subclass = vlan_calculate_locking_subclass(to); + + spin_lock_nested(&to->addr_list_lock, subclass); + err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len); + if (!err) + __dev_set_rx_mode(to); + spin_unlock(&to->addr_list_lock); +} + +static void vlan_dev_uc_sync(struct net_device *to, struct net_device *from) +{ + int err = 0, subclass; + + subclass = vlan_calculate_locking_subclass(to); + + spin_lock_nested(&to->addr_list_lock, subclass); + err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); + if (!err) + __dev_set_rx_mode(to); + spin_unlock(&to->addr_list_lock); +} + static void vlan_dev_set_rx_mode(struct net_device *vlan_dev) { - dev_mc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev); - dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev); + vlan_dev_mc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev); + vlan_dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev); } /* @@ -624,9 +662,7 @@ static int vlan_dev_init(struct net_device *dev) SET_NETDEV_DEVTYPE(dev, &vlan_type); - if (is_vlan_dev(real_dev)) - subclass = 1; - + subclass = vlan_calculate_locking_subclass(dev); vlan_dev_set_lockdep_class(dev, subclass); vlan_dev_priv(dev)->vlan_pcpu_stats = alloc_percpu(struct vlan_pcpu_stats); @@ -658,9 +694,9 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev, struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; netdev_features_t old_features = features; - features &= real_dev->vlan_features; + features = 
netdev_intersect_features(features, real_dev->vlan_features); features |= NETIF_F_RXCSUM; - features &= real_dev->features; + features = netdev_intersect_features(features, real_dev->features); features |= old_features & NETIF_F_SOFT_FEATURES; features |= NETIF_F_LLTX; diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 7d424ac..43e875c 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1489,8 +1489,6 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, goto drop; /* Queue packet (standard) */ - skb->sk = sock; - if (sock_queue_rcv_skb(sock, skb) < 0) goto drop; @@ -1644,7 +1642,6 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr if (!skb) goto out; - skb->sk = sk; skb_reserve(skb, ddp_dl->header_length); skb_reserve(skb, dev->hard_header_len); skb->dev = dev; diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 1ce4b87..0679960 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -38,8 +38,10 @@ static void batadv_gw_node_free_ref(struct batadv_gw_node *gw_node) { - if (atomic_dec_and_test(&gw_node->refcount)) + if (atomic_dec_and_test(&gw_node->refcount)) { + batadv_orig_node_free_ref(gw_node->orig_node); kfree_rcu(gw_node, rcu); + } } static struct batadv_gw_node * @@ -344,9 +346,14 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, struct batadv_gw_node *gw_node; int down, up; + if (!atomic_inc_not_zero(&orig_node->refcount)) + return; + gw_node = kzalloc(sizeof(*gw_node), GFP_ATOMIC); - if (!gw_node) + if (!gw_node) { + batadv_orig_node_free_ref(orig_node); return; + } INIT_HLIST_NODE(&gw_node->list); gw_node->orig_node = orig_node; diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index c478e6b..75f8c72 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -83,7 +83,7 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) return true; 
/* no more parents..stop recursion */ - if (net_dev->iflink == net_dev->ifindex) + if (net_dev->iflink == 0 || net_dev->iflink == net_dev->ifindex) return false; /* recurse over the parent device */ diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index f081712..3d33941 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -690,14 +690,17 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->flags)) { struct hci_cp_auth_requested cp; - /* encrypt must be pending if auth is also pending */ - set_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags); - cp.handle = cpu_to_le16(conn->handle); hci_send_cmd(conn->hdev, HCI_OP_AUTH_REQUESTED, sizeof(cp), &cp); - if (conn->key_type != 0xff) + + /* If we're already encrypted set the REAUTH_PEND flag, + * otherwise set the ENCRYPT_PEND. + */ + if (conn->link_mode & HCI_LM_ENCRYPT) set_bit(HCI_CONN_REAUTH_PEND, &conn->flags); + else + set_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags); } return 0; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index a3af2b7..729f516 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -47,6 +47,10 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb) smp_mb__after_clear_bit(); /* wake_up_bit advises about this barrier */ wake_up_bit(&hdev->flags, HCI_INQUIRY); + hci_dev_lock(hdev); + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + hci_dev_unlock(hdev); + hci_conn_check_pending(hdev); } @@ -2993,6 +2997,12 @@ static void hci_key_refresh_complete_evt(struct hci_dev *hdev, if (!conn) goto unlock; + /* For BR/EDR the necessary steps are taken through the + * auth_complete event. 
+ */ + if (conn->type != LE_LINK) + goto unlock; + if (!ev->status) conn->sec_level = conn->pending_sec_level; @@ -3158,8 +3168,11 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev, /* If we're not the initiators request authorization to * proceed from user space (mgmt_user_confirm with - * confirm_hint set to 1). */ - if (!test_bit(HCI_CONN_AUTH_PEND, &conn->flags)) { + * confirm_hint set to 1). The exception is if neither + * side had MITM in which case we do auto-accept. + */ + if (!test_bit(HCI_CONN_AUTH_PEND, &conn->flags) && + (loc_mitm || rem_mitm)) { BT_DBG("Confirming auto-accept as acceptor"); confirm_hint = 1; goto confirm; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 0098af8..2710e85 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -631,11 +631,6 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, /*change security for LE channels */ if (chan->scid == L2CAP_CID_ATT) { - if (!conn->hcon->out) { - err = -EINVAL; - break; - } - if (smp_conn_security(conn->hcon, sec.level)) break; sk->sk_state = BT_CONFIG; @@ -887,7 +882,8 @@ static int l2cap_sock_shutdown(struct socket *sock, int how) l2cap_chan_close(chan, 0); lock_sock(sk); - if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime) + if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime && + !(current->flags & PF_EXITING)) err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime); } @@ -949,13 +945,16 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan) /* Check for backlog size */ if (sk_acceptq_is_full(parent)) { BT_DBG("backlog full %d", parent->sk_ack_backlog); + release_sock(parent); return NULL; } sk = l2cap_sock_alloc(sock_net(parent), NULL, BTPROTO_L2CAP, GFP_ATOMIC); - if (!sk) + if (!sk) { + release_sock(parent); return NULL; + } bt_sock_reclassify_lock(sk, BTPROTO_L2CAP); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index fedc539..211fffb 100644 --- 
a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2319,8 +2319,13 @@ static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev, } if (addr->type == BDADDR_LE_PUBLIC || addr->type == BDADDR_LE_RANDOM) { - /* Continue with pairing via SMP */ + /* Continue with pairing via SMP. The hdev lock must be + * released as SMP may try to reacquire it for crypto + * purposes. + */ + hci_dev_unlock(hdev); err = smp_user_confirm_reply(conn, mgmt_op, passkey); + hci_dev_lock(hdev); if (!err) err = cmd_complete(sk, hdev->id, mgmt_op, diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index ca957d3..19ba192 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -1857,10 +1857,13 @@ static struct rfcomm_session *rfcomm_process_rx(struct rfcomm_session *s) /* Get data directly from socket receive queue without copying it. */ while ((skb = skb_dequeue(&sk->sk_receive_queue))) { skb_orphan(skb); - if (!skb_linearize(skb)) + if (!skb_linearize(skb)) { s = rfcomm_recv_frame(s, skb); - else + if (!s) + break; + } else { kfree_skb(skb); + } } if (s && (sk->sk_state == BT_CLOSED)) diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index c1c6028..7ca014d 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -887,7 +887,8 @@ static int rfcomm_sock_shutdown(struct socket *sock, int how) sk->sk_shutdown = SHUTDOWN_MASK; __rfcomm_sock_close(sk); - if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime) + if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime && + !(current->flags & PF_EXITING)) err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime); } release_sock(sk); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index d021e44..4f5f01b 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -913,7 +913,8 @@ static int sco_sock_shutdown(struct socket *sock, int how) sco_sock_clear_timer(sk); __sco_sock_close(sk); - if 
(sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime && + !(current->flags & PF_EXITING)) err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime); } @@ -933,7 +934,8 @@ static int sco_sock_release(struct socket *sock) sco_sock_close(sk); - if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime) { + if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime && + !(current->flags & PF_EXITING)) { lock_sock(sk); err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime); release_sock(sk); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index a2fd37e..1f59299 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -72,7 +72,7 @@ int br_handle_frame_finish(struct sk_buff *skb) goto drop; if (!br_allowed_ingress(p->br, nbp_get_vlan_info(p), skb, &vid)) - goto drop; + goto out; /* insert into forwarding database after filtering to avoid spoofing */ br = p->br; @@ -146,8 +146,8 @@ static int br_handle_local_finish(struct sk_buff *skb) struct net_bridge_port *p = br_port_get_rcu(skb->dev); u16 vid = 0; - br_vlan_get_tag(skb, &vid); - if (p->flags & BR_LEARNING) + /* check if vlan is allowed, to avoid spoofing */ + if (p->flags & BR_LEARNING && br_should_learn(p, skb, &vid)) br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid); return 0; /* process further */ } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index f75d92e..b47b344e 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -446,6 +446,20 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } +static int br_dev_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct net_bridge *br = netdev_priv(dev); + + if (tb[IFLA_ADDRESS]) { + spin_lock_bh(&br->lock); + br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS])); + spin_unlock_bh(&br->lock); + } + + return register_netdevice(dev); +} + static size_t br_get_link_af_size(const struct net_device *dev) { struct net_port_vlans *pv; @@ -474,6 +488,7 
@@ struct rtnl_link_ops br_link_ops __read_mostly = { .priv_size = sizeof(struct net_bridge), .setup = br_dev_setup, .validate = br_validate, + .newlink = br_dev_newlink, .dellink = br_dev_delete, }; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 9a63c42..f02acd7 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -309,6 +309,9 @@ struct br_input_skb_cb { int igmp; int mrouters_only; #endif +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + bool vlan_filtered; +#endif }; #define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb) @@ -605,6 +608,7 @@ extern bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, extern bool br_allowed_egress(struct net_bridge *br, const struct net_port_vlans *v, const struct sk_buff *skb); +bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid); extern struct sk_buff *br_handle_vlan(struct net_bridge *br, const struct net_port_vlans *v, struct sk_buff *skb); @@ -671,6 +675,12 @@ static inline bool br_allowed_egress(struct net_bridge *br, return true; } +static inline bool br_should_learn(struct net_bridge_port *p, + struct sk_buff *skb, u16 *vid) +{ + return true; +} + static inline struct sk_buff *br_handle_vlan(struct net_bridge *br, const struct net_port_vlans *v, struct sk_buff *skb) diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 53f0990..f0db99f 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -136,7 +136,7 @@ static struct sk_buff *br_vlan_untag(struct sk_buff *skb) } skb->vlan_tci = 0; - skb = vlan_untag(skb); + skb = skb_vlan_untag(skb); if (skb) skb->vlan_tci = 0; @@ -149,7 +149,8 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, { u16 vid; - if (!br->vlan_enabled) + /* If this packet was not filtered at input, let it pass */ + if (!BR_INPUT_SKB_CB(skb)->vlan_filtered) goto out; /* At this point, we know that the frame was filtered and contains @@ -194,14 +195,18 @@ bool br_allowed_ingress(struct net_bridge 
*br, struct net_port_vlans *v, /* If VLAN filtering is disabled on the bridge, all packets are * permitted. */ - if (!br->vlan_enabled) + if (!br->vlan_enabled) { + BR_INPUT_SKB_CB(skb)->vlan_filtered = false; return true; + } /* If there are no vlan in the permitted list, all packets are * rejected. */ if (!v) - return false; + goto drop; + + BR_INPUT_SKB_CB(skb)->vlan_filtered = true; err = br_vlan_get_tag(skb, vid); if (!*vid) { @@ -212,7 +217,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, * vlan untagged or priority-tagged traffic belongs to. */ if (pvid == VLAN_N_VID) - return false; + goto drop; /* PVID is set on this port. Any untagged or priority-tagged * ingress frame is considered to belong to this vlan. @@ -235,7 +240,8 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, /* Frame had a valid vlan tag. See if vlan is allowed */ if (test_bit(*vid, v->vlan_bitmap)) return true; - +drop: + kfree_skb(skb); return false; } @@ -246,7 +252,8 @@ bool br_allowed_egress(struct net_bridge *br, { u16 vid; - if (!br->vlan_enabled) + /* If this packet was not filtered at input, let it pass */ + if (!BR_INPUT_SKB_CB(skb)->vlan_filtered) return true; if (!v) @@ -259,6 +266,35 @@ bool br_allowed_egress(struct net_bridge *br, return false; } +/* Called under RCU */ +bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid) +{ + struct net_bridge *br = p->br; + struct net_port_vlans *v; + + /* If filtering was disabled at input, let it pass. */ + if (!br->vlan_enabled) + return true; + + v = rcu_dereference(p->vlan_info); + if (!v) + return false; + + br_vlan_get_tag(skb, vid); + if (!*vid) { + *vid = br_get_pvid(v); + if (*vid == VLAN_N_VID) + return false; + + return true; + } + + if (test_bit(*vid, v->vlan_bitmap)) + return true; + + return false; +} + /* Must be protected by RTNL. * Must be called with vid in range from 1 to 4094 inclusive. 
*/ diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index ac78024..b166fc2 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1044,10 +1044,9 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl, if (repl->num_counters && copy_to_user(repl->counters, counterstmp, repl->num_counters * sizeof(struct ebt_counter))) { - ret = -EFAULT; + /* Silent error, can't fail, new table is already in place */ + net_warn_ratelimited("ebtables: counters copy to user failed while replacing table\n"); } - else - ret = 0; /* decrease module count and free resources */ EBT_ENTRY_ITERATE(table->entries, table->entries_size, diff --git a/net/can/gw.c b/net/can/gw.c index 3f9b0f3..233ce53 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -804,7 +804,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh) u8 limhops = 0; int err = 0; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (nlmsg_len(nlh) < sizeof(*r)) @@ -900,7 +900,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh) u8 limhops = 0; int err = 0; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (nlmsg_len(nlh) < sizeof(*r)) diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 96238ba..de6662b 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -13,8 +13,6 @@ #include "auth_x.h" #include "auth_x_protocol.h" -#define TEMP_TICKET_BUF_LEN 256 - static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed); static int ceph_x_is_authenticated(struct ceph_auth_client *ac) @@ -64,7 +62,7 @@ static int ceph_x_encrypt(struct ceph_crypto_key *secret, } static int ceph_x_decrypt(struct ceph_crypto_key *secret, - void **p, void *end, void *obuf, size_t olen) + void **p, void *end, void **obuf, size_t olen) { struct ceph_x_encrypt_header head; size_t head_len = sizeof(head); @@ -75,8 +73,14 @@ static int 
ceph_x_decrypt(struct ceph_crypto_key *secret, return -EINVAL; dout("ceph_x_decrypt len %d\n", len); - ret = ceph_decrypt2(secret, &head, &head_len, obuf, &olen, - *p, len); + if (*obuf == NULL) { + *obuf = kmalloc(len, GFP_NOFS); + if (!*obuf) + return -ENOMEM; + olen = len; + } + + ret = ceph_decrypt2(secret, &head, &head_len, *obuf, &olen, *p, len); if (ret) return ret; if (head.struct_v != 1 || le64_to_cpu(head.magic) != CEPHX_ENC_MAGIC) @@ -129,139 +133,120 @@ static void remove_ticket_handler(struct ceph_auth_client *ac, kfree(th); } -static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, - struct ceph_crypto_key *secret, - void *buf, void *end) +static int process_one_ticket(struct ceph_auth_client *ac, + struct ceph_crypto_key *secret, + void **p, void *end) { struct ceph_x_info *xi = ac->private; - int num; - void *p = buf; + int type; + u8 tkt_struct_v, blob_struct_v; + struct ceph_x_ticket_handler *th; + void *dbuf = NULL; + void *dp, *dend; + int dlen; + char is_enc; + struct timespec validity; + struct ceph_crypto_key old_key; + void *ticket_buf = NULL; + void *tp, *tpend; + struct ceph_timespec new_validity; + struct ceph_crypto_key new_session_key; + struct ceph_buffer *new_ticket_blob; + unsigned long new_expires, new_renew_after; + u64 new_secret_id; int ret; - char *dbuf; - char *ticket_buf; - u8 reply_struct_v; - dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS); - if (!dbuf) - return -ENOMEM; + ceph_decode_need(p, end, sizeof(u32) + 1, bad); - ret = -ENOMEM; - ticket_buf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS); - if (!ticket_buf) - goto out_dbuf; + type = ceph_decode_32(p); + dout(" ticket type %d %s\n", type, ceph_entity_type_name(type)); - ceph_decode_need(&p, end, 1 + sizeof(u32), bad); - reply_struct_v = ceph_decode_8(&p); - if (reply_struct_v != 1) + tkt_struct_v = ceph_decode_8(p); + if (tkt_struct_v != 1) goto bad; - num = ceph_decode_32(&p); - dout("%d tickets\n", num); - while (num--) { - int type; - u8 tkt_struct_v, 
blob_struct_v; - struct ceph_x_ticket_handler *th; - void *dp, *dend; - int dlen; - char is_enc; - struct timespec validity; - struct ceph_crypto_key old_key; - void *tp, *tpend; - struct ceph_timespec new_validity; - struct ceph_crypto_key new_session_key; - struct ceph_buffer *new_ticket_blob; - unsigned long new_expires, new_renew_after; - u64 new_secret_id; - - ceph_decode_need(&p, end, sizeof(u32) + 1, bad); - - type = ceph_decode_32(&p); - dout(" ticket type %d %s\n", type, ceph_entity_type_name(type)); - - tkt_struct_v = ceph_decode_8(&p); - if (tkt_struct_v != 1) - goto bad; - - th = get_ticket_handler(ac, type); - if (IS_ERR(th)) { - ret = PTR_ERR(th); - goto out; - } - /* blob for me */ - dlen = ceph_x_decrypt(secret, &p, end, dbuf, - TEMP_TICKET_BUF_LEN); - if (dlen <= 0) { - ret = dlen; - goto out; - } - dout(" decrypted %d bytes\n", dlen); - dend = dbuf + dlen; - dp = dbuf; + th = get_ticket_handler(ac, type); + if (IS_ERR(th)) { + ret = PTR_ERR(th); + goto out; + } - tkt_struct_v = ceph_decode_8(&dp); - if (tkt_struct_v != 1) - goto bad; + /* blob for me */ + dlen = ceph_x_decrypt(secret, p, end, &dbuf, 0); + if (dlen <= 0) { + ret = dlen; + goto out; + } + dout(" decrypted %d bytes\n", dlen); + dp = dbuf; + dend = dp + dlen; - memcpy(&old_key, &th->session_key, sizeof(old_key)); - ret = ceph_crypto_key_decode(&new_session_key, &dp, dend); - if (ret) - goto out; + tkt_struct_v = ceph_decode_8(&dp); + if (tkt_struct_v != 1) + goto bad; - ceph_decode_copy(&dp, &new_validity, sizeof(new_validity)); - ceph_decode_timespec(&validity, &new_validity); - new_expires = get_seconds() + validity.tv_sec; - new_renew_after = new_expires - (validity.tv_sec / 4); - dout(" expires=%lu renew_after=%lu\n", new_expires, - new_renew_after); + memcpy(&old_key, &th->session_key, sizeof(old_key)); + ret = ceph_crypto_key_decode(&new_session_key, &dp, dend); + if (ret) + goto out; - /* ticket blob for service */ - ceph_decode_8_safe(&p, end, is_enc, bad); - tp = ticket_buf; 
- if (is_enc) { - /* encrypted */ - dout(" encrypted ticket\n"); - dlen = ceph_x_decrypt(&old_key, &p, end, ticket_buf, - TEMP_TICKET_BUF_LEN); - if (dlen < 0) { - ret = dlen; - goto out; - } - dlen = ceph_decode_32(&tp); - } else { - /* unencrypted */ - ceph_decode_32_safe(&p, end, dlen, bad); - ceph_decode_need(&p, end, dlen, bad); - ceph_decode_copy(&p, ticket_buf, dlen); + ceph_decode_copy(&dp, &new_validity, sizeof(new_validity)); + ceph_decode_timespec(&validity, &new_validity); + new_expires = get_seconds() + validity.tv_sec; + new_renew_after = new_expires - (validity.tv_sec / 4); + dout(" expires=%lu renew_after=%lu\n", new_expires, + new_renew_after); + + /* ticket blob for service */ + ceph_decode_8_safe(p, end, is_enc, bad); + if (is_enc) { + /* encrypted */ + dout(" encrypted ticket\n"); + dlen = ceph_x_decrypt(&old_key, p, end, &ticket_buf, 0); + if (dlen < 0) { + ret = dlen; + goto out; } - tpend = tp + dlen; - dout(" ticket blob is %d bytes\n", dlen); - ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad); - blob_struct_v = ceph_decode_8(&tp); - new_secret_id = ceph_decode_64(&tp); - ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend); - if (ret) + tp = ticket_buf; + dlen = ceph_decode_32(&tp); + } else { + /* unencrypted */ + ceph_decode_32_safe(p, end, dlen, bad); + ticket_buf = kmalloc(dlen, GFP_NOFS); + if (!ticket_buf) { + ret = -ENOMEM; goto out; - - /* all is well, update our ticket */ - ceph_crypto_key_destroy(&th->session_key); - if (th->ticket_blob) - ceph_buffer_put(th->ticket_blob); - th->session_key = new_session_key; - th->ticket_blob = new_ticket_blob; - th->validity = new_validity; - th->secret_id = new_secret_id; - th->expires = new_expires; - th->renew_after = new_renew_after; - dout(" got ticket service %d (%s) secret_id %lld len %d\n", - type, ceph_entity_type_name(type), th->secret_id, - (int)th->ticket_blob->vec.iov_len); - xi->have_keys |= th->service; + } + tp = ticket_buf; + ceph_decode_need(p, end, dlen, bad); + 
ceph_decode_copy(p, ticket_buf, dlen); } + tpend = tp + dlen; + dout(" ticket blob is %d bytes\n", dlen); + ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad); + blob_struct_v = ceph_decode_8(&tp); + new_secret_id = ceph_decode_64(&tp); + ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend); + if (ret) + goto out; + + /* all is well, update our ticket */ + ceph_crypto_key_destroy(&th->session_key); + if (th->ticket_blob) + ceph_buffer_put(th->ticket_blob); + th->session_key = new_session_key; + th->ticket_blob = new_ticket_blob; + th->validity = new_validity; + th->secret_id = new_secret_id; + th->expires = new_expires; + th->renew_after = new_renew_after; + dout(" got ticket service %d (%s) secret_id %lld len %d\n", + type, ceph_entity_type_name(type), th->secret_id, + (int)th->ticket_blob->vec.iov_len); + xi->have_keys |= th->service; - ret = 0; out: kfree(ticket_buf); -out_dbuf: kfree(dbuf); return ret; @@ -270,6 +255,34 @@ bad: goto out; } +static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, + struct ceph_crypto_key *secret, + void *buf, void *end) +{ + void *p = buf; + u8 reply_struct_v; + u32 num; + int ret; + + ceph_decode_8_safe(&p, end, reply_struct_v, bad); + if (reply_struct_v != 1) + return -EINVAL; + + ceph_decode_32_safe(&p, end, num, bad); + dout("%d tickets\n", num); + + while (num--) { + ret = process_one_ticket(ac, secret, &p, end); + if (ret) + return ret; + } + + return 0; + +bad: + return -EINVAL; +} + static int ceph_x_build_authorizer(struct ceph_auth_client *ac, struct ceph_x_ticket_handler *th, struct ceph_x_authorizer *au) @@ -583,13 +596,14 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, struct ceph_x_ticket_handler *th; int ret = 0; struct ceph_x_authorize_reply reply; + void *preply = &reply; void *p = au->reply_buf; void *end = p + sizeof(au->reply_buf); th = get_ticket_handler(ac, au->service); if (IS_ERR(th)) return PTR_ERR(th); - ret = ceph_x_decrypt(&th->session_key, &p, end, &reply, 
sizeof(reply)); + ret = ceph_x_decrypt(&th->session_key, &p, end, &preply, sizeof(reply)); if (ret < 0) return ret; if (ret != sizeof(reply)) diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 6e7a236..06f19b9 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -89,11 +89,82 @@ static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void) static const u8 *aes_iv = (u8 *)CEPH_AES_IV; +/* + * Should be used for buffers allocated with ceph_kvmalloc(). + * Currently these are encrypt out-buffer (ceph_buffer) and decrypt + * in-buffer (msg front). + * + * Dispose of @sgt with teardown_sgtable(). + * + * @prealloc_sg is to avoid memory allocation inside sg_alloc_table() + * in cases where a single sg is sufficient. No attempt to reduce the + * number of sgs by squeezing physically contiguous pages together is + * made though, for simplicity. + */ +static int setup_sgtable(struct sg_table *sgt, struct scatterlist *prealloc_sg, + const void *buf, unsigned int buf_len) +{ + struct scatterlist *sg; + const bool is_vmalloc = is_vmalloc_addr(buf); + unsigned int off = offset_in_page(buf); + unsigned int chunk_cnt = 1; + unsigned int chunk_len = PAGE_ALIGN(off + buf_len); + int i; + int ret; + + if (buf_len == 0) { + memset(sgt, 0, sizeof(*sgt)); + return -EINVAL; + } + + if (is_vmalloc) { + chunk_cnt = chunk_len >> PAGE_SHIFT; + chunk_len = PAGE_SIZE; + } + + if (chunk_cnt > 1) { + ret = sg_alloc_table(sgt, chunk_cnt, GFP_NOFS); + if (ret) + return ret; + } else { + WARN_ON(chunk_cnt != 1); + sg_init_table(prealloc_sg, 1); + sgt->sgl = prealloc_sg; + sgt->nents = sgt->orig_nents = 1; + } + + for_each_sg(sgt->sgl, sg, sgt->orig_nents, i) { + struct page *page; + unsigned int len = min(chunk_len - off, buf_len); + + if (is_vmalloc) + page = vmalloc_to_page(buf); + else + page = virt_to_page(buf); + + sg_set_page(sg, page, len, off); + + off = 0; + buf += len; + buf_len -= len; + } + WARN_ON(buf_len != 0); + + return 0; +} + +static void teardown_sgtable(struct 
sg_table *sgt) +{ + if (sgt->orig_nents > 1) + sg_free_table(sgt); +} + static int ceph_aes_encrypt(const void *key, int key_len, void *dst, size_t *dst_len, const void *src, size_t src_len) { - struct scatterlist sg_in[2], sg_out[1]; + struct scatterlist sg_in[2], prealloc_sg; + struct sg_table sg_out; struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); struct blkcipher_desc desc = { .tfm = tfm, .flags = 0 }; int ret; @@ -109,16 +180,18 @@ static int ceph_aes_encrypt(const void *key, int key_len, *dst_len = src_len + zero_padding; - crypto_blkcipher_setkey((void *)tfm, key, key_len); sg_init_table(sg_in, 2); sg_set_buf(&sg_in[0], src, src_len); sg_set_buf(&sg_in[1], pad, zero_padding); - sg_init_table(sg_out, 1); - sg_set_buf(sg_out, dst, *dst_len); + ret = setup_sgtable(&sg_out, &prealloc_sg, dst, *dst_len); + if (ret) + goto out_tfm; + + crypto_blkcipher_setkey((void *)tfm, key, key_len); iv = crypto_blkcipher_crt(tfm)->iv; ivsize = crypto_blkcipher_ivsize(tfm); - memcpy(iv, aes_iv, ivsize); + /* print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1, key, key_len, 1); @@ -127,16 +200,22 @@ static int ceph_aes_encrypt(const void *key, int key_len, print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1, pad, zero_padding, 1); */ - ret = crypto_blkcipher_encrypt(&desc, sg_out, sg_in, + ret = crypto_blkcipher_encrypt(&desc, sg_out.sgl, sg_in, src_len + zero_padding); - crypto_free_blkcipher(tfm); - if (ret < 0) + if (ret < 0) { pr_err("ceph_aes_crypt failed %d\n", ret); + goto out_sg; + } /* print_hex_dump(KERN_ERR, "enc out: ", DUMP_PREFIX_NONE, 16, 1, dst, *dst_len, 1); */ - return 0; + +out_sg: + teardown_sgtable(&sg_out); +out_tfm: + crypto_free_blkcipher(tfm); + return ret; } static int ceph_aes_encrypt2(const void *key, int key_len, void *dst, @@ -144,7 +223,8 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst, const void *src1, size_t src1_len, const void *src2, size_t src2_len) { - struct scatterlist sg_in[3], 
sg_out[1]; + struct scatterlist sg_in[3], prealloc_sg; + struct sg_table sg_out; struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); struct blkcipher_desc desc = { .tfm = tfm, .flags = 0 }; int ret; @@ -160,17 +240,19 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst, *dst_len = src1_len + src2_len + zero_padding; - crypto_blkcipher_setkey((void *)tfm, key, key_len); sg_init_table(sg_in, 3); sg_set_buf(&sg_in[0], src1, src1_len); sg_set_buf(&sg_in[1], src2, src2_len); sg_set_buf(&sg_in[2], pad, zero_padding); - sg_init_table(sg_out, 1); - sg_set_buf(sg_out, dst, *dst_len); + ret = setup_sgtable(&sg_out, &prealloc_sg, dst, *dst_len); + if (ret) + goto out_tfm; + + crypto_blkcipher_setkey((void *)tfm, key, key_len); iv = crypto_blkcipher_crt(tfm)->iv; ivsize = crypto_blkcipher_ivsize(tfm); - memcpy(iv, aes_iv, ivsize); + /* print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1, key, key_len, 1); @@ -181,23 +263,30 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst, print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1, pad, zero_padding, 1); */ - ret = crypto_blkcipher_encrypt(&desc, sg_out, sg_in, + ret = crypto_blkcipher_encrypt(&desc, sg_out.sgl, sg_in, src1_len + src2_len + zero_padding); - crypto_free_blkcipher(tfm); - if (ret < 0) + if (ret < 0) { pr_err("ceph_aes_crypt2 failed %d\n", ret); + goto out_sg; + } /* print_hex_dump(KERN_ERR, "enc out: ", DUMP_PREFIX_NONE, 16, 1, dst, *dst_len, 1); */ - return 0; + +out_sg: + teardown_sgtable(&sg_out); +out_tfm: + crypto_free_blkcipher(tfm); + return ret; } static int ceph_aes_decrypt(const void *key, int key_len, void *dst, size_t *dst_len, const void *src, size_t src_len) { - struct scatterlist sg_in[1], sg_out[2]; + struct sg_table sg_in; + struct scatterlist sg_out[2], prealloc_sg; struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); struct blkcipher_desc desc = { .tfm = tfm }; char pad[16]; @@ -209,16 +298,16 @@ static int 
ceph_aes_decrypt(const void *key, int key_len, if (IS_ERR(tfm)) return PTR_ERR(tfm); - crypto_blkcipher_setkey((void *)tfm, key, key_len); - sg_init_table(sg_in, 1); sg_init_table(sg_out, 2); - sg_set_buf(sg_in, src, src_len); sg_set_buf(&sg_out[0], dst, *dst_len); sg_set_buf(&sg_out[1], pad, sizeof(pad)); + ret = setup_sgtable(&sg_in, &prealloc_sg, src, src_len); + if (ret) + goto out_tfm; + crypto_blkcipher_setkey((void *)tfm, key, key_len); iv = crypto_blkcipher_crt(tfm)->iv; ivsize = crypto_blkcipher_ivsize(tfm); - memcpy(iv, aes_iv, ivsize); /* @@ -227,12 +316,10 @@ static int ceph_aes_decrypt(const void *key, int key_len, print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1, src, src_len, 1); */ - - ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in, src_len); - crypto_free_blkcipher(tfm); + ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in.sgl, src_len); if (ret < 0) { pr_err("ceph_aes_decrypt failed %d\n", ret); - return ret; + goto out_sg; } if (src_len <= *dst_len) @@ -250,7 +337,12 @@ static int ceph_aes_decrypt(const void *key, int key_len, print_hex_dump(KERN_ERR, "dec out: ", DUMP_PREFIX_NONE, 16, 1, dst, *dst_len, 1); */ - return 0; + +out_sg: + teardown_sgtable(&sg_in); +out_tfm: + crypto_free_blkcipher(tfm); + return ret; } static int ceph_aes_decrypt2(const void *key, int key_len, @@ -258,7 +350,8 @@ static int ceph_aes_decrypt2(const void *key, int key_len, void *dst2, size_t *dst2_len, const void *src, size_t src_len) { - struct scatterlist sg_in[1], sg_out[3]; + struct sg_table sg_in; + struct scatterlist sg_out[3], prealloc_sg; struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); struct blkcipher_desc desc = { .tfm = tfm }; char pad[16]; @@ -270,17 +363,17 @@ static int ceph_aes_decrypt2(const void *key, int key_len, if (IS_ERR(tfm)) return PTR_ERR(tfm); - sg_init_table(sg_in, 1); - sg_set_buf(sg_in, src, src_len); sg_init_table(sg_out, 3); sg_set_buf(&sg_out[0], dst1, *dst1_len); sg_set_buf(&sg_out[1], dst2, *dst2_len); 
sg_set_buf(&sg_out[2], pad, sizeof(pad)); + ret = setup_sgtable(&sg_in, &prealloc_sg, src, src_len); + if (ret) + goto out_tfm; crypto_blkcipher_setkey((void *)tfm, key, key_len); iv = crypto_blkcipher_crt(tfm)->iv; ivsize = crypto_blkcipher_ivsize(tfm); - memcpy(iv, aes_iv, ivsize); /* @@ -289,12 +382,10 @@ static int ceph_aes_decrypt2(const void *key, int key_len, print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1, src, src_len, 1); */ - - ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in, src_len); - crypto_free_blkcipher(tfm); + ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in.sgl, src_len); if (ret < 0) { pr_err("ceph_aes_decrypt failed %d\n", ret); - return ret; + goto out_sg; } if (src_len <= *dst1_len) @@ -324,7 +415,11 @@ static int ceph_aes_decrypt2(const void *key, int key_len, dst2, *dst2_len, 1); */ - return 0; +out_sg: + teardown_sgtable(&sg_in); +out_tfm: + crypto_free_blkcipher(tfm); + return ret; } diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 464303f..057017b 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -290,7 +290,11 @@ int ceph_msgr_init(void) if (ceph_msgr_slab_init()) return -ENOMEM; - ceph_msgr_wq = alloc_workqueue("ceph-msgr", 0, 0); + /* + * The number of active work items is limited by the number of + * connections, so leave @max_active at default. + */ + ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_MEM_RECLAIM, 0); if (ceph_msgr_wq) return 0; @@ -556,7 +560,7 @@ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov, return r; } -static int ceph_tcp_sendpage(struct socket *sock, struct page *page, +static int __ceph_tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, bool more) { int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? 
MSG_MORE : MSG_EOR); @@ -569,6 +573,24 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page, return ret; } +static int ceph_tcp_sendpage(struct socket *sock, struct page *page, + int offset, size_t size, bool more) +{ + int ret; + struct kvec iov; + + /* sendpage cannot properly handle pages with page_count == 0, + * we need to fallback to sendmsg if that's the case */ + if (page_count(page) >= 1) + return __ceph_tcp_sendpage(sock, page, offset, size, more); + + iov.iov_base = kmap(page) + offset; + iov.iov_len = size; + ret = ceph_tcp_sendmsg(sock, &iov, 1, size, more); + kunmap(page); + + return ret; +} /* * Shutdown/close the socket for the given connection. @@ -886,7 +908,7 @@ static void ceph_msg_data_pages_cursor_init(struct ceph_msg_data_cursor *cursor, BUG_ON(page_count > (int)USHRT_MAX); cursor->page_count = (unsigned short)page_count; BUG_ON(length > SIZE_MAX - cursor->page_offset); - cursor->last_piece = (size_t)cursor->page_offset + length <= PAGE_SIZE; + cursor->last_piece = cursor->page_offset + cursor->resid <= PAGE_SIZE; } static struct page * diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 2ac9ef3..dbcbf5a 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -1041,7 +1041,15 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, if (!m) { pr_info("alloc_msg unknown type %d\n", type); *skip = 1; + } else if (front_len > m->front_alloc_len) { + pr_warning("mon_alloc_msg front %d > prealloc %d (%u#%llu)\n", + front_len, m->front_alloc_len, + (unsigned int)con->peer_name.type, + le64_to_cpu(con->peer_name.num)); + ceph_msg_put(m); + m = ceph_msg_new(type, front_len, GFP_NOFS, false); } + return m; } diff --git a/net/compat.c b/net/compat.c index f50161f..cbc1a2a 100644 --- a/net/compat.c +++ b/net/compat.c @@ -85,7 +85,7 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov, { int tot_len; - if (kern_msg->msg_namelen) { + if (kern_msg->msg_name && kern_msg->msg_namelen) 
{ if (mode == VERIFY_READ) { int err = move_addr_to_kernel(kern_msg->msg_name, kern_msg->msg_namelen, @@ -93,10 +93,11 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov, if (err < 0) return err; } - if (kern_msg->msg_name) - kern_msg->msg_name = kern_address; - } else + kern_msg->msg_name = kern_address; + } else { kern_msg->msg_name = NULL; + kern_msg->msg_namelen = 0; + } tot_len = iov_from_user_compat_to_kern(kern_iov, (struct compat_iovec __user *)kern_msg->msg_iov, diff --git a/net/core/dev.c b/net/core/dev.c index a1d035a..647ec24 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1208,7 +1208,11 @@ EXPORT_SYMBOL(netdev_features_change); void netdev_state_change(struct net_device *dev) { if (dev->flags & IFF_UP) { - call_netdevice_notifiers(NETDEV_CHANGE, dev); + struct netdev_notifier_change_info change_info; + + change_info.flags_changed = 0; + call_netdevice_notifiers_info(NETDEV_CHANGE, dev, + &change_info.info); rtmsg_ifinfo(RTM_NEWLINK, dev, 0); } } @@ -1700,6 +1704,7 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) skb_scrub_packet(skb, true); skb->protocol = eth_type_trans(skb, dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); return netif_rx(skb); } @@ -2508,20 +2513,29 @@ netdev_features_t netif_skb_dev_features(struct sk_buff *skb, if (skb_shinfo(skb)->gso_segs > dev->gso_max_segs) features &= ~NETIF_F_GSO_MASK; - if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) { - struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; - protocol = veh->h_vlan_encapsulated_proto; - } else if (!vlan_tx_tag_present(skb)) { - return harmonize_features(skb, dev, features); + if (!vlan_tx_tag_present(skb)) { + if (unlikely(protocol == htons(ETH_P_8021Q) || + protocol == htons(ETH_P_8021AD))) { + struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; + protocol = veh->h_vlan_encapsulated_proto; + } else { + return harmonize_features(skb, dev, features); + } } - features &= 
(dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX); + features = netdev_intersect_features(features, + dev->vlan_features | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX); if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) - features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | - NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX; + features = netdev_intersect_features(features, + NETIF_F_SG | + NETIF_F_HIGHDMA | + NETIF_F_FRAGLIST | + NETIF_F_GEN_CSUM | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX); return harmonize_features(skb, dev, features); } @@ -3569,7 +3583,7 @@ another_round: if (skb->protocol == cpu_to_be16(ETH_P_8021Q) || skb->protocol == cpu_to_be16(ETH_P_8021AD)) { - skb = vlan_untag(skb); + skb = skb_vlan_untag(skb); if (unlikely(!skb)) goto unlock; } @@ -4018,6 +4032,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) skb->vlan_tci = 0; skb->dev = napi->dev; skb->skb_iif = 0; + skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); napi->skb = skb; } @@ -5036,6 +5051,7 @@ void __dev_set_rx_mode(struct net_device *dev) if (ops->ndo_set_rx_mode) ops->ndo_set_rx_mode(dev); } +EXPORT_SYMBOL(__dev_set_rx_mode); void dev_set_rx_mode(struct net_device *dev) { @@ -5314,7 +5330,7 @@ static int dev_new_index(struct net *net) /* Delayed registration/unregisteration */ static LIST_HEAD(net_todo_list); -static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq); +DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq); static void net_set_todo(struct net_device *dev) { @@ -5702,13 +5718,8 @@ int register_netdevice(struct net_device *dev) dev->features |= NETIF_F_SOFT_FEATURES; dev->wanted_features = dev->features & dev->hw_features; - /* Turn on no cache copy if HW is doing checksum */ if (!(dev->flags & IFF_LOOPBACK)) { dev->hw_features |= NETIF_F_NOCACHE_COPY; - if (dev->features & NETIF_F_ALL_CSUM) { - dev->wanted_features |= NETIF_F_NOCACHE_COPY; - 
dev->features |= NETIF_F_NOCACHE_COPY; - } } /* Make NETIF_F_HIGHDMA inheritable to VLAN devices. @@ -6274,6 +6285,9 @@ EXPORT_SYMBOL(unregister_netdevice_queue); /** * unregister_netdevice_many - unregister many devices * @head: list of devices + * + * Note: As most callers use a stack allocated list_head, + * we force a list_del() to make sure stack wont be corrupted later. */ void unregister_netdevice_many(struct list_head *head) { @@ -6283,6 +6297,7 @@ void unregister_netdevice_many(struct list_head *head) rollback_registered_many(head); list_for_each_entry(dev, head, unreg_list) net_set_todo(dev); + list_del(head); } } EXPORT_SYMBOL(unregister_netdevice_many); @@ -6742,7 +6757,6 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list) } } unregister_netdevice_many(&dev_kill_list); - list_del(&dev_kill_list); rtnl_unlock(); } diff --git a/net/core/dst.c b/net/core/dst.c index ca4231e..15b6792 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -267,6 +267,15 @@ again: } EXPORT_SYMBOL(dst_destroy); +static void dst_destroy_rcu(struct rcu_head *head) +{ + struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); + + dst = dst_destroy(dst); + if (dst) + __dst_free(dst); +} + void dst_release(struct dst_entry *dst) { if (dst) { @@ -274,11 +283,8 @@ void dst_release(struct dst_entry *dst) newrefcnt = atomic_dec_return(&dst->__refcnt); WARN_ON(newrefcnt < 0); - if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) { - dst = dst_destroy(dst); - if (dst) - __dst_free(dst); - } + if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) + call_rcu(&dst->rcu_head, dst_destroy_rcu); } } EXPORT_SYMBOL(dst_release); diff --git a/net/core/filter.c b/net/core/filter.c index ad30d62..ebce437 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -355,6 +355,8 @@ load_b: if (skb_is_nonlinear(skb)) return 0; + if (skb->len < sizeof(struct nlattr)) + return 0; if (A > skb->len - sizeof(struct nlattr)) return 0; @@ -371,11 +373,13 @@ load_b: if 
(skb_is_nonlinear(skb)) return 0; + if (skb->len < sizeof(struct nlattr)) + return 0; if (A > skb->len - sizeof(struct nlattr)) return 0; nla = (struct nlattr *)&skb->data[A]; - if (nla->nla_len > A - skb->len) + if (nla->nla_len > skb->len - A) return 0; nla = nla_find_nested(nla, X); diff --git a/net/core/iovec.c b/net/core/iovec.c index 7d84ea1..8254497 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -39,7 +39,7 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a { int size, ct, err; - if (m->msg_namelen) { + if (m->msg_name && m->msg_namelen) { if (mode == VERIFY_READ) { void __user *namep; namep = (void __user __force *) m->msg_name; @@ -48,10 +48,10 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a if (err < 0) return err; } - if (m->msg_name) - m->msg_name = address; + m->msg_name = address; } else { m->msg_name = NULL; + m->msg_namelen = 0; } size = m->msg_iovlen * sizeof(struct iovec); @@ -107,6 +107,10 @@ EXPORT_SYMBOL(memcpy_toiovecend); int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, int offset, int len) { + /* No data? Done! 
*/ + if (len == 0) + return 0; + /* Skip over the finished iovecs */ while (offset >= iov->iov_len) { offset -= iov->iov_len; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 81d3a9a..7c8ffd9 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -24,7 +24,7 @@ static LIST_HEAD(pernet_list); static struct list_head *first_device = &pernet_list; -static DEFINE_MUTEX(net_mutex); +DEFINE_MUTEX(net_mutex); LIST_HEAD(net_namespace_list); EXPORT_SYMBOL_GPL(net_namespace_list); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index cddb745..e9e79df 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -790,7 +790,7 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) } if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { - skb = vlan_untag(skb); + skb = skb_vlan_untag(skb); if (unlikely(!skb)) goto out; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 37b492e..f322475 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -353,15 +353,46 @@ void __rtnl_link_unregister(struct rtnl_link_ops *ops) } EXPORT_SYMBOL_GPL(__rtnl_link_unregister); +/* Return with the rtnl_lock held when there are no network + * devices unregistering in any network namespace. + */ +static void rtnl_lock_unregistering_all(void) +{ + struct net *net; + bool unregistering; + DEFINE_WAIT(wait); + + for (;;) { + prepare_to_wait(&netdev_unregistering_wq, &wait, + TASK_UNINTERRUPTIBLE); + unregistering = false; + rtnl_lock(); + for_each_net(net) { + if (net->dev_unreg_count > 0) { + unregistering = true; + break; + } + } + if (!unregistering) + break; + __rtnl_unlock(); + schedule(); + } + finish_wait(&netdev_unregistering_wq, &wait); +} + /** * rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. 
* @ops: struct rtnl_link_ops * to unregister */ void rtnl_link_unregister(struct rtnl_link_ops *ops) { - rtnl_lock(); + /* Close the race with cleanup_net() */ + mutex_lock(&net_mutex); + rtnl_lock_unregistering_all(); __rtnl_link_unregister(ops); rtnl_unlock(); + mutex_unlock(&net_mutex); } EXPORT_SYMBOL_GPL(rtnl_link_unregister); @@ -708,13 +739,15 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, (nla_total_size(sizeof(struct ifla_vf_mac)) + nla_total_size(sizeof(struct ifla_vf_vlan)) + nla_total_size(sizeof(struct ifla_vf_tx_rate)) + - nla_total_size(sizeof(struct ifla_vf_spoofchk))); + nla_total_size(sizeof(struct ifla_vf_spoofchk)) + + nla_total_size(sizeof(struct ifla_vf_link_state))); return size; } else return 0; } -static size_t rtnl_port_size(const struct net_device *dev) +static size_t rtnl_port_size(const struct net_device *dev, + u32 ext_filter_mask) { size_t port_size = nla_total_size(4) /* PORT_VF */ + nla_total_size(PORT_PROFILE_MAX) /* PORT_PROFILE */ @@ -730,7 +763,8 @@ static size_t rtnl_port_size(const struct net_device *dev) size_t port_self_size = nla_total_size(sizeof(struct nlattr)) + port_size; - if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent) + if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent || + !(ext_filter_mask & RTEXT_FILTER_VF)) return 0; if (dev_num_vf(dev->dev.parent)) return port_self_size + vf_ports_size + @@ -765,7 +799,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(ext_filter_mask & RTEXT_FILTER_VF ? 
4 : 0) /* IFLA_NUM_VF */ + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */ - + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ + + rtnl_port_size(dev, ext_filter_mask) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ + rtnl_link_get_size(dev) /* IFLA_LINKINFO */ + rtnl_link_get_af_size(dev) /* IFLA_AF_SPEC */ + nla_total_size(MAX_PHYS_PORT_ID_LEN); /* IFLA_PHYS_PORT_ID */ @@ -827,11 +861,13 @@ static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev) return 0; } -static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev) +static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev, + u32 ext_filter_mask) { int err; - if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent) + if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent || + !(ext_filter_mask & RTEXT_FILTER_VF)) return 0; err = rtnl_port_self_fill(skb, dev); @@ -1016,7 +1052,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_nest_end(skb, vfinfo); } - if (rtnl_port_fill(skb, dev)) + if (rtnl_port_fill(skb, dev, ext_filter_mask)) goto nla_put_failure; if (dev->rtnl_link_ops) { @@ -1070,6 +1106,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) struct hlist_head *head; struct nlattr *tb[IFLA_MAX+1]; u32 ext_filter_mask = 0; + int err; + int hdrlen; s_h = cb->args[0]; s_idx = cb->args[1]; @@ -1077,8 +1115,17 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); cb->seq = net->dev_base_seq; - if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, - ifla_policy) >= 0) { + /* A hack to preserve kernel<->userspace interface. + * The correct header is ifinfomsg. It is consistent with rtnl_getlink. + * However, before Linux v3.9 the code here assumed rtgenmsg and that's + * what iproute2 < v3.9.0 used. + * We can detect the old iproute2. Even including the IFLA_EXT_MASK + * attribute, its netlink message is shorter than struct ifinfomsg. 
+ */ + hdrlen = nlmsg_len(cb->nlh) < sizeof(struct ifinfomsg) ? + sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg); + + if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) { if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); @@ -1090,11 +1137,17 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; - if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, 0, - NLM_F_MULTI, - ext_filter_mask) <= 0) + err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, 0, + NLM_F_MULTI, + ext_filter_mask); + /* If we ran out of room on the first message, + * we're in trouble + */ + WARN_ON((err == -EMSGSIZE) && (skb->len == 0)); + + if (err <= 0) goto out; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); @@ -1324,7 +1377,8 @@ static int do_set_master(struct net_device *dev, int ifindex) return 0; } -static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, +static int do_setlink(const struct sk_buff *skb, + struct net_device *dev, struct ifinfomsg *ifm, struct nlattr **tb, char *ifname, int modified) { const struct net_device_ops *ops = dev->netdev_ops; @@ -1336,7 +1390,8 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, err = PTR_ERR(net); goto errout; } - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) { + if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { + put_net(net); err = -EPERM; goto errout; } @@ -1590,7 +1645,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) if (err < 0) goto errout; - err = do_setlink(dev, ifm, tb, ifname, 0); + err = do_setlink(skb, dev, ifm, tb, ifname, 0); errout: return err; } @@ -1630,7 +1685,6 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) ops->dellink(dev, &list_kill); unregister_netdevice_many(&list_kill); - list_del(&list_kill); return 0; } 
@@ -1708,7 +1762,8 @@ err: } EXPORT_SYMBOL(rtnl_create_link); -static int rtnl_group_changelink(struct net *net, int group, +static int rtnl_group_changelink(const struct sk_buff *skb, + struct net *net, int group, struct ifinfomsg *ifm, struct nlattr **tb) { @@ -1717,7 +1772,7 @@ static int rtnl_group_changelink(struct net *net, int group, for_each_netdev(net, dev) { if (dev->group == group) { - err = do_setlink(dev, ifm, tb, NULL, 0); + err = do_setlink(skb, dev, ifm, tb, NULL, 0); if (err < 0) return err; } @@ -1819,12 +1874,12 @@ replay: modified = 1; } - return do_setlink(dev, ifm, tb, ifname, modified); + return do_setlink(skb, dev, ifm, tb, ifname, modified); } if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { if (ifm->ifi_index == 0 && tb[IFLA_GROUP]) - return rtnl_group_changelink(net, + return rtnl_group_changelink(skb, net, nla_get_u32(tb[IFLA_GROUP]), ifm, tb); return -ENODEV; @@ -1936,9 +1991,13 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) struct nlattr *tb[IFLA_MAX+1]; u32 ext_filter_mask = 0; u16 min_ifinfo_dump_size = 0; + int hdrlen; + + /* Same kernel<->userspace interface hack as in rtnl_dump_ifinfo. */ + hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ? 
+ sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg); - if (nlmsg_parse(nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, - ifla_policy) >= 0) { + if (nlmsg_parse(nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) { if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); } @@ -2205,7 +2264,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) int err = -EINVAL; __u8 *addr; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); @@ -2657,7 +2716,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) sz_idx = type>>2; kind = type&3; - if (kind != 2 && !ns_capable(net->user_ns, CAP_NET_ADMIN)) + if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 8d9d05e..d0afc32 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -95,31 +95,6 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #endif #ifdef CONFIG_INET -__u32 secure_ip_id(__be32 daddr) -{ - u32 hash[MD5_DIGEST_WORDS]; - - net_secret_init(); - hash[0] = (__force __u32) daddr; - hash[1] = net_secret[13]; - hash[2] = net_secret[14]; - hash[3] = net_secret[15]; - - md5_transform(hash, net_secret); - - return hash[0]; -} - -__u32 secure_ipv6_id(const __be32 daddr[4]) -{ - __u32 hash[4]; - - net_secret_init(); - memcpy(hash, daddr, 16); - md5_transform(hash, net_secret); - - return hash[0]; -} __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 41cd152..86d6ffa 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -62,6 +62,7 @@ #include <linux/scatterlist.h> #include <linux/errqueue.h> #include <linux/prefetch.h> +#include <linux/if_vlan.h> #include <linux/locallock.h> #include <net/protocol.h> @@ -2766,81 +2767,85 @@ 
EXPORT_SYMBOL_GPL(skb_pull_rcsum); /** * skb_segment - Perform protocol segmentation on skb. - * @skb: buffer to segment + * @head_skb: buffer to segment * @features: features for the output path (see dev->features) * * This function performs segmentation on the given skb. It returns * a pointer to the first in a list of new skbs for the segments. * In case of error it returns ERR_PTR(err). */ -struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) +struct sk_buff *skb_segment(struct sk_buff *head_skb, + netdev_features_t features) { struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; - struct sk_buff *fskb = skb_shinfo(skb)->frag_list; - skb_frag_t *skb_frag = skb_shinfo(skb)->frags; - unsigned int mss = skb_shinfo(skb)->gso_size; - unsigned int doffset = skb->data - skb_mac_header(skb); + struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list; + skb_frag_t *frag = skb_shinfo(head_skb)->frags; + unsigned int mss = skb_shinfo(head_skb)->gso_size; + unsigned int doffset = head_skb->data - skb_mac_header(head_skb); + struct sk_buff *frag_skb = head_skb; unsigned int offset = doffset; - unsigned int tnl_hlen = skb_tnl_header_len(skb); + unsigned int tnl_hlen = skb_tnl_header_len(head_skb); unsigned int headroom; unsigned int len; __be16 proto; bool csum; int sg = !!(features & NETIF_F_SG); - int nfrags = skb_shinfo(skb)->nr_frags; + int nfrags = skb_shinfo(head_skb)->nr_frags; int err = -ENOMEM; int i = 0; int pos; - proto = skb_network_protocol(skb); + __skb_push(head_skb, doffset); + proto = skb_network_protocol(head_skb); if (unlikely(!proto)) return ERR_PTR(-EINVAL); csum = !!can_checksum_protocol(features, proto); - __skb_push(skb, doffset); - headroom = skb_headroom(skb); - pos = skb_headlen(skb); + + headroom = skb_headroom(head_skb); + pos = skb_headlen(head_skb); do { struct sk_buff *nskb; - skb_frag_t *frag; + skb_frag_t *nskb_frag; int hsize; int size; - len = skb->len - offset; + len = head_skb->len - offset; if (len > mss) 
len = mss; - hsize = skb_headlen(skb) - offset; + hsize = skb_headlen(head_skb) - offset; if (hsize < 0) hsize = 0; if (hsize > len || !sg) hsize = len; - if (!hsize && i >= nfrags && skb_headlen(fskb) && - (skb_headlen(fskb) == len || sg)) { - BUG_ON(skb_headlen(fskb) > len); + if (!hsize && i >= nfrags && skb_headlen(list_skb) && + (skb_headlen(list_skb) == len || sg)) { + BUG_ON(skb_headlen(list_skb) > len); i = 0; - nfrags = skb_shinfo(fskb)->nr_frags; - skb_frag = skb_shinfo(fskb)->frags; - pos += skb_headlen(fskb); + nfrags = skb_shinfo(list_skb)->nr_frags; + frag = skb_shinfo(list_skb)->frags; + frag_skb = list_skb; + pos += skb_headlen(list_skb); while (pos < offset + len) { BUG_ON(i >= nfrags); - size = skb_frag_size(skb_frag); + size = skb_frag_size(frag); if (pos + size > offset + len) break; i++; pos += size; - skb_frag++; + frag++; } - nskb = skb_clone(fskb, GFP_ATOMIC); - fskb = fskb->next; + nskb = skb_clone(list_skb, GFP_ATOMIC); + list_skb = list_skb->next; if (unlikely(!nskb)) goto err; @@ -2861,7 +2866,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) __skb_push(nskb, doffset); } else { nskb = __alloc_skb(hsize + doffset + headroom, - GFP_ATOMIC, skb_alloc_rx_flag(skb), + GFP_ATOMIC, skb_alloc_rx_flag(head_skb), NUMA_NO_NODE); if (unlikely(!nskb)) @@ -2877,19 +2882,19 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) segs = nskb; tail = nskb; - __copy_skb_header(nskb, skb); - nskb->mac_len = skb->mac_len; + __copy_skb_header(nskb, head_skb); /* nskb and skb might have different headroom */ if (nskb->ip_summed == CHECKSUM_PARTIAL) nskb->csum_start += skb_headroom(nskb) - headroom; skb_reset_mac_header(nskb); - skb_set_network_header(nskb, skb->mac_len); + skb_set_network_header(nskb, head_skb->mac_len); nskb->transport_header = (nskb->network_header + - skb_network_header_len(skb)); + skb_network_header_len(head_skb)); + skb_reset_mac_len(nskb); - skb_copy_from_linear_data_offset(skb, 
-tnl_hlen, + skb_copy_from_linear_data_offset(head_skb, -tnl_hlen, nskb->data - tnl_hlen, doffset + tnl_hlen); @@ -2898,30 +2903,32 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) if (!sg) { nskb->ip_summed = CHECKSUM_NONE; - nskb->csum = skb_copy_and_csum_bits(skb, offset, + nskb->csum = skb_copy_and_csum_bits(head_skb, offset, skb_put(nskb, len), len, 0); continue; } - frag = skb_shinfo(nskb)->frags; + nskb_frag = skb_shinfo(nskb)->frags; - skb_copy_from_linear_data_offset(skb, offset, + skb_copy_from_linear_data_offset(head_skb, offset, skb_put(nskb, hsize), hsize); - skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; + skb_shinfo(nskb)->tx_flags = skb_shinfo(head_skb)->tx_flags & + SKBTX_SHARED_FRAG; while (pos < offset + len) { if (i >= nfrags) { - BUG_ON(skb_headlen(fskb)); + BUG_ON(skb_headlen(list_skb)); i = 0; - nfrags = skb_shinfo(fskb)->nr_frags; - skb_frag = skb_shinfo(fskb)->frags; + nfrags = skb_shinfo(list_skb)->nr_frags; + frag = skb_shinfo(list_skb)->frags; + frag_skb = list_skb; BUG_ON(!nfrags); - fskb = fskb->next; + list_skb = list_skb->next; } if (unlikely(skb_shinfo(nskb)->nr_frags >= @@ -2932,27 +2939,30 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) goto err; } - *frag = *skb_frag; - __skb_frag_ref(frag); - size = skb_frag_size(frag); + if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC))) + goto err; + + *nskb_frag = *frag; + __skb_frag_ref(nskb_frag); + size = skb_frag_size(nskb_frag); if (pos < offset) { - frag->page_offset += offset - pos; - skb_frag_size_sub(frag, offset - pos); + nskb_frag->page_offset += offset - pos; + skb_frag_size_sub(nskb_frag, offset - pos); } skb_shinfo(nskb)->nr_frags++; if (pos + size <= offset + len) { i++; - skb_frag++; + frag++; pos += size; } else { - skb_frag_size_sub(frag, pos + size - (offset + len)); + skb_frag_size_sub(nskb_frag, pos + size - (offset + len)); goto skip_fraglist; } - frag++; + nskb_frag++; } 
skip_fraglist: @@ -2966,15 +2976,12 @@ perform_csum_check: nskb->len - doffset, 0); nskb->ip_summed = CHECKSUM_NONE; } - } while ((offset += len) < skb->len); + } while ((offset += len) < head_skb->len); return segs; err: - while ((skb = segs)) { - segs = skb->next; - kfree_skb(skb); - } + kfree_skb_list(segs); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(skb_segment); @@ -3567,6 +3574,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) skb->local_df = 0; skb_dst_drop(skb); skb->mark = 0; + skb_init_secmark(skb); secpath_reset(skb); nf_reset(skb); nf_reset_trace(skb); @@ -3586,12 +3594,66 @@ EXPORT_SYMBOL_GPL(skb_scrub_packet); unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) { const struct skb_shared_info *shinfo = skb_shinfo(skb); - unsigned int hdr_len; if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) - hdr_len = tcp_hdrlen(skb); - else - hdr_len = sizeof(struct udphdr); - return hdr_len + shinfo->gso_size; + return tcp_hdrlen(skb) + shinfo->gso_size; + + /* UFO sets gso_size to the size of the fragmentation + * payload, i.e. the size of the L4 (UDP) header is already + * accounted for. 
+ */ + return shinfo->gso_size; } EXPORT_SYMBOL_GPL(skb_gso_transport_seglen); + +static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) +{ + if (skb_cow(skb, skb_headroom(skb)) < 0) { + kfree_skb(skb); + return NULL; + } + + memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN); + skb->mac_header += VLAN_HLEN; + return skb; +} + +struct sk_buff *skb_vlan_untag(struct sk_buff *skb) +{ + struct vlan_hdr *vhdr; + u16 vlan_tci; + + if (unlikely(vlan_tx_tag_present(skb))) { + /* vlan_tci is already set-up so leave this for another time */ + return skb; + } + + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb)) + goto err_free; + + if (unlikely(!pskb_may_pull(skb, VLAN_HLEN))) + goto err_free; + + vhdr = (struct vlan_hdr *)skb->data; + vlan_tci = ntohs(vhdr->h_vlan_TCI); + __vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci); + + skb_pull_rcsum(skb, VLAN_HLEN); + vlan_set_encap_proto(skb, vhdr); + + skb = skb_reorder_vlan_header(skb); + if (unlikely(!skb)) + goto err_free; + + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + skb_reset_mac_len(skb); + + return skb; + +err_free: + kfree_skb(skb); + return NULL; +} +EXPORT_SYMBOL(skb_vlan_untag); diff --git a/net/core/sock.c b/net/core/sock.c index 410bb4c..6f096a8 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -145,6 +145,55 @@ static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); +/** + * sk_ns_capable - General socket capability test + * @sk: Socket to use a capability on or through + * @user_ns: The user namespace of the capability to use + * @cap: The capability to use + * + * Test to see if both the opener of the socket (at the time the socket + * was created) and the current process have the capability @cap in the + * user namespace @user_ns.
+ */ +bool sk_ns_capable(const struct sock *sk, + struct user_namespace *user_ns, int cap) +{ + return file_ns_capable(sk->sk_socket->file, user_ns, cap) && + ns_capable(user_ns, cap); +} +EXPORT_SYMBOL(sk_ns_capable); + +/** + * sk_capable - Socket global capability test + * @sk: Socket to use a capability on or through + * @cap: The global capability to use + * + * Test to see if both the opener of the socket (at the time the socket + * was created) and the current process have the capability @cap in all + * user namespaces. + */ +bool sk_capable(const struct sock *sk, int cap) +{ + return sk_ns_capable(sk, &init_user_ns, cap); +} +EXPORT_SYMBOL(sk_capable); + +/** + * sk_net_capable - Network namespace socket capability test + * @sk: Socket to use a capability on or through + * @cap: The capability to use + * + * Test to see if both the opener of the socket (at the time the socket was + * created) and the current process have the capability @cap over the network + * namespace the socket is a member of. + */ +bool sk_net_capable(const struct sock *sk, int cap) +{ + return sk_ns_capable(sk, sock_net(sk)->user_ns, cap); +} +EXPORT_SYMBOL(sk_net_capable); + + #ifdef CONFIG_MEMCG_KMEM int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index a0e9cf6..c38e7a2 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -49,7 +49,7 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype) } EXPORT_SYMBOL_GPL(sock_diag_put_meminfo); -int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk, +int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk, struct sk_buff *skb, int attrtype) { struct nlattr *attr; @@ -57,7 +57,7 @@ int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk, unsigned int len; int err = 0; - if (!ns_capable(user_ns, CAP_NET_ADMIN)) { + if (!may_report_filterinfo) { nla_reserve(skb, attrtype, 0); return 0; }
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 40d5829..1074ffb 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1670,7 +1670,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh) struct nlmsghdr *reply_nlh = NULL; const struct reply_func *fn; - if ((nlh->nlmsg_type == RTM_SETDCB) && !capable(CAP_NET_ADMIN)) + if ((nlh->nlmsg_type == RTM_SETDCB) && !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; ret = nlmsg_parse(nlh, sizeof(*dcb), tb, DCB_ATTR_MAX, diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index dd0dfb2..70f2549 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -573,7 +573,7 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) struct dn_ifaddr __rcu **ifap; int err = -EINVAL; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (!net_eq(net, &init_net)) @@ -617,7 +617,7 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) struct dn_ifaddr *ifa; int err; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (!net_eq(net, &init_net)) diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 57dc159..d332aef 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -505,7 +505,7 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) struct nlattr *attrs[RTA_MAX+1]; int err; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (!net_eq(net, &init_net)) @@ -530,7 +530,7 @@ static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) struct nlattr *attrs[RTA_MAX+1]; int err; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (!net_eq(net, &init_net)) diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 2a7efe3..f3dc69a 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -107,7 +107,7 @@ static inline void 
dnrmg_receive_user_skb(struct sk_buff *skb) if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) return; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) RCV_SKB_FAIL(-EPERM); /* Eventually we might send routing messages too */ diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c index c32be29..ede0e2d 100644 --- a/net/dns_resolver/dns_query.c +++ b/net/dns_resolver/dns_query.c @@ -150,7 +150,9 @@ int dns_query(const char *type, const char *name, size_t namelen, if (!*_result) goto put; - memcpy(*_result, upayload->data, len + 1); + memcpy(*_result, upayload->data, len); + *_result[len] = '\0'; + if (_expiry) *_expiry = rkey->expiry; diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 19e3637..5f3dc1d 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -86,18 +86,26 @@ out: } EXPORT_SYMBOL(ip4_datagram_connect); +/* Because UDP xmit path can manipulate sk_dst_cache without holding + * socket lock, we need to use sk_dst_set() here, + * even if we own the socket lock. + */ void ip4_datagram_release_cb(struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); const struct ip_options_rcu *inet_opt; __be32 daddr = inet->inet_daddr; + struct dst_entry *dst; struct flowi4 fl4; struct rtable *rt; - if (! __sk_dst_get(sk) || __sk_dst_check(sk, 0)) - return; - rcu_read_lock(); + + dst = __sk_dst_get(sk); + if (!dst || !dst->obsolete || dst->ops->check(dst, 0)) { + rcu_read_unlock(); + return; + } inet_opt = rcu_dereference(inet->inet_opt); if (inet_opt && inet_opt->opt.srr) daddr = inet_opt->opt.faddr; @@ -105,8 +113,10 @@ void ip4_datagram_release_cb(struct sock *sk) inet->inet_saddr, inet->inet_dport, inet->inet_sport, sk->sk_protocol, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if); - if (!IS_ERR(rt)) - __sk_dst_set(sk, &rt->dst); + + dst = !IS_ERR(rt) ? 
&rt->dst : NULL; + sk_dst_set(sk, dst); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(ip4_datagram_release_cb); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index f2e1573..8f7bd56 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -62,6 +62,10 @@ int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) else res->tclassid = 0; #endif + + if (err == -ESRCH) + err = -ENETUNREACH; + return err; } EXPORT_SYMBOL_GPL(__fib_lookup); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index d5dbca5..ec12b16 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -534,7 +534,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) return 1; attrlen = rtnh_attrlen(rtnh); - if (attrlen < 0) { + if (attrlen > 0) { struct nlattr *nla, *attrs = rtnh_attrs(rtnh); nla = nla_find(attrs, attrlen, RTA_GATEWAY); @@ -819,13 +819,13 @@ struct fib_info *fib_create_info(struct fib_config *cfg) fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); if (fi == NULL) goto failure; + fib_info_cnt++; if (cfg->fc_mx) { fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); if (!fi->fib_metrics) goto failure; } else fi->fib_metrics = (u32 *) dst_default_metrics; - fib_info_cnt++; fi->fib_net = hold_net(net); fi->fib_protocol = cfg->fc_protocol; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 1f0c7e0..9d12181 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -706,8 +706,6 @@ static void icmp_unreach(struct sk_buff *skb) &iph->daddr); } else { info = ntohs(icmph->un.frag.mtu); - if (!info) - goto out; } break; case ICMP_SR_FAILED: diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 7defdc9..94d40cc 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -369,7 +369,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) pip->saddr = fl4.saddr; pip->protocol = IPPROTO_IGMP; pip->tot_len = 0; /* filled in later */ - ip_select_ident(skb, &rt->dst, NULL); + ip_select_ident(skb, 
NULL); ((u8 *)&pip[1])[0] = IPOPT_RA; ((u8 *)&pip[1])[1] = 4; ((u8 *)&pip[1])[2] = 0; @@ -714,7 +714,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, iph->daddr = dst; iph->saddr = fl4.saddr; iph->protocol = IPPROTO_IGMP; - ip_select_ident(skb, &rt->dst, NULL); + ip_select_ident(skb, NULL); ((u8 *)&iph[1])[0] = IPOPT_RA; ((u8 *)&iph[1])[1] = 4; ((u8 *)&iph[1])[2] = 0; @@ -1952,6 +1952,10 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) rtnl_lock(); in_dev = ip_mc_find_dev(net, imr); + if (!in_dev) { + ret = -ENODEV; + goto out; + } ifindex = imr->imr_ifindex; for (imlp = &inet->mc_list; (iml = rtnl_dereference(*imlp)) != NULL; @@ -1969,16 +1973,14 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) *imlp = iml->next_rcu; - if (in_dev) - ip_mc_dec_group(in_dev, group); + ip_mc_dec_group(in_dev, group); rtnl_unlock(); /* decrease mem now to avoid the memleak warning */ atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); kfree_rcu(iml, rcu); return 0; } - if (!in_dev) - ret = -ENODEV; +out: rtnl_unlock(); return ret; } diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 33d5537..67140ef 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -26,20 +26,7 @@ * Theory of operations. * We keep one entry for each peer IP address. The nodes contains long-living * information about the peer which doesn't depend on routes. - * At this moment this information consists only of ID field for the next - * outgoing IP packet. This field is incremented with each packet as encoded - * in inet_getid() function (include/net/inetpeer.h). - * At the moment of writing this notes identifier of IP packets is generated - * to be unpredictable using this code only for packets subjected - * (actually or potentially) to defragmentation. I.e. DF packets less than - * PMTU in size when local fragmentation is disabled use a constant ID and do - * not use this code (see ip_select_ident() in include/net/ip.h). 
* - * Route cache entries hold references to our nodes. - * New cache entries get references via lookup by destination IP address in - * the avl tree. The reference is grabbed only when it's needed i.e. only - * when we try to output IP packet which needs an unpredictable ID (see - * __ip_select_ident() in net/ipv4/route.c). * Nodes are removed only when reference counter goes to 0. * When it's happened the node may be removed when a sufficient amount of * time has been passed since its last use. The less-recently-used entry can @@ -62,7 +49,6 @@ * refcnt: atomically against modifications on other CPU; * usually under some other lock to prevent node disappearing * daddr: unchangeable - * ip_id_count: atomic value (no lock needed) */ static struct kmem_cache *peer_cachep __read_mostly; @@ -504,10 +490,6 @@ relookup: p->daddr = *daddr; atomic_set(&p->refcnt, 1); atomic_set(&p->rid, 0); - atomic_set(&p->ip_id_count, - (daddr->family == AF_INET) ? - secure_ip_id(daddr->addr.a4) : - secure_ipv6_id(daddr->addr.a6)); p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; /* 60*HZ is arbitrary, but chosen enough high so that the first diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 4582ea3..29e55b6 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -42,12 +42,12 @@ static bool ip_may_fragment(const struct sk_buff *skb) { return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) || - !skb->local_df; + skb->local_df; } static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) { - if (skb->len <= mtu || skb->local_df) + if (skb->len <= mtu) return false; if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index d306360..b3becd0 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -463,6 +463,7 @@ static const struct net_device_ops ipgre_netdev_ops = { static void ipgre_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &ipgre_netdev_ops; + 
dev->type = ARPHRD_IPGRE; ip_tunnel_setup(dev, ipgre_net_id); } @@ -501,7 +502,6 @@ static int ipgre_tunnel_init(struct net_device *dev) memcpy(dev->dev_addr, &iph->saddr, 4); memcpy(dev->broadcast, &iph->daddr, 4); - dev->type = ARPHRD_IPGRE; dev->flags = IFF_NOARP; dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; dev->addr_len = 4; diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index d1f8e10..c7da06e 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -288,6 +288,10 @@ int ip_options_compile(struct net *net, optptr++; continue; } + if (unlikely(l < 2)) { + pp_ptr = optptr; + goto error; + } optlen = optptr[1]; if (optlen<2 || optlen>l) { pp_ptr = optptr; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 8bb3b4a..c20bff8 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -149,7 +149,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr); iph->saddr = saddr; iph->protocol = sk->sk_protocol; - ip_select_ident(skb, &rt->dst, sk); + ip_select_ident(skb, sk); if (opt && opt->opt.optlen) { iph->ihl += opt->opt.optlen>>2; @@ -387,8 +387,7 @@ packet_routed: ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0); } - ip_select_ident_more(skb, &rt->dst, sk, - (skb_shinfo(skb)->gso_segs ?: 1) - 1); + ip_select_ident_segs(skb, sk, skb_shinfo(skb)->gso_segs ?: 1); skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; @@ -1330,7 +1329,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, iph->ttl = ttl; iph->protocol = sk->sk_protocol; ip_copy_addrs(iph, fl4); - ip_select_ident(skb, &rt->dst, sk); + ip_select_ident(skb, sk); if (opt) { iph->ihl += opt->optlen>>2; @@ -1483,6 +1482,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, struct sk_buff *nskb; struct sock *sk; struct inet_sock *inet; + int err; if (ip_options_echo(&replyopts.opt.opt, skb)) return; @@ -1519,8 +1519,13 @@ void ip_send_unicast_reply(struct net 
*net, struct sk_buff *skb, __be32 daddr, sock_net_set(sk, net); __skb_queue_head_init(&sk->sk_write_queue); sk->sk_sndbuf = sysctl_wmem_default; - ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0, - &ipc, &rt, MSG_DONTWAIT); + err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, + len, 0, &ipc, &rt, MSG_DONTWAIT); + if (unlikely(err)) { + ip_flush_pending_frames(sk); + goto out; + } + nskb = skb_peek(&sk->sk_write_queue); if (nskb) { if (arg->csumoffset >= 0) @@ -1532,7 +1537,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb)); ip_push_pending_frames(sk, &fl4); } - +out: put_locked_var(unicast_lock, unicast_sock); ip_rt_put(rt); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 3bedb26..edd5a81 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -166,6 +166,7 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, hlist_for_each_entry_rcu(t, head, hash_node) { if (remote != t->parms.iph.daddr || + t->parms.iph.saddr != 0 || !(t->dev->flags & IFF_UP)) continue; @@ -182,10 +183,11 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, head = &itn->tunnels[hash]; hlist_for_each_entry_rcu(t, head, hash_node) { - if ((local != t->parms.iph.saddr && - (local != t->parms.iph.daddr || - !ipv4_is_multicast(local))) || - !(t->dev->flags & IFF_UP)) + if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) && + (local != t->parms.iph.daddr || !ipv4_is_multicast(local))) + continue; + + if (!(t->dev->flags & IFF_UP)) continue; if (!ip_tunnel_key_match(&t->parms, flags, key)) @@ -202,6 +204,8 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, hlist_for_each_entry_rcu(t, head, hash_node) { if (t->parms.i_key != key || + t->parms.iph.saddr != 0 || + t->parms.iph.daddr != 0 || !(t->dev->flags & IFF_UP)) continue; @@ -433,6 +437,8 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff 
*skb, tunnel->i_seqno = ntohl(tpi->seq) + 1; } + skb_reset_network_header(skb); + err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { if (log_ecn_error) @@ -853,6 +859,7 @@ int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, */ if (!IS_ERR(itn->fb_tunnel_dev)) { itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; + itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev); ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev)); } rtnl_unlock(); diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index c31e3ad..ff3f84f 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -74,7 +74,7 @@ int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb, iph->daddr = dst; iph->saddr = src; iph->ttl = ttl; - __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1); + __ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1); err = ip_local_out(skb); if (unlikely(net_xmit_eval(err))) @@ -91,11 +91,12 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto) skb_pull_rcsum(skb, hdr_len); if (inner_proto == htons(ETH_P_TEB)) { - struct ethhdr *eh = (struct ethhdr *)skb->data; + struct ethhdr *eh; if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) return -ENOMEM; + eh = (struct ethhdr *)skb->data; if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN)) skb->protocol = eh->h_proto; else diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 26847e1..33e2bf8 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -271,6 +271,7 @@ static const struct net_device_ops vti_netdev_ops = { static void vti_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &vti_netdev_ops; + dev->type = ARPHRD_TUNNEL; ip_tunnel_setup(dev, vti_net_id); } @@ -282,7 +283,6 @@ static int vti_tunnel_init(struct net_device *dev) memcpy(dev->dev_addr, &iph->saddr, 4); memcpy(dev->broadcast, &iph->daddr, 4); - dev->type = ARPHRD_TUNNEL; dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); dev->mtu = ETH_DATA_LEN; dev->flags 
= IFF_NOARP; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 7f80fb4..077f900 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -149,13 +149,13 @@ static int ipip_err(struct sk_buff *skb, u32 info) if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->dev->ifindex, 0, IPPROTO_IPIP, 0); + t->parms.link, 0, IPPROTO_IPIP, 0); err = 0; goto out; } if (type == ICMP_REDIRECT) { - ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0, + ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, IPPROTO_IPIP, 0); err = 0; goto out; @@ -485,4 +485,5 @@ static void __exit ipip_fini(void) module_init(ipip_init); module_exit(ipip_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK("ipip"); MODULE_ALIAS_NETDEV("tunl0"); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 6fbf339..648ba5e 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1661,7 +1661,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) iph->protocol = IPPROTO_IPIP; iph->ihl = 5; iph->tot_len = htons(skb->len); - ip_select_ident(skb, skb_dst(skb), NULL); + ip_select_ident(skb, NULL); ip_send_check(iph); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 85a4f21..c8abe31 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1039,8 +1039,10 @@ static int __do_replace(struct net *net, const char *name, xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, - sizeof(struct xt_counters) * num_counters) != 0) - ret = -EFAULT; + sizeof(struct xt_counters) * num_counters) != 0) { + /* Silent error, can't fail, new table is already in place */ + net_warn_ratelimited("arptables: counters copy to user failed while replacing table\n"); + } vfree(counters); xt_table_unlock(t); return ret; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index cb91101..2e86cbc 100644 
--- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1308,8 +1308,10 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, - sizeof(struct xt_counters) * num_counters) != 0) - ret = -EFAULT; + sizeof(struct xt_counters) * num_counters) != 0) { + /* Silent error, can't fail, new table is already in place */ + net_warn_ratelimited("iptables: counters copy to user failed while replacing table\n"); + } vfree(counters); xt_table_unlock(t); diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index cbc2215..9cb993c 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -220,6 +220,7 @@ static void ipt_ulog_packet(struct net *net, ub->qlen++; pm = nlmsg_data(nlh); + memset(pm, 0, sizeof(*pm)); /* We might not have a timestamp, get one */ if (skb->tstamp.tv64 == 0) @@ -238,8 +239,6 @@ static void ipt_ulog_packet(struct net *net, } else if (loginfo->prefix[0] != '\0') strncpy(pm->prefix, loginfo->prefix, sizeof(pm->prefix)); - else - *(pm->prefix) = '\0'; if (in && in->hard_header_len > 0 && skb->mac_header != skb->network_header && @@ -251,13 +250,9 @@ static void ipt_ulog_packet(struct net *net, if (in) strncpy(pm->indev_name, in->name, sizeof(pm->indev_name)); - else - pm->indev_name[0] = '\0'; if (out) strncpy(pm->outdev_name, out->name, sizeof(pm->outdev_name)); - else - pm->outdev_name[0] = '\0'; /* copy_len <= skb->len, so can't fail. 
*/ if (skb_copy_bits(skb, 0, pm->payload, copy_len) < 0) diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 7428155..4cfb3bd 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -22,7 +22,6 @@ #endif #include <net/netfilter/nf_conntrack_zones.h> -/* Returns new sk_buff, or NULL */ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) { int err; @@ -33,8 +32,10 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) err = ip_defrag(skb, user); local_bh_enable(); - if (!err) + if (!err) { ip_send_check(ip_hdr(skb)); + skb->local_df = 1; + } return err; } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index c211607..8bd51f4 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -214,6 +214,8 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) &ipv6_hdr(skb)->daddr)) continue; #endif + } else { + continue; } if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 7d3db78..6183d36 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -389,7 +389,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, iph->check = 0; iph->tot_len = htons(length); if (!iph->id) - ip_select_ident(skb, &rt->dst, NULL); + ip_select_ident(skb, NULL); iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d000b34..15bd37d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -89,6 +89,7 @@ #include <linux/rcupdate.h> #include <linux/times.h> #include <linux/slab.h> +#include <linux/jhash.h> #include <net/dst.h> #include <net/net_namespace.h> #include <net/protocol.h> @@ -473,39 +474,53 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, return neigh_create(&arp_tbl, pkey, dev); } -/* - * Peer allocation may fail only in serious out-of-memory conditions. However - * we still can generate some output. 
- * Random ID selection looks a bit dangerous because we have no chances to - * select ID being unique in a reasonable period of time. - * But broken packet identifier may be better than no packet at all. +#define IP_IDENTS_SZ 2048u +struct ip_ident_bucket { + atomic_t id; + u32 stamp32; +}; + +static struct ip_ident_bucket *ip_idents __read_mostly; + +/* In order to protect privacy, we add a perturbation to identifiers + * if one generator is seldom used. This makes hard for an attacker + * to infer how many packets were sent between two points in time. */ -static void ip_select_fb_ident(struct iphdr *iph) +u32 ip_idents_reserve(u32 hash, int segs) { - static DEFINE_SPINLOCK(ip_fb_id_lock); - static u32 ip_fallback_id; - u32 salt; + struct ip_ident_bucket *bucket = ip_idents + hash % IP_IDENTS_SZ; + u32 old = ACCESS_ONCE(bucket->stamp32); + u32 now = (u32)jiffies; + u32 delta = 0; + + if (old != now && cmpxchg(&bucket->stamp32, old, now) == old) { + u64 x = prandom_u32(); + + x *= (now - old); + delta = (u32)(x >> 32); + } - spin_lock_bh(&ip_fb_id_lock); - salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr); - iph->id = htons(salt & 0xFFFF); - ip_fallback_id = salt; - spin_unlock_bh(&ip_fb_id_lock); + return atomic_add_return(segs + delta, &bucket->id) - segs; } +EXPORT_SYMBOL(ip_idents_reserve); -void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) +void __ip_select_ident(struct iphdr *iph, int segs) { - struct net *net = dev_net(dst->dev); - struct inet_peer *peer; + static u32 ip_idents_hashrnd __read_mostly; + static bool hashrnd_initialized = false; + u32 hash, id; - peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1); - if (peer) { - iph->id = htons(inet_getid(peer, more)); - inet_putpeer(peer); - return; + if (unlikely(!hashrnd_initialized)) { + hashrnd_initialized = true; + get_random_bytes(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd)); } - ip_select_fb_ident(iph); + hash = jhash_3words((__force u32)iph->daddr, + 
(__force u32)iph->saddr, + iph->protocol, + ip_idents_hashrnd); + id = ip_idents_reserve(hash, segs); + iph->id = htons(id); } EXPORT_SYMBOL(__ip_select_ident); @@ -1040,20 +1055,21 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) const struct iphdr *iph = (const struct iphdr *) skb->data; struct flowi4 fl4; struct rtable *rt; - struct dst_entry *dst; + struct dst_entry *odst = NULL; bool new = false; bh_lock_sock(sk); - rt = (struct rtable *) __sk_dst_get(sk); + odst = sk_dst_get(sk); - if (sock_owned_by_user(sk) || !rt) { + if (sock_owned_by_user(sk) || !odst) { __ipv4_sk_update_pmtu(skb, sk, mtu); goto out; } __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); - if (!__sk_dst_check(sk, 0)) { + rt = (struct rtable *)odst; + if (odst->obsolete && odst->ops->check(odst, 0) == NULL) { rt = ip_route_output_flow(sock_net(sk), &fl4, sk); if (IS_ERR(rt)) goto out; @@ -1063,8 +1079,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) __ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu); - dst = dst_check(&rt->dst, 0); - if (!dst) { + if (!dst_check(&rt->dst, 0)) { if (new) dst_release(&rt->dst); @@ -1076,10 +1091,11 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) } if (new) - __sk_dst_set(sk, &rt->dst); + sk_dst_set(sk, &rt->dst); out: bh_unlock_sock(sk); + dst_release(odst); } EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); @@ -1533,7 +1549,7 @@ static int __mkroute_input(struct sk_buff *skb, struct in_device *out_dev; unsigned int flags = 0; bool do_cache; - u32 itag; + u32 itag = 0; /* get a working reference to the output device */ out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res)); @@ -2270,9 +2286,9 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, return rt; if (flp4->flowi4_proto) - rt = (struct rtable *) xfrm_lookup(net, &rt->dst, - flowi4_to_flowi(flp4), - sk, 0); + rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst, + flowi4_to_flowi(flp4), + sk, 0); return rt; 
} @@ -2374,7 +2390,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, } } else #endif - if (nla_put_u32(skb, RTA_IIF, rt->rt_iif)) + if (nla_put_u32(skb, RTA_IIF, skb->dev->ifindex)) goto nla_put_failure; } @@ -2727,6 +2743,12 @@ int __init ip_rt_init(void) { int rc = 0; + ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL); + if (!ip_idents) + panic("IP: failed to allocate ip_idents\n"); + + prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents)); + #ifdef CONFIG_IP_ROUTE_CLASSID ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); if (!ip_rt_acct) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3b8f542..e403405 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1064,7 +1064,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (unlikely(tp->repair)) { if (tp->repair_queue == TCP_RECV_QUEUE) { copied = tcp_send_rcvq(sk, msg, size); - goto out; + goto out_nopush; } err = -EINVAL; @@ -1237,6 +1237,7 @@ wait_for_memory: out: if (copied) tcp_push(sk, flags, mss_now, tp->nonagle); +out_nopush: release_sock(sk); return copied + copied_syn; @@ -2908,61 +2909,42 @@ EXPORT_SYMBOL(compat_tcp_getsockopt); #endif #ifdef CONFIG_TCP_MD5SIG -static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool __read_mostly; +static DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool); static DEFINE_MUTEX(tcp_md5sig_mutex); - -static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool) -{ - int cpu; - - for_each_possible_cpu(cpu) { - struct tcp_md5sig_pool *p = per_cpu_ptr(pool, cpu); - - if (p->md5_desc.tfm) - crypto_free_hash(p->md5_desc.tfm); - } - free_percpu(pool); -} +static bool tcp_md5sig_pool_populated = false; static void __tcp_alloc_md5sig_pool(void) { int cpu; - struct tcp_md5sig_pool __percpu *pool; - - pool = alloc_percpu(struct tcp_md5sig_pool); - if (!pool) - return; for_each_possible_cpu(cpu) { - struct crypto_hash *hash; - - hash = 
crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR_OR_NULL(hash)) - goto out_free; + if (!per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm) { + struct crypto_hash *hash; - per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash; + hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR_OR_NULL(hash)) + return; + per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm = hash; + } } - /* before setting tcp_md5sig_pool, we must commit all writes - * to memory. See ACCESS_ONCE() in tcp_get_md5sig_pool() + /* before setting tcp_md5sig_pool_populated, we must commit all writes + * to memory. See smp_rmb() in tcp_get_md5sig_pool() */ smp_wmb(); - tcp_md5sig_pool = pool; - return; -out_free: - __tcp_free_md5sig_pool(pool); + tcp_md5sig_pool_populated = true; } bool tcp_alloc_md5sig_pool(void) { - if (unlikely(!tcp_md5sig_pool)) { + if (unlikely(!tcp_md5sig_pool_populated)) { mutex_lock(&tcp_md5sig_mutex); - if (!tcp_md5sig_pool) + if (!tcp_md5sig_pool_populated) __tcp_alloc_md5sig_pool(); mutex_unlock(&tcp_md5sig_mutex); } - return tcp_md5sig_pool != NULL; + return tcp_md5sig_pool_populated; } EXPORT_SYMBOL(tcp_alloc_md5sig_pool); @@ -2976,13 +2958,13 @@ EXPORT_SYMBOL(tcp_alloc_md5sig_pool); */ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) { - struct tcp_md5sig_pool __percpu *p; - local_bh_disable(); - p = ACCESS_ONCE(tcp_md5sig_pool); - if (p) - return __this_cpu_ptr(p); + if (tcp_md5sig_pool_populated) { + /* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */ + smp_rmb(); + return this_cpu_ptr(&tcp_md5sig_pool); + } local_bh_enable(); return NULL; } diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index b6ae92a..894b7ce 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -408,7 +408,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT; ratio += cnt; - ca->delayed_ack = min(ratio, ACK_RATIO_LIMIT); + ca->delayed_ack = clamp(ratio, 1U, ACK_RATIO_LIMIT); } /* Some calls are for duplicates without 
timetamps */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 068c8fb..172cd99 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1064,7 +1064,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, } /* D-SACK for already forgotten data... Do dumb counting. */ - if (dup_sack && tp->undo_marker && tp->undo_retrans && + if (dup_sack && tp->undo_marker && tp->undo_retrans > 0 && !after(end_seq_0, prior_snd_una) && after(end_seq_0, tp->undo_marker)) tp->undo_retrans--; @@ -1120,7 +1120,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, unsigned int new_len = (pkt_len / mss) * mss; if (!in_sack && new_len < pkt_len) { new_len += mss; - if (new_len > skb->len) + if (new_len >= skb->len) return 0; } pkt_len = new_len; @@ -1144,7 +1144,7 @@ static u8 tcp_sacktag_one(struct sock *sk, /* Account D-SACK for retransmitted packet. */ if (dup_sack && (sacked & TCPCB_RETRANS)) { - if (tp->undo_marker && tp->undo_retrans && + if (tp->undo_marker && tp->undo_retrans > 0 && after(end_seq, tp->undo_marker)) tp->undo_retrans--; if (sacked & TCPCB_SACKED_ACKED) @@ -1845,7 +1845,7 @@ static void tcp_clear_retrans_partial(struct tcp_sock *tp) tp->lost_out = 0; tp->undo_marker = 0; - tp->undo_retrans = 0; + tp->undo_retrans = -1; } void tcp_clear_retrans(struct tcp_sock *tp) @@ -2613,7 +2613,7 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) tp->prior_ssthresh = 0; tp->undo_marker = tp->snd_una; - tp->undo_retrans = tp->retrans_out; + tp->undo_retrans = tp->retrans_out ? 
: -1; if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { if (!ece_ack) @@ -2628,18 +2628,16 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) */ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) { - struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); bool recovered = !before(tp->snd_una, tp->high_seq); if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */ - if (flag & FLAG_ORIG_SACK_ACKED) { - /* Step 3.b. A timeout is spurious if not all data are - * lost, i.e., never-retransmitted data are (s)acked. - */ - tcp_try_undo_loss(sk, true); + /* Step 3.b. A timeout is spurious if not all data are + * lost, i.e., never-retransmitted data are (s)acked. + */ + if (tcp_try_undo_loss(sk, flag & FLAG_ORIG_SACK_ACKED)) return; - } + if (after(tp->snd_nxt, tp->high_seq) && (flag & FLAG_DATA_SACKED || is_dupack)) { tp->frto = 0; /* Loss was real: 2nd part of step 3.a */ @@ -2655,12 +2653,9 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) if (recovered) { /* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */ - icsk->icsk_retransmits = 0; tcp_try_undo_recovery(sk); return; } - if (flag & FLAG_DATA_ACKED) - icsk->icsk_retransmits = 0; if (tcp_is_reno(tp)) { /* A Reno DUPACK means new data in F-RTO step 2.b above are * delivered. Lower inflight to clock out (re)tranmissions. 
@@ -3349,8 +3344,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) tcp_rearm_rto(sk); - if (after(ack, prior_snd_una)) + if (after(ack, prior_snd_una)) { flag |= FLAG_SND_UNA_ADVANCED; + icsk->icsk_retransmits = 0; + } prior_fackets = tp->fackets_out; prior_in_flight = tcp_packets_in_flight(tp); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5031f68..45f3703 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -269,7 +269,7 @@ EXPORT_SYMBOL(tcp_v4_connect); * It can be called through tcp_release_cb() if socket was owned by user * at the time tcp_v4_err() was called to handle ICMP message. */ -static void tcp_v4_mtu_reduced(struct sock *sk) +void tcp_v4_mtu_reduced(struct sock *sk) { struct dst_entry *dst; struct inet_sock *inet = inet_sk(sk); @@ -299,6 +299,7 @@ static void tcp_v4_mtu_reduced(struct sock *sk) tcp_simple_retransmit(sk); } /* else let the usual retransmit timer handle it */ } +EXPORT_SYMBOL(tcp_v4_mtu_reduced); static void do_redirect(struct sk_buff *skb, struct sock *sk) { @@ -2117,6 +2118,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = { .compat_setsockopt = compat_ip_setsockopt, .compat_getsockopt = compat_ip_getsockopt, #endif + .mtu_reduced = tcp_v4_mtu_reduced, }; EXPORT_SYMBOL(ipv4_specific); @@ -2796,7 +2798,6 @@ struct proto tcp_prot = { .sendpage = tcp_sendpage, .backlog_rcv = tcp_v4_do_rcv, .release_cb = tcp_release_cb, - .mtu_reduced = tcp_v4_mtu_reduced, .hash = inet_hash, .unhash = inet_unhash, .get_port = inet_csk_get_port, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 826fc6f..b4435ae 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -785,7 +785,7 @@ void tcp_release_cb(struct sock *sk) __sock_put(sk); } if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) { - sk->sk_prot->mtu_reduced(sk); + inet_csk(sk)->icsk_af_ops->mtu_reduced(sk); __sock_put(sk); } } @@ -1871,7 +1871,7 @@ static bool 
tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) break; - if (tso_segs == 1) { + if (tso_segs == 1 || !sk->sk_gso_max_segs) { if (unlikely(!tcp_nagle_test(tp, skb, mss_now, (tcp_skb_is_last(sk, skb) ? nonagle : TCP_NAGLE_PUSH)))) @@ -1908,7 +1908,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, } limit = mss_now; - if (tso_segs > 1 && !tcp_urg_mode(tp)) + if (tso_segs > 1 && sk->sk_gso_max_segs && !tcp_urg_mode(tp)) limit = tcp_mss_split_point(sk, skb, mss_now, min_t(unsigned int, cwnd_quota, @@ -2045,9 +2045,7 @@ void tcp_send_loss_probe(struct sock *sk) if (WARN_ON(!skb || !tcp_skb_pcount(skb))) goto rearm_timer; - /* Probe with zero data doesn't trigger fast recovery. */ - if (skb->len > 0) - err = __tcp_retransmit_skb(sk, skb); + err = __tcp_retransmit_skb(sk, skb); /* Record snd_nxt for loss detection. */ if (likely(!err)) @@ -2437,8 +2435,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) if (!tp->retrans_stamp) tp->retrans_stamp = TCP_SKB_CB(skb)->when; - tp->undo_retrans += tcp_skb_pcount(skb); - /* snd_nxt is stored to detect loss of retransmitted segment, * see tcp_input.c tcp_sacktag_write_queue(). 
*/ @@ -2446,6 +2442,10 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) } else { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); } + + if (tp->undo_retrans < 0) + tp->undo_retrans = 0; + tp->undo_retrans += tcp_skb_pcount(skb); return err; } diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 80fa2bf..c042e52 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -218,7 +218,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) * This is: * (actual rate in segments) * baseRTT */ - target_cwnd = tp->snd_cwnd * vegas->baseRTT / rtt; + target_cwnd = (u64)tp->snd_cwnd * vegas->baseRTT; + do_div(target_cwnd, rtt); /* Calculate the difference between the window we had, * and the window we would like to have. This quantity diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index ac43cd7..b4d1858 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -144,7 +144,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) rtt = veno->minrtt; - target_cwnd = (tp->snd_cwnd * veno->basertt); + target_cwnd = (u64)tp->snd_cwnd * veno->basertt; target_cwnd <<= V_PARAM_SHIFT; do_div(target_cwnd, rtt); diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index b5663c3..e3f6483 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -117,12 +117,12 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? 
0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF)); - ip_select_ident(skb, dst->child, NULL); top_iph->ttl = ip4_dst_hoplimit(dst->child); top_iph->saddr = x->props.saddr.a4; top_iph->daddr = x->id.daddr.a4; + ip_select_ident(skb, NULL); return 0; } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5bec666..5e30677 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1418,7 +1418,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w) if (w->skip) { w->skip--; - continue; + goto skip; } err = w->func(w); @@ -1428,6 +1428,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w) w->count++; continue; } +skip: w->state = FWS_U; case FWS_U: if (fn == w->root) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index bf4a9a0..7d640f2 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -511,11 +511,11 @@ static int ip6gre_rcv(struct sk_buff *skb) skb->protocol = gre_proto; /* WCCP version 1 and 2 protocol decoding. - * - Change protocol to IP + * - Change protocol to IPv6 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header */ if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) { - skb->protocol = htons(ETH_P_IP); + skb->protocol = htons(ETH_P_IPV6); if ((*(h + offset) & 0xF0) != 0x40) offset += 4; } @@ -790,7 +790,7 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) encap_limit = t->parms.encap_limit; memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPIP; + fl6.flowi6_proto = IPPROTO_GRE; dsfield = ipv4_get_dsfield(iph); @@ -840,7 +840,7 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) encap_limit = t->parms.encap_limit; memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPV6; + fl6.flowi6_proto = IPPROTO_GRE; dsfield = ipv6_get_dsfield(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) @@ -965,8 +965,6 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) else dev->flags &= ~IFF_POINTOPOINT; - 
dev->iflink = p->link; - /* Precalculate GRE options length */ if (t->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) { if (t->parms.o_flags&GRE_CSUM) @@ -1269,6 +1267,8 @@ static int ip6gre_tunnel_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; + dev->iflink = tunnel->parms.link; + return 0; } @@ -1285,7 +1285,6 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev) dev_hold(dev); } - static struct inet6_protocol ip6gre_protocol __read_mostly = { .handler = ip6gre_rcv, .err_handler = ip6gre_err, @@ -1462,6 +1461,8 @@ static int ip6gre_tap_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; + dev->iflink = tunnel->parms.link; + return 0; } @@ -1554,6 +1555,15 @@ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], return 0; } +static void ip6gre_dellink(struct net_device *dev, struct list_head *head) +{ + struct net *net = dev_net(dev); + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + + if (dev != ign->fb_tunnel_dev) + unregister_netdevice_queue(dev, head); +} + static size_t ip6gre_get_size(const struct net_device *dev) { return @@ -1631,6 +1641,7 @@ static struct rtnl_link_ops ip6gre_link_ops __read_mostly = { .validate = ip6gre_tunnel_validate, .newlink = ip6gre_newlink, .changelink = ip6gre_changelink, + .dellink = ip6gre_dellink, .get_size = ip6gre_get_size, .fill_info = ip6gre_fill_info, }; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 2eeb13a..dc67e01 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -323,12 +323,16 @@ static inline int ip6_forward_finish(struct sk_buff *skb) static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) { - if (skb->len <= mtu || skb->local_df) + if (skb->len <= mtu) return false; + /* ipv6 conntrack defrag sets max_frag_size + local_df */ if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu) return true; + if (skb->local_df) + return false; + if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) return 
false; @@ -513,6 +517,23 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) skb_copy_secmark(to, from); } +static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) +{ + static u32 ip6_idents_hashrnd __read_mostly; + static bool hashrnd_initialized = false; + u32 hash, id; + + if (unlikely(!hashrnd_initialized)) { + hashrnd_initialized = true; + get_random_bytes(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); + } + hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd); + hash = __ipv6_addr_jhash(&rt->rt6i_src.addr, hash); + + id = ip_idents_reserve(hash, 1); + fhdr->identification = htonl(id); +} + int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct sk_buff *frag; @@ -974,7 +995,7 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, if (can_sleep) fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; - return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); + return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); @@ -1010,7 +1031,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, if (can_sleep) fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; - return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); + return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index c1e11b5..f8a70a1 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -62,6 +62,7 @@ MODULE_AUTHOR("Ville Nuorvala"); MODULE_DESCRIPTION("IPv6 tunneling device"); MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK("ip6tnl"); MODULE_ALIAS_NETDEV("ip6tnl0"); #ifdef IP6_TNL_DEBUG @@ -265,9 +266,6 @@ static int ip6_tnl_create2(struct net_device *dev) int err; t = netdev_priv(dev); - err = ip6_tnl_dev_init(dev); - if (err < 0) - goto out; err = register_netdevice(dev); if (err < 0) 
@@ -1447,6 +1445,7 @@ ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) static const struct net_device_ops ip6_tnl_netdev_ops = { + .ndo_init = ip6_tnl_dev_init, .ndo_uninit = ip6_tnl_dev_uninit, .ndo_start_xmit = ip6_tnl_xmit, .ndo_do_ioctl = ip6_tnl_ioctl, @@ -1531,16 +1530,10 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev) struct ip6_tnl *t = netdev_priv(dev); struct net *net = dev_net(dev); struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); - int err = ip6_tnl_dev_init_gen(dev); - - if (err) - return err; t->parms.proto = IPPROTO_IPV6; dev_hold(dev); - ip6_tnl_link_config(t); - rcu_assign_pointer(ip6n->tnls_wc[0], t); return 0; } @@ -1549,7 +1542,7 @@ static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[]) { u8 proto; - if (!data) + if (!data || !data[IFLA_IPTUN_PROTO]) return 0; proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 95f3f1d..d38e6a8 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -30,13 +30,15 @@ int ip6_route_me_harder(struct sk_buff *skb) .daddr = iph->daddr, .saddr = iph->saddr, }; + int err; dst = ip6_route_output(net, skb->sk, &fl6); - if (dst->error) { + err = dst->error; + if (err) { IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); dst_release(dst); - return dst->error; + return err; } /* Drop old route. 
*/ diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 167cb5a..bba4791 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1316,8 +1316,10 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, - sizeof(struct xt_counters) * num_counters) != 0) - ret = -EFAULT; + sizeof(struct xt_counters) * num_counters) != 0) { + /* Silent error, can't fail, new table is already in place */ + net_warn_ratelimited("ip6tables: counters copy to user failed while replacing table\n"); + } vfree(counters); xt_table_unlock(t); diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 827f795..4bd870a 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -3,38 +3,48 @@ * not configured or static. These functions are needed by GSO/GRO implementation. */ #include <linux/export.h> +#include <linux/random.h> +#include <net/ip.h> #include <net/ipv6.h> #include <net/ip6_fib.h> #include <net/addrconf.h> -void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) +/* This function exists only for tap drivers that must support broken + * clients requesting UFO without specifying an IPv6 fragment ID. + * + * This is similar to ipv6_select_ident() but we use an independent hash + * seed to limit information leakage. + * + * The network header must be set before calling this. 
+ */ +void ipv6_proxy_select_ident(struct sk_buff *skb) { - static atomic_t ipv6_fragmentation_id; - int old, new; - -#if IS_ENABLED(CONFIG_IPV6) - if (rt && !(rt->dst.flags & DST_NOPEER)) { - struct inet_peer *peer; - struct net *net; - - net = dev_net(rt->dst.dev); - peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); - if (peer) { - fhdr->identification = htonl(inet_getid(peer, 0)); - inet_putpeer(peer); - return; - } + static u32 ip6_proxy_idents_hashrnd __read_mostly; + struct in6_addr buf[2]; + struct in6_addr *addrs; + static bool done = false; + u32 hash, id; + + addrs = skb_header_pointer(skb, + skb_network_offset(skb) + + offsetof(struct ipv6hdr, saddr), + sizeof(buf), buf); + if (!addrs) + return; + + if (!done) { + get_random_bytes(&ip6_proxy_idents_hashrnd, + sizeof(ip6_proxy_idents_hashrnd)); + done = true; } -#endif - do { - old = atomic_read(&ipv6_fragmentation_id); - new = old + 1; - if (!new) - new = 1; - } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old); - fhdr->identification = htonl(new); + + hash = __ipv6_addr_jhash(&addrs[1], ip6_proxy_idents_hashrnd); + hash = __ipv6_addr_jhash(&addrs[0], hash); + + id = ip_idents_reserve(hash, 1); + skb_shinfo(skb)->ip6_frag_id = htonl(id); } -EXPORT_SYMBOL(ipv6_select_ident); +EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 33e9434..ddb4e3d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1339,7 +1339,7 @@ static unsigned int ip6_mtu(const struct dst_entry *dst) unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); if (mtu) - return mtu; + goto out; mtu = IPV6_MIN_MTU; @@ -1349,7 +1349,8 @@ static unsigned int ip6_mtu(const struct dst_entry *dst) mtu = idev->cnf.mtu6; rcu_read_unlock(); - return mtu; +out: + return min_t(unsigned int, mtu, IP6_MAX_MTU); } static struct dst_entry *icmp6_dst_gc_list; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index b433884..8e8fc32 
100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -101,19 +101,19 @@ static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net, for_each_ip_tunnel_rcu(t, sitn->tunnels_r_l[h0 ^ h1]) { if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr && - (!dev || !t->parms.link || dev->iflink == t->parms.link) && + (!dev || !t->parms.link || dev->ifindex == t->parms.link) && (t->dev->flags & IFF_UP)) return t; } for_each_ip_tunnel_rcu(t, sitn->tunnels_r[h0]) { if (remote == t->parms.iph.daddr && - (!dev || !t->parms.link || dev->iflink == t->parms.link) && + (!dev || !t->parms.link || dev->ifindex == t->parms.link) && (t->dev->flags & IFF_UP)) return t; } for_each_ip_tunnel_rcu(t, sitn->tunnels_l[h1]) { if (local == t->parms.iph.saddr && - (!dev || !t->parms.link || dev->iflink == t->parms.link) && + (!dev || !t->parms.link || dev->ifindex == t->parms.link) && (t->dev->flags & IFF_UP)) return t; } @@ -195,10 +195,8 @@ static int ipip6_tunnel_create(struct net_device *dev) struct sit_net *sitn = net_generic(net, sit_net_id); int err; - err = ipip6_tunnel_init(dev); - if (err < 0) - goto out; - ipip6_tunnel_clone_6rd(dev, sitn); + memcpy(dev->dev_addr, &t->parms.iph.saddr, 4); + memcpy(dev->broadcast, &t->parms.iph.daddr, 4); if ((__force u16)t->parms.i_flags & SIT_ISATAP) dev->priv_flags |= IFF_ISATAP; @@ -207,7 +205,8 @@ static int ipip6_tunnel_create(struct net_device *dev) if (err < 0) goto out; - strcpy(t->parms.name, dev->name); + ipip6_tunnel_clone_6rd(dev, sitn); + dev->rtnl_link_ops = &sit_link_ops; dev_hold(dev); @@ -530,12 +529,12 @@ static int ipip6_err(struct sk_buff *skb, u32 info) if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->dev->ifindex, 0, IPPROTO_IPV6, 0); + t->parms.link, 0, IPPROTO_IPV6, 0); err = 0; goto out; } if (type == ICMP_REDIRECT) { - ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0, + ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, IPPROTO_IPV6, 0); err 
= 0; goto out; @@ -1279,6 +1278,7 @@ static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu) } static const struct net_device_ops ipip6_netdev_ops = { + .ndo_init = ipip6_tunnel_init, .ndo_uninit = ipip6_tunnel_uninit, .ndo_start_xmit = sit_tunnel_xmit, .ndo_do_ioctl = ipip6_tunnel_ioctl, @@ -1313,9 +1313,7 @@ static int ipip6_tunnel_init(struct net_device *dev) tunnel->dev = dev; tunnel->net = dev_net(dev); - - memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); - memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); + strcpy(tunnel->parms.name, dev->name); ipip6_tunnel_bind_dev(dev); dev->tstats = alloc_percpu(struct pcpu_tstats); @@ -1334,7 +1332,6 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) tunnel->dev = dev; tunnel->net = dev_net(dev); - strcpy(tunnel->parms.name, dev->name); iph->version = 4; iph->protocol = IPPROTO_IPV6; @@ -1770,4 +1767,5 @@ xfrm_tunnel_failed: module_init(sit_init); module_exit(sit_cleanup); MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK("sit"); MODULE_ALIAS_NETDEV("sit0"); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5c71501..3058c4a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1651,6 +1651,7 @@ static const struct inet_connection_sock_af_ops ipv6_specific = { .compat_setsockopt = compat_ipv6_setsockopt, .compat_getsockopt = compat_ipv6_getsockopt, #endif + .mtu_reduced = tcp_v6_mtu_reduced, }; #ifdef CONFIG_TCP_MD5SIG @@ -1682,6 +1683,7 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = { .compat_setsockopt = compat_ipv6_setsockopt, .compat_getsockopt = compat_ipv6_getsockopt, #endif + .mtu_reduced = tcp_v4_mtu_reduced, }; #ifdef CONFIG_TCP_MD5SIG @@ -1919,7 +1921,6 @@ struct proto tcpv6_prot = { .sendpage = tcp_sendpage, .backlog_rcv = tcp_v6_do_rcv, .release_cb = tcp_release_cb, - .mtu_reduced = tcp_v6_mtu_reduced, .hash = tcp_v6_hash, .unhash = inet_unhash, .get_port = inet_csk_get_port, diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 
e096025..6857ae4 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1778,6 +1778,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, struct ipxhdr *ipx = NULL; struct sk_buff *skb; int copied, rc; + bool locked = true; lock_sock(sk); /* put the autobinding in */ @@ -1804,6 +1805,8 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, if (sock_flag(sk, SOCK_ZAPPED)) goto out; + release_sock(sk); + locked = false; skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &rc); if (!skb) @@ -1837,7 +1840,8 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, out_free: skb_free_datagram(sk, skb); out: - release_sock(sk); + if (locked) + release_sock(sk); return rc; } diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index c4b7218..1465363 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1829,7 +1829,7 @@ static void iucv_callback_txdone(struct iucv_path *path, spin_lock_irqsave(&list->lock, flags); while (list_skb != (struct sk_buff *)list) { - if (msg->tag != IUCV_SKB_CB(list_skb)->tag) { + if (msg->tag == IUCV_SKB_CB(list_skb)->tag) { this = list_skb; break; } diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 44441c0..c3ae241 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -754,9 +754,10 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, session->deref = pppol2tp_session_sock_put; /* If PMTU discovery was enabled, use the MTU that was discovered */ - dst = sk_dst_get(sk); + dst = sk_dst_get(tunnel->sock); if (dst != NULL) { - u32 pmtu = dst_mtu(__sk_dst_get(sk)); + u32 pmtu = dst_mtu(dst); + if (pmtu != 0) session->mtu = session->mru = pmtu - PPPOL2TP_HEADER_OVERHEAD; @@ -1365,7 +1366,7 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname, int err; if (level != SOL_PPPOL2TP) - return udp_prot.setsockopt(sk, level, optname, optval, optlen); + return -EINVAL; if (optlen < sizeof(int)) return -EINVAL; @@ -1491,7 
+1492,7 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname, struct pppol2tp_session *ps; if (level != SOL_PPPOL2TP) - return udp_prot.getsockopt(sk, level, optname, optval, optlen); + return -EINVAL; if (get_user(len, optlen)) return -EFAULT; diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index cafe614..8e41f01 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -34,8 +34,7 @@ static ssize_t ieee80211_if_read( ssize_t ret = -EINVAL; read_lock(&dev_base_lock); - if (sdata->dev->reg_state == NETREG_REGISTERED) - ret = (*format)(sdata, buf, sizeof(buf)); + ret = (*format)(sdata, buf, sizeof(buf)); read_unlock(&dev_base_lock); if (ret >= 0) @@ -62,8 +61,7 @@ static ssize_t ieee80211_if_write( ret = -ENODEV; rtnl_lock(); - if (sdata->dev->reg_state == NETREG_REGISTERED) - ret = (*write)(sdata, buf, count); + ret = (*write)(sdata, buf, count); rtnl_unlock(); return ret; diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index a12afe7..f69cac4 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -1203,6 +1203,7 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, sdata->u.ibss.privacy = params->privacy; sdata->u.ibss.control_port = params->control_port; sdata->u.ibss.basic_rates = params->basic_rates; + sdata->u.ibss.last_scan_completed = jiffies; /* fix basic_rates if channel does not support these rates */ rate_flags = ieee80211_chandef_rate_flags(¶ms->chandef); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 611abfc..23c1316 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -311,6 +311,7 @@ struct ieee80211_roc_work { bool started, abort, hw_begun, notified; bool to_be_freed; + bool on_channel; unsigned long hw_start_time; @@ -1320,6 +1321,7 @@ void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata); void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata); void 
ieee80211_mgd_conn_tx_status(struct ieee80211_sub_if_data *sdata, __le16 fc, bool acked); +void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata); /* IBSS code */ diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index fcecd63..31da72c 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -749,10 +749,12 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, u32 hw_reconf_flags = 0; int i, flushed; struct ps_data *ps; + bool cancel_scan; clear_bit(SDATA_STATE_RUNNING, &sdata->state); - if (rcu_access_pointer(local->scan_sdata) == sdata) + cancel_scan = rcu_access_pointer(local->scan_sdata) == sdata; + if (cancel_scan) ieee80211_scan_cancel(local); /* @@ -959,6 +961,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, ieee80211_recalc_ps(local, -1); + if (cancel_scan) + flush_delayed_work(&local->scan_work); + if (local->open_count == 0) { ieee80211_stop_device(local); @@ -1766,7 +1771,6 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) } mutex_unlock(&local->iflist_mtx); unregister_netdevice_many(&unreg_list); - list_del(&unreg_list); list_for_each_entry_safe(sdata, tmp, &wdev_list, list) { list_del(&sdata->list); diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 620677e..23dfd24 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -615,7 +615,7 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local, int i; mutex_lock(&local->key_mtx); - for (i = 0; i < NUM_DEFAULT_KEYS; i++) { + for (i = 0; i < ARRAY_SIZE(sta->gtk); i++) { key = key_mtx_dereference(local, sta->gtk[i]); if (!key) continue; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index e765f77..2c5f21c 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -148,6 +148,8 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local) list_for_each_entry_rcu(sdata, &local->interfaces, list) { if 
(!rcu_access_pointer(sdata->vif.chanctx_conf)) continue; + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + continue; power = min(power, sdata->vif.bss_conf.txpower); } rcu_read_unlock(); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 8d7f4ab..023bc33 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -131,13 +131,13 @@ void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata) if (unlikely(!sdata->u.mgd.associated)) return; + ifmgd->probe_send_count = 0; + if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) return; mod_timer(&sdata->u.mgd.conn_mon_timer, round_jiffies_up(jiffies + IEEE80211_CONNECTION_IDLE_TIME)); - - ifmgd->probe_send_count = 0; } static int ecw2cw(int ecw) @@ -1265,7 +1265,8 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, ieee80211_queue_work(&local->hw, &ifmgd->chswitch_work); else mod_timer(&ifmgd->chswitch_timer, - TU_TO_EXP_TIME(count * cbss->beacon_interval)); + TU_TO_EXP_TIME((count - 1) * + cbss->beacon_interval)); } static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata, @@ -2881,8 +2882,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems, channel); if (bss) { - ieee80211_rx_bss_put(local, bss); sdata->vif.bss_conf.beacon_rate = bss->beacon_rate; + ieee80211_rx_bss_put(local, bss); } } @@ -3684,6 +3685,38 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata) } #ifdef CONFIG_PM +void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; + + sdata_lock(sdata); + + if (ifmgd->auth_data || ifmgd->assoc_data) { + const u8 *bssid = ifmgd->auth_data ? 
+ ifmgd->auth_data->bss->bssid : + ifmgd->assoc_data->bss->bssid; + + /* + * If we are trying to authenticate / associate while suspending, + * cfg80211 won't know and won't actually abort those attempts, + * thus we need to do that ourselves. + */ + ieee80211_send_deauth_disassoc(sdata, bssid, + IEEE80211_STYPE_DEAUTH, + WLAN_REASON_DEAUTH_LEAVING, + false, frame_buf); + if (ifmgd->assoc_data) + ieee80211_destroy_assoc_data(sdata, false); + if (ifmgd->auth_data) + ieee80211_destroy_auth_data(sdata, false); + cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, + IEEE80211_DEAUTH_FRAME_LEN); + } + + sdata_unlock(sdata); +} + void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; @@ -4308,8 +4341,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); if (bss->wmm_used && bss->uapsd_supported && - (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD) && - sdata->wmm_acm != 0xff) { + (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD)) { assoc_data->uapsd = true; ifmgd->flags |= IEEE80211_STA_UAPSD_ENABLED; } else { diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 0c2a294..7a17dec 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -333,7 +333,7 @@ void ieee80211_sw_roc_work(struct work_struct *work) container_of(work, struct ieee80211_roc_work, work.work); struct ieee80211_sub_if_data *sdata = roc->sdata; struct ieee80211_local *local = sdata->local; - bool started; + bool started, on_channel; mutex_lock(&local->mtx); @@ -354,13 +354,26 @@ void ieee80211_sw_roc_work(struct work_struct *work) if (!roc->started) { struct ieee80211_roc_work *dep; - /* start this ROC */ + WARN_ON(local->use_chanctx); + + /* If actually operating on the desired channel (with at least + * 20 MHz channel width) don't stop all the operations but still + * treat it as though the ROC operation started properly, so + * other ROC operations won't interfere with 
this one. + */ + roc->on_channel = roc->chan == local->_oper_chandef.chan && + local->_oper_chandef.width != NL80211_CHAN_WIDTH_5 && + local->_oper_chandef.width != NL80211_CHAN_WIDTH_10; - /* switch channel etc */ + /* start this ROC */ ieee80211_recalc_idle(local); - local->tmp_channel = roc->chan; - ieee80211_hw_config(local, 0); + if (!roc->on_channel) { + ieee80211_offchannel_stop_vifs(local); + + local->tmp_channel = roc->chan; + ieee80211_hw_config(local, 0); + } /* tell userspace or send frame */ ieee80211_handle_roc_started(roc); @@ -379,9 +392,10 @@ void ieee80211_sw_roc_work(struct work_struct *work) finish: list_del(&roc->list); started = roc->started; + on_channel = roc->on_channel; ieee80211_roc_notify_destroy(roc, !roc->abort); - if (started) { + if (started && !on_channel) { ieee80211_flush_queues(local, NULL); local->tmp_channel = NULL; diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 3401262..efb510e 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -101,10 +101,18 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) /* remove all interfaces that were created in the driver */ list_for_each_entry(sdata, &local->interfaces, list) { - if (!ieee80211_sdata_running(sdata) || - sdata->vif.type == NL80211_IFTYPE_AP_VLAN || - sdata->vif.type == NL80211_IFTYPE_MONITOR) + if (!ieee80211_sdata_running(sdata)) continue; + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_MONITOR: + continue; + case NL80211_IFTYPE_STATION: + ieee80211_mgd_quiesce(sdata); + break; + default: + break; + } drv_remove_interface(local, sdata); } diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index e126605..8753b77 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -454,7 +454,7 @@ static void rate_fixup_ratelist(struct ieee80211_vif *vif, */ if (!(rates[0].flags & IEEE80211_TX_RC_MCS)) { u32 basic_rates = vif->bss_conf.basic_rates; - s8 baserate = basic_rates ? 
ffs(basic_rates - 1) : 0; + s8 baserate = basic_rates ? ffs(basic_rates) - 1 : 0; rate = &sband->bitrates[rates[0].idx]; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 62fba17..abe6e29 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1646,11 +1646,14 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) sc = le16_to_cpu(hdr->seq_ctrl); frag = sc & IEEE80211_SCTL_FRAG; - if (likely((!ieee80211_has_morefrags(fc) && frag == 0) || - is_multicast_ether_addr(hdr->addr1))) { - /* not fragmented */ - goto out; + if (is_multicast_ether_addr(hdr->addr1)) { + rx->local->dot11MulticastReceivedFrameCount++; + goto out_no_led; } + + if (likely(!ieee80211_has_morefrags(fc) && frag == 0)) + goto out; + I802_DEBUG_INC(rx->local->rx_handlers_fragments); if (skb_linearize(rx->skb)) @@ -1741,12 +1744,10 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) status->rx_flags |= IEEE80211_RX_FRAGMENTED; out: + ieee80211_led_rx(rx->local); + out_no_led: if (rx->sta) rx->sta->rx_packets++; - if (is_multicast_ether_addr(hdr->addr1)) - rx->local->dot11MulticastReceivedFrameCount++; - else - ieee80211_led_rx(rx->local); return RX_CONTINUE; } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index db41c19..3702572 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -271,6 +271,7 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) sta_dbg(sta->sdata, "Destroyed STA %pM\n", sta->sta.addr); + kfree(rcu_dereference_raw(sta->sta.rates)); kfree(sta); } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index d6a47e7..c2785b2 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -413,6 +413,9 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) if (ieee80211_has_order(hdr->frame_control)) return TX_CONTINUE; + if (ieee80211_is_probe_req(hdr->frame_control)) + return TX_CONTINUE; + if (tx->local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) info->hw_queue = tx->sdata->vif.cab_queue; @@ -463,6 +466,7 @@ 
ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) { struct sta_info *sta = tx->sta; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; struct ieee80211_local *local = tx->local; if (unlikely(!sta)) @@ -473,6 +477,15 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) !(info->flags & IEEE80211_TX_CTL_NO_PS_BUFFER))) { int ac = skb_get_queue_mapping(tx->skb); + /* only deauth, disassoc and action are bufferable MMPDUs */ + if (ieee80211_is_mgmt(hdr->frame_control) && + !ieee80211_is_deauth(hdr->frame_control) && + !ieee80211_is_disassoc(hdr->frame_control) && + !ieee80211_is_action(hdr->frame_control)) { + info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER; + return TX_CONTINUE; + } + ps_dbg(sta->sdata, "STA %pM aid %d: PS buffer for AC %d\n", sta->sta.addr, sta->sta.aid, ac); if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) @@ -530,22 +543,8 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) static ieee80211_tx_result debug_noinline ieee80211_tx_h_ps_buf(struct ieee80211_tx_data *tx) { - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; - if (unlikely(tx->flags & IEEE80211_TX_PS_BUFFERED)) return TX_CONTINUE; - - /* only deauth, disassoc and action are bufferable MMPDUs */ - if (ieee80211_is_mgmt(hdr->frame_control) && - !ieee80211_is_deauth(hdr->frame_control) && - !ieee80211_is_disassoc(hdr->frame_control) && - !ieee80211_is_action(hdr->frame_control)) { - if (tx->flags & IEEE80211_TX_UNICAST) - info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER; - return TX_CONTINUE; - } - if (tx->flags & IEEE80211_TX_UNICAST) return ieee80211_tx_h_unicast_ps_buf(tx); else @@ -2806,7 +2805,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, cpu_to_le16(IEEE80211_FCTL_MOREDATA); } - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + if (sdata->vif.type == NL80211_IFTYPE_AP) sdata = 
IEEE80211_DEV_TO_SUB_IF(skb->dev); if (!ieee80211_tx_prepare(sdata, &tx, skb)) break; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index f2e30fb..4fb68dc 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1753,6 +1753,12 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) if (*op < IP_SET_OP_VERSION) { /* Check the version at the beginning of operations */ struct ip_set_req_version *req_version = data; + + if (*len < sizeof(struct ip_set_req_version)) { + ret = -EINVAL; + goto done; + } + if (req_version->version != IPSET_PROTOCOL) { ret = -EPROTO; goto done; diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index d585626..d9a7f00 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -797,7 +797,6 @@ static void ip_vs_conn_expire(unsigned long data) ip_vs_control_del(cp); if (cp->flags & IP_VS_CONN_F_NFCT) { - ip_vs_conn_drop_conntrack(cp); /* Do not access conntracks during subsys cleanup * because nf_conntrack_find_get can not be used after * conntrack cleanup for the net. 
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 3581736..f7a758f 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1392,15 +1392,19 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) if (ipip) { __be32 info = ic->un.gateway; + __u8 type = ic->type; + __u8 code = ic->code; /* Update the MTU */ if (ic->type == ICMP_DEST_UNREACH && ic->code == ICMP_FRAG_NEEDED) { struct ip_vs_dest *dest = cp->dest; u32 mtu = ntohs(ic->un.frag.mtu); + __be16 frag_off = cih->frag_off; /* Strip outer IP and ICMP, go to IPIP header */ - __skb_pull(skb, ihl + sizeof(_icmph)); + if (pskb_pull(skb, ihl + sizeof(_icmph)) == NULL) + goto ignore_ipip; offset2 -= ihl + sizeof(_icmph); skb_reset_network_header(skb); IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n", @@ -1408,7 +1412,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) ipv4_update_pmtu(skb, dev_net(skb->dev), mtu, 0, 0, 0, 0); /* Client uses PMTUD? */ - if (!(cih->frag_off & htons(IP_DF))) + if (!(frag_off & htons(IP_DF))) goto ignore_ipip; /* Prefer the resulting PMTU */ if (dest) { @@ -1427,12 +1431,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) /* Strip outer IP, ICMP and IPIP, go to IP header of * original request. 
*/ - __skb_pull(skb, offset2); + if (pskb_pull(skb, offset2) == NULL) + goto ignore_ipip; skb_reset_network_header(skb); IP_VS_DBG(12, "Sending ICMP for %pI4->%pI4: t=%u, c=%u, i=%u\n", &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, - ic->type, ic->code, ntohl(info)); - icmp_send(skb, ic->type, ic->code, info); + type, code, ntohl(info)); + icmp_send(skb, type, code, info); /* ICMP can be shorter but anyways, account it */ ip_vs_out_stats(cp, skb); @@ -1901,7 +1906,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { { .hook = ip_vs_local_reply6, .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, + .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_NAT_DST + 1, }, diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index a3df9bd..f956865 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3765,6 +3765,7 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) cancel_delayed_work_sync(&ipvs->defense_work); cancel_work_sync(&ipvs->defense_work.work); unregister_net_sysctl_table(ipvs->sysctl_hdr); + ip_vs_stop_estimator(net, &ipvs->tot_stats); } #else @@ -3825,7 +3826,6 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net) */ rcu_barrier(); ip_vs_trash_cleanup(net); - ip_vs_stop_estimator(net, &ipvs->tot_stats); ip_vs_control_net_cleanup_sysctl(net); remove_proc_entry("ip_vs_stats_percpu", net->proc_net); remove_proc_entry("ip_vs_stats", net->proc_net); diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 77c1732..4a662f1 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -183,6 +183,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, struct nf_conn *ct; struct net *net; + *diff = 0; + #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, * so turn this into a no-op for IPv6 packets @@ -191,8 +193,6 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct 
ip_vs_conn *cp, return 1; #endif - *diff = 0; - /* Only useful for established sessions */ if (cp->state != IP_VS_TCP_S_ESTABLISHED) return 1; @@ -321,6 +321,9 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, struct ip_vs_conn *n_cp; struct net *net; + /* no diff required for incoming packets */ + *diff = 0; + #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, * so turn this into a no-op for IPv6 packets @@ -329,9 +332,6 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, return 1; #endif - /* no diff required for incoming packets */ - *diff = 0; - /* Only useful for established sessions */ if (cp->state != IP_VS_TCP_S_ESTABLISHED) return 1; diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c index c8beafd..5a355a4 100644 --- a/net/netfilter/ipvs/ip_vs_nfct.c +++ b/net/netfilter/ipvs/ip_vs_nfct.c @@ -63,6 +63,7 @@ #include <net/ip_vs.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_zones.h> @@ -97,6 +98,11 @@ ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin) if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) return; + /* Applications may adjust TCP seqs */ + if (cp->app && nf_ct_protonum(ct) == IPPROTO_TCP && + !nfct_seqadj(ct) && !nfct_seqadj_ext_add(ct)) + return; + /* * The connection is not yet in the hashtable, so we update it. 
* CIP->VIP will remain the same, so leave the tuple in diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index c47444e..1692e75 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -883,7 +883,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, iph->daddr = cp->daddr.ip; iph->saddr = saddr; iph->ttl = old_iph->ttl; - ip_select_ident(skb, &rt->dst, NULL); + ip_select_ident(skb, NULL); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -967,8 +967,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, iph->nexthdr = IPPROTO_IPV6; iph->payload_len = old_iph->payload_len; be16_add_cpu(&iph->payload_len, sizeof(*old_iph)); - iph->priority = old_iph->priority; memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl)); + ipv6_change_dsfield(iph, 0, ipv6_get_dsfield(old_iph)); iph->daddr = cp->daddr.in6; iph->saddr = saddr; iph->hop_limit = old_iph->hop_limit; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 6f0f4f7..13deb61 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -491,6 +491,39 @@ static int nf_nat_proto_remove(struct nf_conn *i, void *data) return i->status & IPS_NAT_MASK ? 1 : 0; } +static int nf_nat_proto_clean(struct nf_conn *ct, void *data) +{ + struct nf_conn_nat *nat = nfct_nat(ct); + + if (nf_nat_proto_remove(ct, data)) + return 1; + + if (!nat || !nat->ct) + return 0; + + /* This netns is being destroyed, and conntrack has nat null binding. + * Remove it from bysource hash, as the table will be freed soon. + * + * Else, when the conntrack is destoyed, nf_nat_cleanup_conntrack() + * will delete entry from already-freed table. + */ + if (!del_timer(&ct->timeout)) + return 1; + + spin_lock_bh(&nf_nat_lock); + hlist_del_rcu(&nat->bysource); + ct->status &= ~IPS_NAT_DONE_MASK; + nat->ct = NULL; + spin_unlock_bh(&nf_nat_lock); + + add_timer(&ct->timeout); + + /* don't delete conntrack. 
Although that would make things a lot + * simpler, we'd end up flushing all conntracks on nat rmmod. + */ + return 0; +} + static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto) { struct nf_nat_proto_clean clean = { @@ -753,7 +786,7 @@ static void __net_exit nf_nat_net_exit(struct net *net) { struct nf_nat_proto_clean clean = {}; - nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean, 0, 0); + nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean, 0, 0); synchronize_rcu(); nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size); } diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 572d87d..0a03662 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -147,7 +147,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) const struct nfnetlink_subsystem *ss; int type, err; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + if (!netlink_net_capable(skb, CAP_NET_ADMIN)) return -EPERM; /* All the messages must at least contain nfgenmsg */ diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index d92cc31..09172d7 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -45,7 +45,8 @@ #define NFULNL_NLBUFSIZ_DEFAULT NLMSG_GOODSIZE #define NFULNL_TIMEOUT_DEFAULT 100 /* every second */ #define NFULNL_QTHRESH_DEFAULT 100 /* 100 packets */ -#define NFULNL_COPY_RANGE_MAX 0xFFFF /* max packet size is limited by 16-bit struct nfattr nfa_len field */ +/* max packet size is limited by 16-bit struct nfattr nfa_len field */ +#define NFULNL_COPY_RANGE_MAX (0xFFFF - NLA_HDRLEN) #define PRINTR(x, args...) 
do { if (net_ratelimit()) \ printk(x, ## args); } while (0); @@ -255,6 +256,8 @@ nfulnl_set_mode(struct nfulnl_instance *inst, u_int8_t mode, case NFULNL_COPY_PACKET: inst->copy_mode = mode; + if (range == 0) + range = NFULNL_COPY_RANGE_MAX; inst->copy_range = min_t(unsigned int, range, NFULNL_COPY_RANGE_MAX); break; @@ -345,26 +348,25 @@ nfulnl_alloc_skb(u32 peer_portid, unsigned int inst_size, unsigned int pkt_size) return skb; } -static int +static void __nfulnl_send(struct nfulnl_instance *inst) { - int status = -1; - if (inst->qlen > 1) { struct nlmsghdr *nlh = nlmsg_put(inst->skb, 0, 0, NLMSG_DONE, sizeof(struct nfgenmsg), 0); - if (!nlh) + if (WARN_ONCE(!nlh, "bad nlskb size: %u, tailroom %d\n", + inst->skb->len, skb_tailroom(inst->skb))) { + kfree_skb(inst->skb); goto out; + } } - status = nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid, - MSG_DONTWAIT); - + nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid, + MSG_DONTWAIT); +out: inst->qlen = 0; inst->skb = NULL; -out: - return status; } static void @@ -651,7 +653,8 @@ nfulnl_log_packet(struct net *net, + nla_total_size(sizeof(u_int32_t)) /* gid */ + nla_total_size(plen) /* prefix */ + nla_total_size(sizeof(struct nfulnl_msg_packet_hw)) - + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp)); + + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp)) + + nla_total_size(sizeof(struct nfgenmsg)); /* NLMSG_DONE */ if (in && skb_mac_header_was_set(skb)) { size += nla_total_size(skb->dev->hard_header_len) @@ -680,8 +683,7 @@ nfulnl_log_packet(struct net *net, break; case NFULNL_COPY_PACKET: - if (inst->copy_range == 0 - || inst->copy_range > skb->len) + if (inst->copy_range > skb->len) data_len = skb->len; else data_len = inst->copy_range; @@ -694,8 +696,7 @@ nfulnl_log_packet(struct net *net, goto unlock_and_release; } - if (inst->skb && - size > skb_tailroom(inst->skb) - sizeof(struct nfgenmsg)) { + if (inst->skb && size > skb_tailroom(inst->skb)) { /* either the queue len is too 
high or we don't have * enough room in the skb left. flush to userspace. */ __nfulnl_flush(inst); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 6135635..0059013 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -204,7 +204,7 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb, if (nskb) { nskb->dev = dev; nskb->protocol = htons((u16) sk->sk_protocol); - + skb_reset_network_header(nskb); ret = dev_queue_xmit(nskb); if (unlikely(ret > 0)) ret = net_xmit_errno(ret); @@ -502,14 +502,14 @@ out: return err; } -static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr) +static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len) { #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 struct page *p_start, *p_end; /* First page is flushed through netlink_{get,set}_status */ p_start = pgvec_to_page(hdr + PAGE_SIZE); - p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + hdr->nm_len - 1); + p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1); while (p_start <= p_end) { flush_dcache_page(p_start); p_start++; @@ -527,9 +527,9 @@ static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr) static void netlink_set_status(struct nl_mmap_hdr *hdr, enum nl_mmap_status status) { + smp_mb(); hdr->nm_status = status; flush_dcache_page(pgvec_to_page(hdr)); - smp_wmb(); } static struct nl_mmap_hdr * @@ -628,7 +628,7 @@ static unsigned int netlink_poll(struct file *file, struct socket *sock, while (nlk->cb_running && netlink_dump_space(nlk)) { err = netlink_dump(sk); if (err < 0) { - sk->sk_err = err; + sk->sk_err = -err; sk->sk_error_report(sk); break; } @@ -691,24 +691,16 @@ static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, struct nl_mmap_hdr *hdr; struct sk_buff *skb; unsigned int maxlen; - bool excl = true; int err = 0, len = 0; - /* Netlink messages are validated by the receiver before processing. 
- * In order to avoid userspace changing the contents of the message - * after validation, the socket and the ring may only be used by a - * single process, otherwise we fall back to copying. - */ - if (atomic_long_read(&sk->sk_socket->file->f_count) > 2 || - atomic_read(&nlk->mapped) > 1) - excl = false; - mutex_lock(&nlk->pg_vec_lock); ring = &nlk->tx_ring; maxlen = ring->frame_size - NL_MMAP_HDRLEN; do { + unsigned int nm_len; + hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID); if (hdr == NULL) { if (!(msg->msg_flags & MSG_DONTWAIT) && @@ -716,35 +708,23 @@ static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, schedule(); continue; } - if (hdr->nm_len > maxlen) { + + nm_len = ACCESS_ONCE(hdr->nm_len); + if (nm_len > maxlen) { err = -EINVAL; goto out; } - netlink_frame_flush_dcache(hdr); + netlink_frame_flush_dcache(hdr, nm_len); - if (likely(dst_portid == 0 && dst_group == 0 && excl)) { - skb = alloc_skb_head(GFP_KERNEL); - if (skb == NULL) { - err = -ENOBUFS; - goto out; - } - sock_hold(sk); - netlink_ring_setup_skb(skb, sk, ring, hdr); - NETLINK_CB(skb).flags |= NETLINK_SKB_TX; - __skb_put(skb, hdr->nm_len); - netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED); - atomic_inc(&ring->pending); - } else { - skb = alloc_skb(hdr->nm_len, GFP_KERNEL); - if (skb == NULL) { - err = -ENOBUFS; - goto out; - } - __skb_put(skb, hdr->nm_len); - memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len); - netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); + skb = alloc_skb(nm_len, GFP_KERNEL); + if (skb == NULL) { + err = -ENOBUFS; + goto out; } + __skb_put(skb, nm_len); + memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len); + netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); netlink_increment_head(ring); @@ -790,7 +770,7 @@ static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb) hdr->nm_pid = NETLINK_CB(skb).creds.pid; hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid); hdr->nm_gid = from_kgid(sk_user_ns(sk), 
NETLINK_CB(skb).creds.gid); - netlink_frame_flush_dcache(hdr); + netlink_frame_flush_dcache(hdr, hdr->nm_len); netlink_set_status(hdr, NL_MMAP_STATUS_VALID); NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED; @@ -1352,7 +1332,74 @@ retry: return err; } -static inline int netlink_capable(const struct socket *sock, unsigned int flag) +/** + * __netlink_ns_capable - General netlink message capability test + * @nsp: NETLINK_CB of the socket buffer holding a netlink command from userspace. + * @user_ns: The user namespace of the capability to use + * @cap: The capability to use + * + * Test to see if the opener of the socket we received the message + * from had when the netlink socket was created and the sender of the + * message has has the capability @cap in the user namespace @user_ns. + */ +bool __netlink_ns_capable(const struct netlink_skb_parms *nsp, + struct user_namespace *user_ns, int cap) +{ + return ((nsp->flags & NETLINK_SKB_DST) || + file_ns_capable(nsp->sk->sk_socket->file, user_ns, cap)) && + ns_capable(user_ns, cap); +} +EXPORT_SYMBOL(__netlink_ns_capable); + +/** + * netlink_ns_capable - General netlink message capability test + * @skb: socket buffer holding a netlink command from userspace + * @user_ns: The user namespace of the capability to use + * @cap: The capability to use + * + * Test to see if the opener of the socket we received the message + * from had when the netlink socket was created and the sender of the + * message has has the capability @cap in the user namespace @user_ns. 
+ */ +bool netlink_ns_capable(const struct sk_buff *skb, + struct user_namespace *user_ns, int cap) +{ + return __netlink_ns_capable(&NETLINK_CB(skb), user_ns, cap); +} +EXPORT_SYMBOL(netlink_ns_capable); + +/** + * netlink_capable - Netlink global message capability test + * @skb: socket buffer holding a netlink command from userspace + * @cap: The capability to use + * + * Test to see if the opener of the socket we received the message + * from had when the netlink socket was created and the sender of the + * message has has the capability @cap in all user namespaces. + */ +bool netlink_capable(const struct sk_buff *skb, int cap) +{ + return netlink_ns_capable(skb, &init_user_ns, cap); +} +EXPORT_SYMBOL(netlink_capable); + +/** + * netlink_net_capable - Netlink network namespace message capability test + * @skb: socket buffer holding a netlink command from userspace + * @cap: The capability to use + * + * Test to see if the opener of the socket we received the message + * from had when the netlink socket was created and the sender of the + * message has has the capability @cap over the network namespace of + * the socket we received the message from. 
+ */ +bool netlink_net_capable(const struct sk_buff *skb, int cap) +{ + return netlink_ns_capable(skb, sock_net(skb->sk)->user_ns, cap); +} +EXPORT_SYMBOL(netlink_net_capable); + +static inline int netlink_allowed(const struct socket *sock, unsigned int flag) { return (nl_table[sock->sk->sk_protocol].flags & flag) || ns_capable(sock_net(sock->sk)->user_ns, CAP_NET_ADMIN); @@ -1420,7 +1467,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, /* Only superuser is allowed to listen multicasts */ if (nladdr->nl_groups) { - if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV)) + if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV)) return -EPERM; err = netlink_realloc_groups(sk); if (err) @@ -1482,7 +1529,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, return -EINVAL; /* Only superuser is allowed to send multicasts */ - if (nladdr->nl_groups && !netlink_capable(sock, NL_CFG_F_NONROOT_SEND)) + if (nladdr->nl_groups && !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND)) return -EPERM; if (!nlk->portid) @@ -2088,7 +2135,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, break; case NETLINK_ADD_MEMBERSHIP: case NETLINK_DROP_MEMBERSHIP: { - if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV)) + if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV)) return -EPERM; err = netlink_realloc_groups(sk); if (err) @@ -2220,6 +2267,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, struct sk_buff *skb; int err; struct scm_cookie scm; + u32 netlink_skb_flags = 0; if (msg->msg_flags&MSG_OOB) return -EOPNOTSUPP; @@ -2239,8 +2287,9 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, dst_group = ffs(addr->nl_groups); err = -EPERM; if ((dst_group || dst_portid) && - !netlink_capable(sock, NL_CFG_F_NONROOT_SEND)) + !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND)) goto out; + netlink_skb_flags |= NETLINK_SKB_DST; } else { dst_portid = nlk->dst_portid; dst_group = nlk->dst_group; @@ -2270,6 
+2319,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, NETLINK_CB(skb).portid = nlk->portid; NETLINK_CB(skb).dst_group = dst_group; NETLINK_CB(skb).creds = siocb->scm->creds; + NETLINK_CB(skb).flags = netlink_skb_flags; err = -EFAULT; if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { @@ -2370,7 +2420,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) { ret = netlink_dump(sk); if (ret) { - sk->sk_err = ret; + sk->sk_err = -ret; sk->sk_error_report(sk); } } diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 0c741ce..c7408dd 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -592,7 +592,7 @@ static int genl_family_rcv_msg(struct genl_family *family, return -EOPNOTSUPP; if ((ops->flags & GENL_ADMIN_PERM) && - !capable(CAP_NET_ADMIN)) + !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 65cfaa8..07c4ae3 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -42,6 +42,9 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, static int make_writable(struct sk_buff *skb, int write_len) { + if (!pskb_may_pull(skb, write_len)) + return -ENOMEM; + if (!skb_cloned(skb) || skb_clone_writable(skb, write_len)) return 0; @@ -70,6 +73,8 @@ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci) vlan_set_encap_proto(skb, vhdr); skb->mac_header += VLAN_HLEN; + if (skb_network_offset(skb) < ETH_HLEN) + skb_set_network_header(skb, ETH_HLEN); skb_reset_mac_len(skb); return 0; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d1705d0..9dc1891 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -566,6 +566,7 @@ static void init_prb_bdqc(struct packet_sock *po, p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov); 
p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv; + p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv); prb_init_ft_ops(p1, req_u); prb_setup_retire_blk_timer(po, tx_ring); prb_open_block(p1, pbd); @@ -1815,6 +1816,18 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, if ((int)snaplen < 0) snaplen = 0; } + } else if (unlikely(macoff + snaplen > + GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) { + u32 nval; + + nval = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len - macoff; + pr_err_once("tpacket_rcv: packet too big, clamped from %u to %u. macoff=%u\n", + snaplen, nval, macoff); + snaplen = nval; + if (unlikely((int)snaplen < 0)) { + snaplen = 0; + macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len; + } } spin_lock(&sk->sk_receive_queue.lock); h.raw = packet_current_rx_frame(po, skb, @@ -3611,6 +3624,10 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, goto out; if (unlikely(req->tp_block_size & (PAGE_SIZE - 1))) goto out; + if (po->tp_version >= TPACKET_V3 && + (int)(req->tp_block_size - + BLK_PLUS_PRIV(req_u->req3.tp_sizeof_priv)) <= 0) + goto out; if (unlikely(req->tp_frame_size < po->tp_hdrlen + po->tp_reserve)) goto out; diff --git a/net/packet/diag.c b/net/packet/diag.c index a9584a2..674b0a6 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -127,6 +127,7 @@ static int pdiag_put_fanout(struct packet_sock *po, struct sk_buff *nlskb) static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req, + bool may_report_filterinfo, struct user_namespace *user_ns, u32 portid, u32 seq, u32 flags, int sk_ino) { @@ -171,7 +172,8 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, goto out_nlmsg_trim; if ((req->pdiag_show & PACKET_SHOW_FILTER) && - sock_diag_put_filterinfo(user_ns, sk, skb, PACKET_DIAG_FILTER)) + sock_diag_put_filterinfo(may_report_filterinfo, sk, skb, + PACKET_DIAG_FILTER)) goto out_nlmsg_trim; return nlmsg_end(skb, nlh); @@ -187,9 
+189,11 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) struct packet_diag_req *req; struct net *net; struct sock *sk; + bool may_report_filterinfo; net = sock_net(skb->sk); req = nlmsg_data(cb->nlh); + may_report_filterinfo = netlink_net_capable(cb->skb, CAP_NET_ADMIN); mutex_lock(&net->packet.sklist_lock); sk_for_each(sk, &net->packet.sklist) { @@ -199,6 +203,7 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) goto next; if (sk_diag_fill(sk, skb, req, + may_report_filterinfo, sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, diff --git a/net/packet/internal.h b/net/packet/internal.h index 1035fa2..ca086c0 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -29,6 +29,7 @@ struct tpacket_kbdq_core { char *pkblk_start; char *pkblk_end; int kblk_size; + unsigned int max_frame_len; unsigned int knum_blocks; uint64_t knxt_seq_num; char *prev; diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index dc15f43..b64151a 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -70,10 +70,10 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh) int err; u8 pnaddr; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; - if (!capable(CAP_SYS_ADMIN)) + if (!netlink_capable(skb, CAP_SYS_ADMIN)) return -EPERM; ASSERT_RTNL(); @@ -233,10 +233,10 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh) int err; u8 dst; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; - if (!capable(CAP_SYS_ADMIN)) + if (!netlink_capable(skb, CAP_SYS_ADMIN)) return -EPERM; ASSERT_RTNL(); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index fd70728..15d46b9 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -989,7 +989,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n) u32 portid = skb ? 
NETLINK_CB(skb).portid : 0; int ret = 0, ovr = 0; - if ((n->nlmsg_type != RTM_GETACTION) && !capable(CAP_NET_ADMIN)) + if ((n->nlmsg_type != RTM_GETACTION) && !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 8e118af..2ea40d1 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -138,7 +138,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) int err; int tp_created = 0; - if ((n->nlmsg_type != RTM_GETTFILTER) && !capable(CAP_NET_ADMIN)) + if ((n->nlmsg_type != RTM_GETTFILTER) && !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; replay: diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 2adda7f..3f5fe03 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1076,7 +1076,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n) struct Qdisc *p = NULL; int err; - if ((n->nlmsg_type != RTM_GETQDISC) && !capable(CAP_NET_ADMIN)) + if ((n->nlmsg_type != RTM_GETQDISC) && !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); @@ -1143,7 +1143,7 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n) struct Qdisc *q, *p; int err; - if (!capable(CAP_NET_ADMIN)) + if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; replay: @@ -1483,7 +1483,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n) u32 qid; int err; - if ((n->nlmsg_type != RTM_GETTCLASS) && !capable(CAP_NET_ADMIN)) + if ((n->nlmsg_type != RTM_GETTCLASS) && !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index cef5099..737050f 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -375,7 +375,7 @@ void sctp_association_free(struct sctp_association *asoc) /* Only real associations count against the endpoint, so * 
don't bother for if this is a temporary association. */ - if (!asoc->temp) { + if (!list_empty(&asoc->asocs)) { list_del(&asoc->asocs); /* Decrement the backlog value for a TCP-style listening @@ -1198,6 +1198,7 @@ void sctp_assoc_update(struct sctp_association *asoc, asoc->c = new->c; asoc->peer.rwnd = new->peer.rwnd; asoc->peer.sack_needed = new->peer.sack_needed; + asoc->peer.auth_capable = new->peer.auth_capable; asoc->peer.i = new->peer.i; sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL, asoc->peer.i.initial_tsn, GFP_ATOMIC); @@ -1644,6 +1645,8 @@ struct sctp_chunk *sctp_assoc_lookup_asconf_ack( * ack chunk whose serial number matches that of the request. */ list_for_each_entry(ack, &asoc->asconf_ack_list, transmitted_list) { + if (sctp_chunk_pending(ack)) + continue; if (ack->subh.addip_hdr->serial == serial) { sctp_chunk_hold(ack); return ack; diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 8c4fa5d..4b842e9 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -387,14 +387,13 @@ nomem: */ int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp) { - struct net *net = sock_net(asoc->base.sk); struct sctp_auth_bytes *secret; struct sctp_shared_key *ep_key; /* If we don't support AUTH, or peer is not capable * we don't need to do anything. 
*/ - if (!net->sctp.auth_enable || !asoc->peer.auth_capable) + if (!asoc->ep->auth_enable || !asoc->peer.auth_capable) return 0; /* If the key_id is non-zero and we couldn't find an @@ -441,16 +440,16 @@ struct sctp_shared_key *sctp_auth_get_shkey( */ int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp) { - struct net *net = sock_net(ep->base.sk); struct crypto_hash *tfm = NULL; __u16 id; - /* if the transforms are already allocted, we are done */ - if (!net->sctp.auth_enable) { + /* If AUTH extension is disabled, we are done */ + if (!ep->auth_enable) { ep->auth_hmacs = NULL; return 0; } + /* If the transforms are already allocated, we are done */ if (ep->auth_hmacs) return 0; @@ -671,12 +670,10 @@ static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param) /* Check if peer requested that this chunk is authenticated */ int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc) { - struct net *net; if (!asoc) return 0; - net = sock_net(asoc->base.sk); - if (!net->sctp.auth_enable || !asoc->peer.auth_capable) + if (!asoc->ep->auth_enable || !asoc->peer.auth_capable) return 0; return __sctp_auth_cid(chunk, asoc->peer.peer_chunks); @@ -685,12 +682,10 @@ int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc) /* Check if we requested that peer authenticate this chunk. 
*/ int sctp_auth_recv_cid(sctp_cid_t chunk, const struct sctp_association *asoc) { - struct net *net; if (!asoc) return 0; - net = sock_net(asoc->base.sk); - if (!net->sctp.auth_enable) + if (!asoc->ep->auth_enable) return 0; return __sctp_auth_cid(chunk, @@ -873,8 +868,6 @@ int sctp_auth_set_key(struct sctp_endpoint *ep, list_add(&cur_key->key_list, sh_keys); cur_key->key = key; - sctp_auth_key_hold(key); - return 0; nomem: if (!replace) diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 09b8daa..477dd23 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -69,7 +69,8 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, if (!ep->digest) return NULL; - if (net->sctp.auth_enable) { + ep->auth_enable = net->sctp.auth_enable; + if (ep->auth_enable) { /* Allocate space for HMACS and CHUNKS authentication * variables. There are arrays that we encode directly * into parameters to make the rest of the operations easier. diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 5856932..560cd41 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -141,18 +141,9 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) } else { /* Nothing to do. Next chunk in the packet, please. */ ch = (sctp_chunkhdr_t *) chunk->chunk_end; - /* Force chunk->skb->data to chunk->chunk_end. */ - skb_pull(chunk->skb, - chunk->chunk_end - chunk->skb->data); - - /* Verify that we have at least chunk headers - * worth of buffer left. - */ - if (skb_headlen(chunk->skb) < sizeof(sctp_chunkhdr_t)) { - sctp_chunk_free(chunk); - chunk = queue->in_progress = NULL; - } + skb_pull(chunk->skb, chunk->chunk_end - chunk->skb->data); + /* We are guaranteed to pull a SCTP header. */ } } @@ -188,24 +179,14 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t)); chunk->subh.v = NULL; /* Subheader is no longer valid. 
*/ - if (chunk->chunk_end < skb_tail_pointer(chunk->skb)) { + if (chunk->chunk_end + sizeof(sctp_chunkhdr_t) < + skb_tail_pointer(chunk->skb)) { /* This is not a singleton */ chunk->singleton = 0; } else if (chunk->chunk_end > skb_tail_pointer(chunk->skb)) { - /* RFC 2960, Section 6.10 Bundling - * - * Partial chunks MUST NOT be placed in an SCTP packet. - * If the receiver detects a partial chunk, it MUST drop - * the chunk. - * - * Since the end of the chunk is past the end of our buffer - * (which contains the whole packet, we can freely discard - * the whole packet. - */ - sctp_chunk_free(chunk); - chunk = queue->in_progress = NULL; - - return NULL; + /* Discard inside state machine. */ + chunk->pdiscard = 1; + chunk->chunk_end = skb_tail_pointer(chunk->skb); } else { /* We are at the end of the packet, so mark the chunk * in case we need to send a SACK. diff --git a/net/sctp/output.c b/net/sctp/output.c index 3191373..69faf79 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -403,12 +403,12 @@ int sctp_packet_transmit(struct sctp_packet *packet) sk = chunk->skb->sk; /* Allocate the new skb. */ - nskb = alloc_skb(packet->size + LL_MAX_HEADER, GFP_ATOMIC); + nskb = alloc_skb(packet->size + MAX_HEADER, GFP_ATOMIC); if (!nskb) goto nomem; /* Make sure the outbound skb has enough header room reserved. */ - skb_reserve(nskb, packet->overhead + LL_MAX_HEADER); + skb_reserve(nskb, packet->overhead + MAX_HEADER); /* Set the owning socket so that we know where to get the * destination IP address. 
@@ -606,7 +606,7 @@ out: return err; no_route: kfree_skb(nskb); - IP_INC_STATS_BH(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES); /* FIXME: Returning the 'err' will effect all the associations * associated with a socket, although only one of the paths of the diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 5e17092..2b216f1 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -492,8 +492,13 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, continue; if ((laddr->state == SCTP_ADDR_SRC) && (AF_INET == laddr->a.sa.sa_family)) { - fl4->saddr = laddr->a.v4.sin_addr.s_addr; fl4->fl4_sport = laddr->a.v4.sin_port; + flowi4_update_output(fl4, + asoc->base.sk->sk_bound_dev_if, + RT_CONN_FLAGS(asoc->base.sk), + daddr->v4.sin_addr.s_addr, + laddr->a.v4.sin_addr.s_addr); + rt = ip_route_output_key(sock_net(sk), fl4); if (!IS_ERR(rt)) { dst = &rt->dst; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 26be077..d800160 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -218,6 +218,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, gfp_t gfp, int vparam_len) { struct net *net = sock_net(asoc->base.sk); + struct sctp_endpoint *ep = asoc->ep; sctp_inithdr_t init; union sctp_params addrs; size_t chunksize; @@ -277,7 +278,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, chunksize += vparam_len; /* Account for AUTH related parameters */ - if (net->sctp.auth_enable) { + if (ep->auth_enable) { /* Add random parameter length*/ chunksize += sizeof(asoc->c.auth_random); @@ -362,7 +363,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, } /* Add SCTP-AUTH chunks to the parameter list */ - if (net->sctp.auth_enable) { + if (ep->auth_enable) { sctp_addto_chunk(retval, sizeof(asoc->c.auth_random), asoc->c.auth_random); if (auth_hmacs) @@ -2023,7 +2024,7 @@ 
static void sctp_process_ext_param(struct sctp_association *asoc, /* if the peer reports AUTH, assume that he * supports AUTH. */ - if (net->sctp.auth_enable) + if (asoc->ep->auth_enable) asoc->peer.auth_capable = 1; break; case SCTP_CID_ASCONF: @@ -2115,6 +2116,7 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc, * SCTP_IERROR_NO_ERROR - continue with the chunk */ static sctp_ierror_t sctp_verify_param(struct net *net, + const struct sctp_endpoint *ep, const struct sctp_association *asoc, union sctp_params param, sctp_cid_t cid, @@ -2165,7 +2167,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net, goto fallthrough; case SCTP_PARAM_RANDOM: - if (!net->sctp.auth_enable) + if (!ep->auth_enable) goto fallthrough; /* SCTP-AUTH: Secion 6.1 @@ -2182,7 +2184,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net, break; case SCTP_PARAM_CHUNKS: - if (!net->sctp.auth_enable) + if (!ep->auth_enable) goto fallthrough; /* SCTP-AUTH: Section 3.2 @@ -2198,7 +2200,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net, break; case SCTP_PARAM_HMAC_ALGO: - if (!net->sctp.auth_enable) + if (!ep->auth_enable) goto fallthrough; hmacs = (struct sctp_hmac_algo_param *)param.p; @@ -2233,10 +2235,9 @@ fallthrough: } /* Verify the INIT packet before we process it. 
*/ -int sctp_verify_init(struct net *net, const struct sctp_association *asoc, - sctp_cid_t cid, - sctp_init_chunk_t *peer_init, - struct sctp_chunk *chunk, +int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep, + const struct sctp_association *asoc, sctp_cid_t cid, + sctp_init_chunk_t *peer_init, struct sctp_chunk *chunk, struct sctp_chunk **errp) { union sctp_params param; @@ -2277,8 +2278,8 @@ int sctp_verify_init(struct net *net, const struct sctp_association *asoc, /* Verify all the variable length parameters */ sctp_walk_params(param, peer_init, init_hdr.params) { - - result = sctp_verify_param(net, asoc, param, cid, chunk, errp); + result = sctp_verify_param(net, ep, asoc, param, cid, + chunk, errp); switch (result) { case SCTP_IERROR_ABORT: case SCTP_IERROR_NOMEM: @@ -2510,6 +2511,7 @@ static int sctp_process_param(struct sctp_association *asoc, struct sctp_af *af; union sctp_addr_param *addr_param; struct sctp_transport *t; + struct sctp_endpoint *ep = asoc->ep; /* We maintain all INIT parameters in network byte order all the * time. 
This allows us to not worry about whether the parameters @@ -2620,6 +2622,9 @@ do_addr_param: addr_param = param.v + sizeof(sctp_addip_param_t); af = sctp_get_af_specific(param_type2af(param.p->type)); + if (af == NULL) + break; + af->from_addr_param(&addr, addr_param, htons(asoc->peer.port), 0); @@ -2649,7 +2654,7 @@ do_addr_param: goto fall_through; case SCTP_PARAM_RANDOM: - if (!net->sctp.auth_enable) + if (!ep->auth_enable) goto fall_through; /* Save peer's random parameter */ @@ -2662,7 +2667,7 @@ do_addr_param: break; case SCTP_PARAM_HMAC_ALGO: - if (!net->sctp.auth_enable) + if (!ep->auth_enable) goto fall_through; /* Save peer's HMAC list */ @@ -2678,7 +2683,7 @@ do_addr_param: break; case SCTP_PARAM_CHUNKS: - if (!net->sctp.auth_enable) + if (!ep->auth_enable) goto fall_through; asoc->peer.peer_chunks = kmemdup(param.p, @@ -3121,50 +3126,63 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc, return SCTP_ERROR_NO_ERROR; } -/* Verify the ASCONF packet before we process it. */ -int sctp_verify_asconf(const struct sctp_association *asoc, - struct sctp_paramhdr *param_hdr, void *chunk_end, - struct sctp_paramhdr **errp) { - sctp_addip_param_t *asconf_param; +/* Verify the ASCONF packet before we process it. 
*/ +bool sctp_verify_asconf(const struct sctp_association *asoc, + struct sctp_chunk *chunk, bool addr_param_needed, + struct sctp_paramhdr **errp) +{ + sctp_addip_chunk_t *addip = (sctp_addip_chunk_t *) chunk->chunk_hdr; union sctp_params param; - int length, plen; + bool addr_param_seen = false; - param.v = (sctp_paramhdr_t *) param_hdr; - while (param.v <= chunk_end - sizeof(sctp_paramhdr_t)) { - length = ntohs(param.p->length); - *errp = param.p; - - if (param.v > chunk_end - length || - length < sizeof(sctp_paramhdr_t)) - return 0; + sctp_walk_params(param, addip, addip_hdr.params) { + size_t length = ntohs(param.p->length); + *errp = param.p; switch (param.p->type) { + case SCTP_PARAM_ERR_CAUSE: + break; + case SCTP_PARAM_IPV4_ADDRESS: + if (length != sizeof(sctp_ipv4addr_param_t)) + return false; + addr_param_seen = true; + break; + case SCTP_PARAM_IPV6_ADDRESS: + if (length != sizeof(sctp_ipv6addr_param_t)) + return false; + addr_param_seen = true; + break; case SCTP_PARAM_ADD_IP: case SCTP_PARAM_DEL_IP: case SCTP_PARAM_SET_PRIMARY: - asconf_param = (sctp_addip_param_t *)param.v; - plen = ntohs(asconf_param->param_hdr.length); - if (plen < sizeof(sctp_addip_param_t) + - sizeof(sctp_paramhdr_t)) - return 0; + /* In ASCONF chunks, these need to be first. */ + if (addr_param_needed && !addr_param_seen) + return false; + length = ntohs(param.addip->param_hdr.length); + if (length < sizeof(sctp_addip_param_t) + + sizeof(sctp_paramhdr_t)) + return false; break; case SCTP_PARAM_SUCCESS_REPORT: case SCTP_PARAM_ADAPTATION_LAYER_IND: if (length != sizeof(sctp_addip_param_t)) - return 0; - + return false; break; default: - break; + /* This is unkown to us, reject! */ + return false; } - - param.v += WORD_ROUND(length); } - if (param.v != chunk_end) - return 0; + /* Remaining sanity checks. 
*/ + if (addr_param_needed && !addr_param_seen) + return false; + if (!addr_param_needed && addr_param_seen) + return false; + if (param.v != chunk->chunk_end) + return false; - return 1; + return true; } /* Process an incoming ASCONF chunk with the next expected serial no. and @@ -3173,16 +3191,17 @@ int sctp_verify_asconf(const struct sctp_association *asoc, struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, struct sctp_chunk *asconf) { + sctp_addip_chunk_t *addip = (sctp_addip_chunk_t *) asconf->chunk_hdr; + bool all_param_pass = true; + union sctp_params param; sctp_addiphdr_t *hdr; union sctp_addr_param *addr_param; sctp_addip_param_t *asconf_param; struct sctp_chunk *asconf_ack; - __be16 err_code; int length = 0; int chunk_len; __u32 serial; - int all_param_pass = 1; chunk_len = ntohs(asconf->chunk_hdr->length) - sizeof(sctp_chunkhdr_t); hdr = (sctp_addiphdr_t *)asconf->skb->data; @@ -3210,9 +3229,14 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, goto done; /* Process the TLVs contained within the ASCONF chunk. */ - while (chunk_len > 0) { + sctp_walk_params(param, addip, addip_hdr.params) { + /* Skip preceeding address parameters. */ + if (param.p->type == SCTP_PARAM_IPV4_ADDRESS || + param.p->type == SCTP_PARAM_IPV6_ADDRESS) + continue; + err_code = sctp_process_asconf_param(asoc, asconf, - asconf_param); + param.addip); /* ADDIP 4.1 A7) * If an error response is received for a TLV parameter, * all TLVs with no response before the failed TLV are @@ -3220,28 +3244,20 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, * the failed response are considered unsuccessful unless * a specific success indication is present for the parameter. 
*/ - if (SCTP_ERROR_NO_ERROR != err_code) - all_param_pass = 0; - + if (err_code != SCTP_ERROR_NO_ERROR) + all_param_pass = false; if (!all_param_pass) - sctp_add_asconf_response(asconf_ack, - asconf_param->crr_id, err_code, - asconf_param); + sctp_add_asconf_response(asconf_ack, param.addip->crr_id, + err_code, param.addip); /* ADDIP 4.3 D11) When an endpoint receiving an ASCONF to add * an IP address sends an 'Out of Resource' in its response, it * MUST also fail any subsequent add or delete requests bundled * in the ASCONF. */ - if (SCTP_ERROR_RSRC_LOW == err_code) + if (err_code == SCTP_ERROR_RSRC_LOW) goto done; - - /* Move to the next ASCONF param. */ - length = ntohs(asconf_param->param_hdr.length); - asconf_param = (void *)asconf_param + length; - chunk_len -= length; } - done: asoc->peer.addip_serial++; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 0a5f050..bf12098 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -171,6 +171,9 @@ sctp_chunk_length_valid(struct sctp_chunk *chunk, { __u16 chunk_length = ntohs(chunk->chunk_hdr->length); + /* Previously already marked? */ + if (unlikely(chunk->pdiscard)) + return 0; if (unlikely(chunk_length < required_length)) return 0; @@ -358,7 +361,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net, /* Verify the INIT chunk before processing it. */ err_chunk = NULL; - if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type, + if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type, (sctp_init_chunk_t *)chunk->chunk_hdr, chunk, &err_chunk)) { /* This chunk contains fatal error. It is to be discarded. @@ -525,7 +528,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net, /* Verify the INIT chunk before processing it. 
*/ err_chunk = NULL; - if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type, + if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type, (sctp_init_chunk_t *)chunk->chunk_hdr, chunk, &err_chunk)) { @@ -1431,7 +1434,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( /* Verify the INIT chunk before processing it. */ err_chunk = NULL; - if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type, + if (!sctp_verify_init(net, ep, asoc, chunk->chunk_hdr->type, (sctp_init_chunk_t *)chunk->chunk_hdr, chunk, &err_chunk)) { /* This chunk contains fatal error. It is to be discarded. @@ -1776,9 +1779,22 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(struct net *net, /* Update the content of current association. */ sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); - sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, - SCTP_STATE(SCTP_STATE_ESTABLISHED)); - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); + if (sctp_state(asoc, SHUTDOWN_PENDING) && + (sctp_sstate(asoc->base.sk, CLOSING) || + sock_flag(asoc->base.sk, SOCK_DEAD))) { + /* if were currently in SHUTDOWN_PENDING, but the socket + * has been closed by user, don't transition to ESTABLISHED. + * Instead trigger SHUTDOWN bundled with COOKIE_ACK. 
+ */ + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); + return sctp_sf_do_9_2_start_shutdown(net, ep, asoc, + SCTP_ST_CHUNK(0), NULL, + commands); + } else { + sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, + SCTP_STATE(SCTP_STATE_ESTABLISHED)); + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); + } return SCTP_DISPOSITION_CONSUME; nomem_ev: @@ -3579,9 +3595,7 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net, struct sctp_chunk *asconf_ack = NULL; struct sctp_paramhdr *err_param = NULL; sctp_addiphdr_t *hdr; - union sctp_addr_param *addr_param; __u32 serial; - int length; if (!sctp_vtag_verify(chunk, asoc)) { sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, @@ -3606,17 +3620,8 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net, hdr = (sctp_addiphdr_t *)chunk->skb->data; serial = ntohl(hdr->serial); - addr_param = (union sctp_addr_param *)hdr->params; - length = ntohs(addr_param->p.length); - if (length < sizeof(sctp_paramhdr_t)) - return sctp_sf_violation_paramlen(net, ep, asoc, type, arg, - (void *)addr_param, commands); - /* Verify the ASCONF chunk before processing it. */ - if (!sctp_verify_asconf(asoc, - (sctp_paramhdr_t *)((void *)addr_param + length), - (void *)chunk->chunk_end, - &err_param)) + if (!sctp_verify_asconf(asoc, chunk, true, &err_param)) return sctp_sf_violation_paramlen(net, ep, asoc, type, arg, (void *)err_param, commands); @@ -3734,10 +3739,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net, rcvd_serial = ntohl(addip_hdr->serial); /* Verify the ASCONF-ACK chunk before processing it. 
*/ - if (!sctp_verify_asconf(asoc, - (sctp_paramhdr_t *)addip_hdr->params, - (void *)asconf_ack->chunk_end, - &err_param)) + if (!sctp_verify_asconf(asoc, asconf_ack, false, &err_param)) return sctp_sf_violation_paramlen(net, ep, asoc, type, arg, (void *)err_param, commands); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 14c8015..e00a041 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3296,10 +3296,10 @@ static int sctp_setsockopt_auth_chunk(struct sock *sk, char __user *optval, unsigned int optlen) { - struct net *net = sock_net(sk); + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_authchunk val; - if (!net->sctp.auth_enable) + if (!ep->auth_enable) return -EACCES; if (optlen != sizeof(struct sctp_authchunk)) @@ -3316,7 +3316,7 @@ static int sctp_setsockopt_auth_chunk(struct sock *sk, } /* add this chunk id to the endpoint */ - return sctp_auth_ep_add_chunkid(sctp_sk(sk)->ep, val.sauth_chunk); + return sctp_auth_ep_add_chunkid(ep, val.sauth_chunk); } /* @@ -3329,12 +3329,12 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk, char __user *optval, unsigned int optlen) { - struct net *net = sock_net(sk); + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_hmacalgo *hmacs; u32 idents; int err; - if (!net->sctp.auth_enable) + if (!ep->auth_enable) return -EACCES; if (optlen < sizeof(struct sctp_hmacalgo)) @@ -3351,7 +3351,7 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk, goto out; } - err = sctp_auth_ep_set_hmacs(sctp_sk(sk)->ep, hmacs); + err = sctp_auth_ep_set_hmacs(ep, hmacs); out: kfree(hmacs); return err; @@ -3367,12 +3367,12 @@ static int sctp_setsockopt_auth_key(struct sock *sk, char __user *optval, unsigned int optlen) { - struct net *net = sock_net(sk); + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_authkey *authkey; struct sctp_association *asoc; int ret; - if (!net->sctp.auth_enable) + if (!ep->auth_enable) return -EACCES; if (optlen <= sizeof(struct sctp_authkey)) @@ -3393,7 +3393,7 @@ static 
int sctp_setsockopt_auth_key(struct sock *sk, goto out; } - ret = sctp_auth_set_key(sctp_sk(sk)->ep, asoc, authkey); + ret = sctp_auth_set_key(ep, asoc, authkey); out: kzfree(authkey); return ret; @@ -3409,11 +3409,11 @@ static int sctp_setsockopt_active_key(struct sock *sk, char __user *optval, unsigned int optlen) { - struct net *net = sock_net(sk); + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_authkeyid val; struct sctp_association *asoc; - if (!net->sctp.auth_enable) + if (!ep->auth_enable) return -EACCES; if (optlen != sizeof(struct sctp_authkeyid)) @@ -3425,8 +3425,7 @@ static int sctp_setsockopt_active_key(struct sock *sk, if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP)) return -EINVAL; - return sctp_auth_set_active_key(sctp_sk(sk)->ep, asoc, - val.scact_keynumber); + return sctp_auth_set_active_key(ep, asoc, val.scact_keynumber); } /* @@ -3438,11 +3437,11 @@ static int sctp_setsockopt_del_key(struct sock *sk, char __user *optval, unsigned int optlen) { - struct net *net = sock_net(sk); + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_authkeyid val; struct sctp_association *asoc; - if (!net->sctp.auth_enable) + if (!ep->auth_enable) return -EACCES; if (optlen != sizeof(struct sctp_authkeyid)) @@ -3454,8 +3453,7 @@ static int sctp_setsockopt_del_key(struct sock *sk, if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP)) return -EINVAL; - return sctp_auth_del_key_id(sctp_sk(sk)->ep, asoc, - val.scact_keynumber); + return sctp_auth_del_key_id(ep, asoc, val.scact_keynumber); } @@ -5353,16 +5351,16 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len, static int sctp_getsockopt_hmac_ident(struct sock *sk, int len, char __user *optval, int __user *optlen) { - struct net *net = sock_net(sk); + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_hmacalgo __user *p = (void __user *)optval; struct sctp_hmac_algo_param *hmacs; __u16 data_len = 0; u32 num_idents; - if (!net->sctp.auth_enable) + if (!ep->auth_enable) return 
-EACCES; - hmacs = sctp_sk(sk)->ep->auth_hmacs_list; + hmacs = ep->auth_hmacs_list; data_len = ntohs(hmacs->param_hdr.length) - sizeof(sctp_paramhdr_t); if (len < sizeof(struct sctp_hmacalgo) + data_len) @@ -5383,11 +5381,11 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len, static int sctp_getsockopt_active_key(struct sock *sk, int len, char __user *optval, int __user *optlen) { - struct net *net = sock_net(sk); + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_authkeyid val; struct sctp_association *asoc; - if (!net->sctp.auth_enable) + if (!ep->auth_enable) return -EACCES; if (len < sizeof(struct sctp_authkeyid)) @@ -5402,7 +5400,7 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len, if (asoc) val.scact_keynumber = asoc->active_key_id; else - val.scact_keynumber = sctp_sk(sk)->ep->active_key_id; + val.scact_keynumber = ep->active_key_id; len = sizeof(struct sctp_authkeyid); if (put_user(len, optlen)) @@ -5416,7 +5414,7 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len, static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, char __user *optval, int __user *optlen) { - struct net *net = sock_net(sk); + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_authchunks __user *p = (void __user *)optval; struct sctp_authchunks val; struct sctp_association *asoc; @@ -5424,7 +5422,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, u32 num_chunks = 0; char __user *to; - if (!net->sctp.auth_enable) + if (!ep->auth_enable) return -EACCES; if (len < sizeof(struct sctp_authchunks)) @@ -5460,7 +5458,7 @@ num: static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, char __user *optval, int __user *optlen) { - struct net *net = sock_net(sk); + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_authchunks __user *p = (void __user *)optval; struct sctp_authchunks val; struct sctp_association *asoc; @@ -5468,7 +5466,7 @@ static int 
sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, u32 num_chunks = 0; char __user *to; - if (!net->sctp.auth_enable) + if (!ep->auth_enable) return -EACCES; if (len < sizeof(struct sctp_authchunks)) @@ -5485,7 +5483,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, if (asoc) ch = (struct sctp_chunks_param*)asoc->c.auth_chunks; else - ch = sctp_sk(sk)->ep->auth_chunk_list; + ch = ep->auth_chunk_list; if (!ch) goto num; @@ -6564,6 +6562,46 @@ static void __sctp_write_space(struct sctp_association *asoc) } } +static void sctp_wake_up_waiters(struct sock *sk, + struct sctp_association *asoc) +{ + struct sctp_association *tmp = asoc; + + /* We do accounting for the sndbuf space per association, + * so we only need to wake our own association. + */ + if (asoc->ep->sndbuf_policy) + return __sctp_write_space(asoc); + + /* If association goes down and is just flushing its + * outq, then just normally notify others. + */ + if (asoc->base.dead) + return sctp_write_space(sk); + + /* Accounting for the sndbuf space is per socket, so we + * need to wake up others, try to be fair and in case of + * other associations, let them have a go first instead + * of just doing a sctp_write_space() call. + * + * Note that we reach sctp_wake_up_waiters() only when + * associations free up queued chunks, thus we are under + * lock and the list of associations on a socket is + * guaranteed not to change. + */ + for (tmp = list_next_entry(tmp, asocs); 1; + tmp = list_next_entry(tmp, asocs)) { + /* Manually skip the head element. */ + if (&tmp->asocs == &((sctp_sk(sk))->ep->asocs)) + continue; + /* Wake up association. */ + __sctp_write_space(tmp); + /* We've reached the end. */ + if (tmp == asoc) + break; + } +} + /* Do accounting for the sndbuf space. * Decrement the used sndbuf space of the corresponding association by the * data size which was just transmitted(freed). 
@@ -6591,7 +6629,7 @@ static void sctp_wfree(struct sk_buff *skb) sk_mem_uncharge(sk, skb->truesize); sock_wfree(skb); - __sctp_write_space(asoc); + sctp_wake_up_waiters(sk, asoc); sctp_association_put(asoc); } diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 6b36561..968355f 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -59,8 +59,11 @@ extern int sysctl_sctp_wmem[3]; static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, - loff_t *ppos); +static int proc_sctp_do_auth(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); + static struct ctl_table sctp_table[] = { { .procname = "sctp_mem", @@ -261,7 +264,7 @@ static struct ctl_table sctp_net_table[] = { .data = &init_net.sctp.auth_enable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_sctp_do_auth, }, { .procname = "addr_scope_policy", @@ -300,41 +303,40 @@ static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; - char tmp[8]; struct ctl_table tbl; - int ret; - int changed = 0; + bool changed = false; char *none = "none"; + char tmp[8]; + int ret; memset(&tbl, 0, sizeof(struct ctl_table)); if (write) { tbl.data = tmp; - tbl.maxlen = 8; + tbl.maxlen = sizeof(tmp); } else { tbl.data = net->sctp.sctp_hmac_alg ? 
: none; tbl.maxlen = strlen(tbl.data); } - ret = proc_dostring(&tbl, write, buffer, lenp, ppos); - if (write) { + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); + if (write && ret == 0) { #ifdef CONFIG_CRYPTO_MD5 if (!strncmp(tmp, "md5", 3)) { net->sctp.sctp_hmac_alg = "md5"; - changed = 1; + changed = true; } #endif #ifdef CONFIG_CRYPTO_SHA1 if (!strncmp(tmp, "sha1", 4)) { net->sctp.sctp_hmac_alg = "sha1"; - changed = 1; + changed = true; } #endif if (!strncmp(tmp, "none", 4)) { net->sctp.sctp_hmac_alg = NULL; - changed = 1; + changed = true; } - if (!changed) ret = -EINVAL; } @@ -342,6 +344,36 @@ static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, return ret; } +static int proc_sctp_do_auth(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + struct net *net = current->nsproxy->net_ns; + struct ctl_table tbl; + int new_value, ret; + + memset(&tbl, 0, sizeof(struct ctl_table)); + tbl.maxlen = sizeof(unsigned int); + + if (write) + tbl.data = &new_value; + else + tbl.data = &net->sctp.auth_enable; + + ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); + if (write && ret == 0) { + struct sock *sk = net->sctp.ctl_sock; + + net->sctp.auth_enable = new_value; + /* Update the value in the control socket */ + lock_sock(sk); + sctp_sk(sk)->ep->auth_enable = new_value; + release_sock(sk); + } + + return ret; +} + int sctp_sysctl_net_register(struct net *net) { struct ctl_table *table; diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 81089ed..12c37ce 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -367,9 +367,10 @@ fail: * specification [SCTP] and any extensions for a list of possible * error formats. 
*/ -struct sctp_ulpevent *sctp_ulpevent_make_remote_error( - const struct sctp_association *asoc, struct sctp_chunk *chunk, - __u16 flags, gfp_t gfp) +struct sctp_ulpevent * +sctp_ulpevent_make_remote_error(const struct sctp_association *asoc, + struct sctp_chunk *chunk, __u16 flags, + gfp_t gfp) { struct sctp_ulpevent *event; struct sctp_remote_error *sre; @@ -388,8 +389,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_remote_error( /* Copy the skb to a new skb with room for us to prepend * notification with. */ - skb = skb_copy_expand(chunk->skb, sizeof(struct sctp_remote_error), - 0, gfp); + skb = skb_copy_expand(chunk->skb, sizeof(*sre), 0, gfp); /* Pull off the rest of the cause TLV from the chunk. */ skb_pull(chunk->skb, elen); @@ -400,62 +400,21 @@ struct sctp_ulpevent *sctp_ulpevent_make_remote_error( event = sctp_skb2event(skb); sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize); - sre = (struct sctp_remote_error *) - skb_push(skb, sizeof(struct sctp_remote_error)); + sre = (struct sctp_remote_error *) skb_push(skb, sizeof(*sre)); /* Trim the buffer to the right length. */ - skb_trim(skb, sizeof(struct sctp_remote_error) + elen); + skb_trim(skb, sizeof(*sre) + elen); - /* Socket Extensions for SCTP - * 5.3.1.3 SCTP_REMOTE_ERROR - * - * sre_type: - * It should be SCTP_REMOTE_ERROR. - */ + /* RFC6458, Section 6.1.3. SCTP_REMOTE_ERROR */ + memset(sre, 0, sizeof(*sre)); sre->sre_type = SCTP_REMOTE_ERROR; - - /* - * Socket Extensions for SCTP - * 5.3.1.3 SCTP_REMOTE_ERROR - * - * sre_flags: 16 bits (unsigned integer) - * Currently unused. - */ sre->sre_flags = 0; - - /* Socket Extensions for SCTP - * 5.3.1.3 SCTP_REMOTE_ERROR - * - * sre_length: sizeof (__u32) - * - * This field is the total length of the notification data, - * including the notification header. 
- */ sre->sre_length = skb->len; - - /* Socket Extensions for SCTP - * 5.3.1.3 SCTP_REMOTE_ERROR - * - * sre_error: 16 bits (unsigned integer) - * This value represents one of the Operational Error causes defined in - * the SCTP specification, in network byte order. - */ sre->sre_error = cause; - - /* Socket Extensions for SCTP - * 5.3.1.3 SCTP_REMOTE_ERROR - * - * sre_assoc_id: sizeof (sctp_assoc_t) - * - * The association id field, holds the identifier for the association. - * All notifications for a given association have the same association - * identifier. For TCP style socket, this field is ignored. - */ sctp_ulpevent_set_owner(event, asoc); sre->sre_assoc_id = sctp_assoc2id(asoc); return event; - fail: return NULL; } @@ -900,7 +859,9 @@ __u16 sctp_ulpevent_get_notification_type(const struct sctp_ulpevent *event) return notification->sn_header.sn_type; } -/* Copy out the sndrcvinfo into a msghdr. */ +/* RFC6458, Section 5.3.2. SCTP Header Information Structure + * (SCTP_SNDRCV, DEPRECATED) + */ void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event, struct msghdr *msghdr) { @@ -909,74 +870,21 @@ void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event, if (sctp_ulpevent_is_notification(event)) return; - /* Sockets API Extensions for SCTP - * Section 5.2.2 SCTP Header Information Structure (SCTP_SNDRCV) - * - * sinfo_stream: 16 bits (unsigned integer) - * - * For recvmsg() the SCTP stack places the message's stream number in - * this value. - */ + memset(&sinfo, 0, sizeof(sinfo)); sinfo.sinfo_stream = event->stream; - /* sinfo_ssn: 16 bits (unsigned integer) - * - * For recvmsg() this value contains the stream sequence number that - * the remote endpoint placed in the DATA chunk. For fragmented - * messages this is the same number for all deliveries of the message - * (if more than one recvmsg() is needed to read the message). 
- */ sinfo.sinfo_ssn = event->ssn; - /* sinfo_ppid: 32 bits (unsigned integer) - * - * In recvmsg() this value is - * the same information that was passed by the upper layer in the peer - * application. Please note that byte order issues are NOT accounted - * for and this information is passed opaquely by the SCTP stack from - * one end to the other. - */ sinfo.sinfo_ppid = event->ppid; - /* sinfo_flags: 16 bits (unsigned integer) - * - * This field may contain any of the following flags and is composed of - * a bitwise OR of these values. - * - * recvmsg() flags: - * - * SCTP_UNORDERED - This flag is present when the message was sent - * non-ordered. - */ sinfo.sinfo_flags = event->flags; - /* sinfo_tsn: 32 bit (unsigned integer) - * - * For the receiving side, this field holds a TSN that was - * assigned to one of the SCTP Data Chunks. - */ sinfo.sinfo_tsn = event->tsn; - /* sinfo_cumtsn: 32 bit (unsigned integer) - * - * This field will hold the current cumulative TSN as - * known by the underlying SCTP layer. Note this field is - * ignored when sending and only valid for a receive - * operation when sinfo_flags are set to SCTP_UNORDERED. - */ sinfo.sinfo_cumtsn = event->cumtsn; - /* sinfo_assoc_id: sizeof (sctp_assoc_t) - * - * The association handle field, sinfo_assoc_id, holds the identifier - * for the association announced in the COMMUNICATION_UP notification. - * All notifications for a given association have the same identifier. - * Ignored for one-to-one style sockets. - */ sinfo.sinfo_assoc_id = sctp_assoc2id(event->asoc); - - /* context value that is set via SCTP_CONTEXT socket option. */ + /* Context value that is set via SCTP_CONTEXT socket option. */ sinfo.sinfo_context = event->asoc->default_rcv_context; - /* These fields are not used while receiving. 
*/ sinfo.sinfo_timetolive = 0; put_cmsg(msghdr, IPPROTO_SCTP, SCTP_SNDRCV, - sizeof(struct sctp_sndrcvinfo), (void *)&sinfo); + sizeof(sinfo), &sinfo); } /* Do accounting for bytes received and hold a reference to the association diff --git a/net/socket.c b/net/socket.c index dc57dae..c8ca896 100644 --- a/net/socket.c +++ b/net/socket.c @@ -3023,19 +3023,16 @@ static int siocdevprivate_ioctl(struct net *net, unsigned int cmd, if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]), IFNAMSIZ)) return -EFAULT; - if (__get_user(data32, &u_ifreq32->ifr_ifru.ifru_data)) + if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data)) return -EFAULT; data64 = compat_ptr(data32); u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64)); - /* Don't check these user accesses, just let that get trapped - * in the ioctl handler instead. - */ if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0], IFNAMSIZ)) return -EFAULT; - if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data)) + if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data)) return -EFAULT; return dev_ioctl(net, cmd, u_ifreq64); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 80a6640..b9aad47 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -730,6 +730,8 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) newxpt = xprt->xpt_ops->xpo_accept(xprt); if (newxpt) svc_add_new_temp_xprt(serv, newxpt); + else + module_put(xprt->xpt_class->xcl_owner); } else if (xprt->xpt_ops->xpo_has_wspace(xprt)) { /* XPT_DATA|XPT_DEFERRED case: */ dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 9c9caaa..8c6e9c7 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -683,6 +683,7 @@ static struct svc_xprt_class svc_udp_class = { .xcl_owner = THIS_MODULE, .xcl_ops = &svc_udp_ops, .xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP, + .xcl_ident = XPRT_TRANSPORT_UDP, }; static void 
svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) @@ -1277,6 +1278,7 @@ static struct svc_xprt_class svc_tcp_class = { .xcl_owner = THIS_MODULE, .xcl_ops = &svc_tcp_ops, .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, + .xcl_ident = XPRT_TRANSPORT_TCP, }; void svc_init_xprt_sock(void) @@ -1395,6 +1397,22 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, return svsk; } +bool svc_alien_sock(struct net *net, int fd) +{ + int err; + struct socket *sock = sockfd_lookup(fd, &err); + bool ret = false; + + if (!sock) + goto out; + if (sock_net(sock->sk) != net) + ret = true; + sockfd_put(sock); +out: + return ret; +} +EXPORT_SYMBOL_GPL(svc_alien_sock); + /** * svc_addsock - add a listener socket to an RPC service * @serv: pointer to RPC service to which to add a new listener diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 095363e..42ce6bf 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1290,7 +1290,7 @@ struct rpc_xprt *xprt_create_transport(struct xprt_create *args) } } spin_unlock(&xprt_list_lock); - printk(KERN_ERR "RPC: transport (%d) not supported\n", args->ident); + dprintk("RPC: transport (%d) not supported\n", args->ident); return ERR_PTR(-EIO); found: diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 62e4f9b..ed36cb5 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -89,6 +89,7 @@ struct svc_xprt_class svc_rdma_class = { .xcl_owner = THIS_MODULE, .xcl_ops = &svc_rdma_ops, .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, + .xcl_ident = XPRT_TRANSPORT_RDMA, }; struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 716de1a..6ef8925 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -531,6 +531,7 @@ receive: buf = node->bclink.deferred_head; node->bclink.deferred_head = buf->next; + buf->next = NULL; node->bclink.deferred_size--; goto receive; } diff 
--git a/net/tipc/msg.c b/net/tipc/msg.c index ced60e2..1e76d91 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -76,10 +76,11 @@ int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, u32 num_sect, unsigned int total_len, int max_size, struct sk_buff **buf) { - int dsz, sz, hsz, pos, res, cnt; + int dsz, sz, hsz; + unsigned char *to; dsz = total_len; - pos = hsz = msg_hdr_sz(hdr); + hsz = msg_hdr_sz(hdr); sz = hsz + dsz; msg_set_size(hdr, sz); if (unlikely(sz > max_size)) { @@ -91,16 +92,11 @@ int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, if (!(*buf)) return -ENOMEM; skb_copy_to_linear_data(*buf, hdr, hsz); - for (res = 1, cnt = 0; res && (cnt < num_sect); cnt++) { - skb_copy_to_linear_data_offset(*buf, pos, - msg_sect[cnt].iov_base, - msg_sect[cnt].iov_len); - pos += msg_sect[cnt].iov_len; + to = (*buf)->data + hsz; + if (total_len && memcpy_fromiovecend(to, msg_sect, 0, dsz)) { + kfree_skb(*buf); + *buf = NULL; + return -EFAULT; } - if (likely(res)) - return dsz; - - kfree_skb(*buf); - *buf = NULL; - return -EFAULT; + return dsz; } diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 299e45af..ec2ecbd 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -962,6 +962,7 @@ static void tipc_purge_publications(struct name_seq *seq) list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) { tipc_nametbl_remove_publ(publ->type, publ->lower, publ->node, publ->ref, publ->key); + kfree(publ); } } @@ -986,7 +987,6 @@ void tipc_nametbl_stop(void) hlist_for_each_entry_safe(seq, safe, seq_head, ns_list) { tipc_purge_publications(seq); } - continue; } kfree(table.types); table.types = NULL; diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index e49c726..d64486e 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -47,7 +47,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) int hdr_space = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN); u16 cmd; - if ((req_userhdr->cmd 
& 0xC000) && (!capable(CAP_NET_ADMIN))) + if ((req_userhdr->cmd & 0xC000) && (!netlink_capable(skb, CAP_NET_ADMIN))) cmd = TIPC_CMD_NOT_NET_ADMIN; else cmd = req_userhdr->cmd; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 5adfd94..85d232b 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1925,9 +1925,23 @@ static struct miscdevice vsock_device = { .fops = &vsock_device_ops, }; -static int __vsock_core_init(void) +int __vsock_core_init(const struct vsock_transport *t, struct module *owner) { - int err; + int err = mutex_lock_interruptible(&vsock_register_mutex); + + if (err) + return err; + + if (transport) { + err = -EBUSY; + goto err_busy; + } + + /* Transport must be the owner of the protocol so that it can't + * unload while there are open sockets. + */ + vsock_proto.owner = owner; + transport = t; vsock_init_tables(); @@ -1951,36 +1965,19 @@ static int __vsock_core_init(void) goto err_unregister_proto; } + mutex_unlock(&vsock_register_mutex); return 0; err_unregister_proto: proto_unregister(&vsock_proto); err_misc_deregister: misc_deregister(&vsock_device); - return err; -} - -int vsock_core_init(const struct vsock_transport *t) -{ - int retval = mutex_lock_interruptible(&vsock_register_mutex); - if (retval) - return retval; - - if (transport) { - retval = -EBUSY; - goto out; - } - - transport = t; - retval = __vsock_core_init(); - if (retval) - transport = NULL; - -out: + transport = NULL; +err_busy: mutex_unlock(&vsock_register_mutex); - return retval; + return err; } -EXPORT_SYMBOL_GPL(vsock_core_init); +EXPORT_SYMBOL_GPL(__vsock_core_init); void vsock_core_exit(void) { @@ -2000,5 +1997,5 @@ EXPORT_SYMBOL_GPL(vsock_core_exit); MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMware Virtual Socket Family"); -MODULE_VERSION("1.0.0.0-k"); +MODULE_VERSION("1.0.1.0-k"); MODULE_LICENSE("GPL v2"); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c2853bb..c3ef31a 100644 --- a/net/wireless/nl80211.c 
+++ b/net/wireless/nl80211.c @@ -6797,6 +6797,9 @@ void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp) void *hdr = ((void **)skb->cb)[1]; struct nlattr *data = ((void **)skb->cb)[2]; + /* clear CB data for netlink core to own from now on */ + memset(skb->cb, 0, sizeof(skb->cb)); + nla_nest_end(skb, data); genlmsg_end(skb, hdr); genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), skb, 0, diff --git a/net/wireless/reg.c b/net/wireless/reg.c index de06d5d..8eedb15 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1432,7 +1432,7 @@ static enum reg_request_treatment __regulatory_hint(struct wiphy *wiphy, struct regulatory_request *pending_request) { - const struct ieee80211_regdomain *regd; + const struct ieee80211_regdomain *regd, *tmp; bool intersect = false; enum reg_request_treatment treatment; struct regulatory_request *lr; @@ -1448,7 +1448,9 @@ __regulatory_hint(struct wiphy *wiphy, kfree(pending_request); return PTR_ERR(regd); } + tmp = get_wiphy_regdom(wiphy); rcu_assign_pointer(wiphy->regd, regd); + rcu_free_regdom(tmp); } intersect = true; break; @@ -1468,7 +1470,9 @@ __regulatory_hint(struct wiphy *wiphy, return REG_REQ_IGNORE; } treatment = REG_REQ_ALREADY_SET; + tmp = get_wiphy_regdom(wiphy); rcu_assign_pointer(wiphy->regd, regd); + rcu_free_regdom(tmp); goto new_request; } kfree(pending_request); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 20e86a9..2f844ee 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -242,7 +242,6 @@ void cfg80211_conn_work(struct work_struct *work) NULL, 0, NULL, 0, WLAN_STATUS_UNSPECIFIED_FAILURE, false, NULL); - cfg80211_sme_free(wdev); } wdev_unlock(wdev); } @@ -646,6 +645,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, cfg80211_unhold_bss(bss_from_pub(bss)); cfg80211_put_bss(wdev->wiphy, bss); } + cfg80211_sme_free(wdev); return; } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index ba5f0d6..064b471 100644 --- a/net/wireless/trace.h +++ 
b/net/wireless/trace.h @@ -2029,7 +2029,8 @@ TRACE_EVENT(cfg80211_michael_mic_failure, MAC_ASSIGN(addr, addr); __entry->key_type = key_type; __entry->key_id = key_id; - memcpy(__entry->tsc, tsc, 6); + if (tsc) + memcpy(__entry->tsc, tsc, 6); ), TP_printk(NETDEV_PR_FMT ", " MAC_PR_FMT ", key type: %d, key id: %d, tsc: %pm", NETDEV_PR_ARG, MAC_PR_ARG(addr), __entry->key_type, diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 6ff7c54..823e48c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -46,6 +46,11 @@ static DEFINE_SPINLOCK(xfrm_policy_sk_bundle_lock); static struct dst_entry *xfrm_policy_sk_bundles; static DEFINE_RWLOCK(xfrm_policy_lock); +struct xfrm_flo { + struct dst_entry *dst_orig; + u8 flags; +}; + static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO] __read_mostly; @@ -1913,13 +1918,14 @@ static int xdst_queue_output(struct sk_buff *skb) } static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net, - struct dst_entry *dst, + struct xfrm_flo *xflo, const struct flowi *fl, int num_xfrms, u16 family) { int err; struct net_device *dev; + struct dst_entry *dst; struct dst_entry *dst1; struct xfrm_dst *xdst; @@ -1927,10 +1933,13 @@ static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net, if (IS_ERR(xdst)) return xdst; - if (net->xfrm.sysctl_larval_drop || num_xfrms <= 0 || + if (!(xflo->flags & XFRM_LOOKUP_QUEUE) || + net->xfrm.sysctl_larval_drop || + num_xfrms <= 0 || (fl->flowi_flags & FLOWI_FLAG_CAN_SLEEP)) return xdst; + dst = xflo->dst_orig; dst1 = &xdst->u.dst; dst_hold(dst); xdst->route = dst; @@ -1972,7 +1981,7 @@ static struct flow_cache_object * xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct flow_cache_object *oldflo, void *ctx) { - struct dst_entry *dst_orig = (struct dst_entry *)ctx; + struct xfrm_flo *xflo = (struct xfrm_flo *)ctx; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; struct xfrm_dst 
*xdst, *new_xdst; int num_pols = 0, num_xfrms = 0, i, err, pol_dead; @@ -2013,7 +2022,8 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, goto make_dummy_bundle; } - new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig); + new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, + xflo->dst_orig); if (IS_ERR(new_xdst)) { err = PTR_ERR(new_xdst); if (err != -EAGAIN) @@ -2047,7 +2057,7 @@ make_dummy_bundle: /* We found policies, but there's no bundles to instantiate: * either because the policy blocks, has no transformations or * we could not build template (no xfrm_states).*/ - xdst = xfrm_create_dummy_bundle(net, dst_orig, fl, num_xfrms, family); + xdst = xfrm_create_dummy_bundle(net, xflo, fl, num_xfrms, family); if (IS_ERR(xdst)) { xfrm_pols_put(pols, num_pols); return ERR_CAST(xdst); @@ -2147,13 +2157,18 @@ restart: } if (xdst == NULL) { + struct xfrm_flo xflo; + + xflo.dst_orig = dst_orig; + xflo.flags = flags; + /* To accelerate a bit... */ if ((dst_orig->flags & DST_NOXFRM) || !net->xfrm.policy_count[XFRM_POLICY_OUT]) goto nopol; flo = flow_cache_lookup(net, fl, family, dir, - xfrm_bundle_lookup, dst_orig); + xfrm_bundle_lookup, &xflo); if (flo == NULL) goto nopol; if (IS_ERR(flo)) { @@ -2181,7 +2196,7 @@ restart: xfrm_pols_put(pols, drop_pols); XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); - return make_blackhole(net, family, dst_orig); + return ERR_PTR(-EREMOTE); } if (fl->flowi_flags & FLOWI_FLAG_CAN_SLEEP) { DECLARE_WAITQUEUE(wait, current); @@ -2253,6 +2268,23 @@ dropdst: } EXPORT_SYMBOL(xfrm_lookup); +/* Callers of xfrm_lookup_route() must ensure a call to dst_output(). + * Otherwise we may send out blackholed packets. 
+ */ +struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig, + const struct flowi *fl, + struct sock *sk, int flags) +{ + struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk, + flags | XFRM_LOOKUP_QUEUE); + + if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE) + return make_blackhole(net, dst_orig->ops->family, dst_orig); + + return dst; +} +EXPORT_SYMBOL(xfrm_lookup_route); + static inline int xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl) { @@ -2518,7 +2550,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) skb_dst_force(skb); - dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, 0); + dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE); if (IS_ERR(dst)) { res = 0; dst = NULL; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index f3b13d3..716ee00 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -930,6 +930,20 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, return skb; } +/* A wrapper for nlmsg_multicast() checking that nlsk is still available. + * Must be called with RCU read lock. 
+ */ +static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb, + u32 pid, unsigned int group) +{ + struct sock *nlsk = rcu_dereference(net->xfrm.nlsk); + + if (nlsk) + return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC); + else + return -1; +} + static inline size_t xfrm_spdinfo_msgsize(void) { return NLMSG_ALIGN(4) @@ -2253,7 +2267,7 @@ static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, if (build_migrate(skb, m, num_migrate, k, sel, dir, type) < 0) BUG(); - return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MIGRATE, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MIGRATE); } #else static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, @@ -2363,7 +2377,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) link = &xfrm_dispatch[type]; /* All operations require privileges, even GET */ - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + if (!netlink_net_capable(skb, CAP_NET_ADMIN)) return -EPERM; if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) || @@ -2440,7 +2454,7 @@ static int xfrm_exp_state_notify(struct xfrm_state *x, const struct km_event *c) return -EMSGSIZE; } - return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_EXPIRE); } static int xfrm_aevent_state_notify(struct xfrm_state *x, const struct km_event *c) @@ -2455,7 +2469,7 @@ static int xfrm_aevent_state_notify(struct xfrm_state *x, const struct km_event if (build_aevent(skb, x, c) < 0) BUG(); - return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_AEVENTS, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_AEVENTS); } static int xfrm_notify_sa_flush(const struct km_event *c) @@ -2481,7 +2495,7 @@ static int xfrm_notify_sa_flush(const struct km_event *c) nlmsg_end(skb, nlh); - return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, 
XFRMNLGRP_SA); } static inline size_t xfrm_sa_len(struct xfrm_state *x) @@ -2568,7 +2582,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c) nlmsg_end(skb, nlh); - return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_SA); out_free_skb: kfree_skb(skb); @@ -2659,7 +2673,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, if (build_acquire(skb, x, xt, xp) < 0) BUG(); - return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_ACQUIRE); } /* User gives us xfrm_user_policy_info followed by an array of 0 @@ -2773,7 +2787,7 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct if (build_polexpire(skb, xp, dir, c) < 0) BUG(); - return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_EXPIRE); } static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c) @@ -2835,7 +2849,7 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_e nlmsg_end(skb, nlh); - return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_POLICY); out_free_skb: kfree_skb(skb); @@ -2863,7 +2877,7 @@ static int xfrm_notify_policy_flush(const struct km_event *c) nlmsg_end(skb, nlh); - return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_POLICY); out_free_skb: kfree_skb(skb); @@ -2932,7 +2946,7 @@ static int xfrm_send_report(struct net *net, u8 proto, if (build_report(skb, proto, sel, addr) < 0) BUG(); - return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_REPORT); } static inline size_t xfrm_mapping_msgsize(void) @@ -2984,7 
+2998,7 @@ static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, if (build_mapping(skb, x, ipaddr, sport) < 0) BUG(); - return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MAPPING, GFP_ATOMIC); + return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MAPPING); } static bool xfrm_is_alive(const struct km_event *c) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 4305b2f..8c0e07b 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -1750,7 +1750,7 @@ sub dump_struct($$) { # strip kmemcheck_bitfield_{begin,end}.*; $members =~ s/kmemcheck_bitfield_.*?;//gos; # strip attributes - $members =~ s/__aligned\s*\(.+\)//gos; + $members =~ s/__aligned\s*\([^;]*\)//gos; create_parameterlist($members, ';', $file); check_sections($file, $declaration_name, "struct", $sectcheck, $struct_actual, $nested); diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 25e5cb0..ce16404 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -42,7 +42,7 @@ typedef unsigned char __u8; /* This array collects all instances that use the generic do_table */ struct devtable { - const char *device_id; /* name of table, __mod_<name>_device_table. */ + const char *device_id; /* name of table, __mod_<name>__*_device_table. 
*/ unsigned long id_size; void *function; }; @@ -146,7 +146,8 @@ static void device_id_check(const char *modname, const char *device_id, if (size % id_size || size < id_size) { fatal("%s: sizeof(struct %s_device_id)=%lu is not a modulo " - "of the size of section __mod_%s_device_table=%lu.\n" + "of the size of " + "section __mod_%s__<identifier>_device_table=%lu.\n" "Fix definition of struct %s_device_id " "in mod_devicetable.h\n", modname, device_id, id_size, device_id, size, device_id); @@ -1206,7 +1207,7 @@ void handle_moddevtable(struct module *mod, struct elf_info *info, { void *symval; char *zeros = NULL; - const char *name; + const char *name, *identifier; unsigned int namelen; /* We're looking for a section relative symbol */ @@ -1217,7 +1218,7 @@ void handle_moddevtable(struct module *mod, struct elf_info *info, if (ELF_ST_TYPE(sym->st_info) != STT_OBJECT) return; - /* All our symbols are of form <prefix>__mod_XXX_device_table. */ + /* All our symbols are of form <prefix>__mod_<name>__<identifier>_device_table. 
*/ name = strstr(symname, "__mod_"); if (!name) return; @@ -1227,7 +1228,10 @@ void handle_moddevtable(struct module *mod, struct elf_info *info, return; if (strcmp(name + namelen - strlen("_device_table"), "_device_table")) return; - namelen -= strlen("_device_table"); + identifier = strstr(name, "__"); + if (!identifier) + return; + namelen = identifier - name; /* Handle all-NULL symbols allocated into .bss */ if (info->sechdrs[get_secindex(info, sym)].sh_type & SHT_NOBITS) { diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 8247979..78c2169 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -573,12 +573,16 @@ static int ignore_undef_symbol(struct elf_info *info, const char *symname) if (strncmp(symname, "_restgpr_", sizeof("_restgpr_") - 1) == 0 || strncmp(symname, "_savegpr_", sizeof("_savegpr_") - 1) == 0 || strncmp(symname, "_rest32gpr_", sizeof("_rest32gpr_") - 1) == 0 || - strncmp(symname, "_save32gpr_", sizeof("_save32gpr_") - 1) == 0) + strncmp(symname, "_save32gpr_", sizeof("_save32gpr_") - 1) == 0 || + strncmp(symname, "_restvr_", sizeof("_restvr_") - 1) == 0 || + strncmp(symname, "_savevr_", sizeof("_savevr_") - 1) == 0) return 1; if (info->hdr->e_machine == EM_PPC64) /* Special register function linked on all modules during final link of .ko */ if (strncmp(symname, "_restgpr0_", sizeof("_restgpr0_") - 1) == 0 || - strncmp(symname, "_savegpr0_", sizeof("_savegpr0_") - 1) == 0) + strncmp(symname, "_savegpr0_", sizeof("_savegpr0_") - 1) == 0 || + strncmp(symname, "_restvr_", sizeof("_restvr_") - 1) == 0 || + strncmp(symname, "_savevr_", sizeof("_savevr_") - 1) == 0) return 1; /* Do not ignore this symbol */ return 0; diff --git a/scripts/package/builddeb b/scripts/package/builddeb index c1bb9be..6d02fd5d 100644 --- a/scripts/package/builddeb +++ b/scripts/package/builddeb @@ -155,11 +155,11 @@ if grep -q '^CONFIG_MODULES=y' $KCONFIG_CONFIG ; then for module in $(find lib/modules/ -name *.ko); do mkdir -p $(dirname 
$dbg_dir/usr/lib/debug/$module) # only keep debug symbols in the debug file - objcopy --only-keep-debug $module $dbg_dir/usr/lib/debug/$module + $OBJCOPY --only-keep-debug $module $dbg_dir/usr/lib/debug/$module # strip original module from debug symbols - objcopy --strip-debug $module + $OBJCOPY --strip-debug $module # then add a link to those - objcopy --add-gnu-debuglink=$dbg_dir/usr/lib/debug/$module $module + $OBJCOPY --add-gnu-debuglink=$dbg_dir/usr/lib/debug/$module $module done ) fi diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h index 9d1421e..49b582a 100644 --- a/scripts/recordmcount.h +++ b/scripts/recordmcount.h @@ -163,11 +163,11 @@ static int mcount_adjust = 0; static int MIPS_is_fake_mcount(Elf_Rel const *rp) { - static Elf_Addr old_r_offset; + static Elf_Addr old_r_offset = ~(Elf_Addr)0; Elf_Addr current_r_offset = _w(rp->r_offset); int is_fake; - is_fake = old_r_offset && + is_fake = (old_r_offset != ~(Elf_Addr)0) && (current_r_offset - old_r_offset == MIPS_FAKEMCOUNT_OFFSET); old_r_offset = current_r_offset; diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index a674fd5..a27134f 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -262,7 +262,6 @@ if ($arch eq "x86_64") { # force flags for this arch $ld .= " -m shlelf_linux"; $objcopy .= " -O elf32-sh-linux"; - $cc .= " -m32"; } elsif ($arch eq "powerpc") { $local_regex = "^[0-9a-fA-F]+\\s+t\\s+(\\.?\\S+)"; diff --git a/security/commoncap.c b/security/commoncap.c index b9d613e..963dc59 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -421,6 +421,9 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data cpu_caps->inheritable.cap[i] = le32_to_cpu(caps.data[i].inheritable); } + cpu_caps->permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; + cpu_caps->inheritable.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; + return 0; } diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 7c2a0a7..8595f0b 
100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -309,57 +309,139 @@ static int devcgroup_seq_read(struct cgroup_subsys_state *css, } /** - * may_access - verifies if a new exception is part of what is allowed - * by a dev cgroup based on the default policy + - * exceptions. This is used to make sure a child cgroup - * won't have more privileges than its parent or to - * verify if a certain access is allowed. - * @dev_cgroup: dev cgroup to be tested against - * @refex: new exception - * @behavior: behavior of the exception + * match_exception - iterates the exception list trying to match a rule + * based on type, major, minor and access type. It is + * considered a match if an exception is found that + * will contain the entire range of provided parameters. + * @exceptions: list of exceptions + * @type: device type (DEV_BLOCK or DEV_CHAR) + * @major: device file major number, ~0 to match all + * @minor: device file minor number, ~0 to match all + * @access: permission mask (ACC_READ, ACC_WRITE, ACC_MKNOD) + * + * returns: true in case it matches an exception completely */ -static bool may_access(struct dev_cgroup *dev_cgroup, - struct dev_exception_item *refex, - enum devcg_behavior behavior) +static bool match_exception(struct list_head *exceptions, short type, + u32 major, u32 minor, short access) { struct dev_exception_item *ex; - bool match = false; - rcu_lockdep_assert(rcu_read_lock_held() || - lockdep_is_held(&devcgroup_mutex), - "device_cgroup::may_access() called without proper synchronization"); + list_for_each_entry_rcu(ex, exceptions, list) { + if ((type & DEV_BLOCK) && !(ex->type & DEV_BLOCK)) + continue; + if ((type & DEV_CHAR) && !(ex->type & DEV_CHAR)) + continue; + if (ex->major != ~0 && ex->major != major) + continue; + if (ex->minor != ~0 && ex->minor != minor) + continue; + /* provided access cannot have more than the exception rule */ + if (access & (~ex->access)) + continue; + return true; + } + return false; +} + +/** + 
* match_exception_partial - iterates the exception list trying to match a rule + * based on type, major, minor and access type. It is + * considered a match if an exception's range is + * found to contain *any* of the devices specified by + * provided parameters. This is used to make sure no + * extra access is being granted that is forbidden by + * any of the exception list. + * @exceptions: list of exceptions + * @type: device type (DEV_BLOCK or DEV_CHAR) + * @major: device file major number, ~0 to match all + * @minor: device file minor number, ~0 to match all + * @access: permission mask (ACC_READ, ACC_WRITE, ACC_MKNOD) + * + * returns: true in case the provided range mat matches an exception completely + */ +static bool match_exception_partial(struct list_head *exceptions, short type, + u32 major, u32 minor, short access) +{ + struct dev_exception_item *ex; - list_for_each_entry_rcu(ex, &dev_cgroup->exceptions, list) { - if ((refex->type & DEV_BLOCK) && !(ex->type & DEV_BLOCK)) + list_for_each_entry_rcu(ex, exceptions, list) { + if ((type & DEV_BLOCK) && !(ex->type & DEV_BLOCK)) continue; - if ((refex->type & DEV_CHAR) && !(ex->type & DEV_CHAR)) + if ((type & DEV_CHAR) && !(ex->type & DEV_CHAR)) continue; - if (ex->major != ~0 && ex->major != refex->major) + /* + * We must be sure that both the exception and the provided + * range aren't masking all devices + */ + if (ex->major != ~0 && major != ~0 && ex->major != major) continue; - if (ex->minor != ~0 && ex->minor != refex->minor) + if (ex->minor != ~0 && minor != ~0 && ex->minor != minor) continue; - if (refex->access & (~ex->access)) + /* + * In order to make sure the provided range isn't matching + * an exception, all its access bits shouldn't match the + * exception's access bits + */ + if (!(access & ex->access)) continue; - match = true; - break; + return true; } + return false; +} + +/** + * verify_new_ex - verifies if a new exception is part of what is allowed + * by a dev cgroup based on the default 
policy + + * exceptions. This is used to make sure a child cgroup + * won't have more privileges than its parent + * @dev_cgroup: dev cgroup to be tested against + * @refex: new exception + * @behavior: behavior of the exception's dev_cgroup + */ +static bool verify_new_ex(struct dev_cgroup *dev_cgroup, + struct dev_exception_item *refex, + enum devcg_behavior behavior) +{ + bool match = false; + + rcu_lockdep_assert(rcu_read_lock_held() || + lockdep_is_held(&devcgroup_mutex), + "device_cgroup:verify_new_ex called without proper synchronization"); if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) { if (behavior == DEVCG_DEFAULT_ALLOW) { - /* the exception will deny access to certain devices */ + /* + * new exception in the child doesn't matter, only + * adding extra restrictions + */ return true; } else { - /* the exception will allow access to certain devices */ + /* + * new exception in the child will add more devices + * that can be acessed, so it can't match any of + * parent's exceptions, even slightly + */ + match = match_exception_partial(&dev_cgroup->exceptions, + refex->type, + refex->major, + refex->minor, + refex->access); + if (match) - /* - * a new exception allowing access shouldn't - * match an parent's exception - */ return false; return true; } } else { - /* only behavior == DEVCG_DEFAULT_DENY allowed here */ + /* + * Only behavior == DEVCG_DEFAULT_DENY allowed here, therefore + * the new exception will add access to more devices and must + * be contained completely in an parent's exception to be + * allowed + */ + match = match_exception(&dev_cgroup->exceptions, refex->type, + refex->major, refex->minor, + refex->access); + if (match) /* parent has an exception that matches the proposed */ return true; @@ -381,7 +463,38 @@ static int parent_has_perm(struct dev_cgroup *childcg, if (!parent) return 1; - return may_access(parent, ex, childcg->behavior); + return verify_new_ex(parent, ex, childcg->behavior); +} + +/** + * parent_allows_removal - verify 
if it's ok to remove an exception + * @childcg: child cgroup from where the exception will be removed + * @ex: exception being removed + * + * When removing an exception in cgroups with default ALLOW policy, it must + * be checked if removing it will give the child cgroup more access than the + * parent. + * + * Return: true if it's ok to remove exception, false otherwise + */ +static bool parent_allows_removal(struct dev_cgroup *childcg, + struct dev_exception_item *ex) +{ + struct dev_cgroup *parent = css_to_devcgroup(css_parent(&childcg->css)); + + if (!parent) + return true; + + /* It's always allowed to remove access to devices */ + if (childcg->behavior == DEVCG_DEFAULT_DENY) + return true; + + /* + * Make sure you're not removing part or a whole exception existing in + * the parent cgroup + */ + return !match_exception_partial(&parent->exceptions, ex->type, + ex->major, ex->minor, ex->access); } /** @@ -619,17 +732,21 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup, switch (filetype) { case DEVCG_ALLOW: - if (!parent_has_perm(devcgroup, &ex)) - return -EPERM; /* * If the default policy is to allow by default, try to remove * an matching exception instead. 
And be silent about it: we * don't want to break compatibility */ if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) { + /* Check if the parent allows removing it first */ + if (!parent_allows_removal(devcgroup, &ex)) + return -EPERM; dev_exception_rm(devcgroup, &ex); - return 0; + break; } + + if (!parent_has_perm(devcgroup, &ex)) + return -EPERM; rc = dev_exception_add(devcgroup, &ex); break; case DEVCG_DENY: @@ -709,18 +826,18 @@ static int __devcgroup_check_permission(short type, u32 major, u32 minor, short access) { struct dev_cgroup *dev_cgroup; - struct dev_exception_item ex; - int rc; - - memset(&ex, 0, sizeof(ex)); - ex.type = type; - ex.major = major; - ex.minor = minor; - ex.access = access; + bool rc; rcu_read_lock(); dev_cgroup = task_devcgroup(current); - rc = may_access(dev_cgroup, &ex, dev_cgroup->behavior); + if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) + /* Can't match any of the exceptions, even partially */ + rc = !match_exception_partial(&dev_cgroup->exceptions, + type, major, minor, access); + else + /* Need to match completely one exception to be allowed */ + rc = match_exception(&dev_cgroup->exceptions, type, major, + minor, access); rcu_read_unlock(); if (!rc) diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index af9b685..d43b62c 100644 --- a/security/integrity/evm/evm_main.c +++ b/security/integrity/evm/evm_main.c @@ -285,12 +285,23 @@ out: * @xattr_value: pointer to the new extended attribute value * @xattr_value_len: pointer to the new extended attribute value length * - * Updating 'security.evm' requires CAP_SYS_ADMIN privileges and that - * the current value is valid. + * Before allowing the 'security.evm' protected xattr to be updated, + * verify the existing value is valid. As only the kernel should have + * access to the EVM encrypted key needed to calculate the HMAC, prevent + * userspace from writing HMAC value. Writing 'security.evm' requires + * requires CAP_SYS_ADMIN privileges. 
*/ int evm_inode_setxattr(struct dentry *dentry, const char *xattr_name, const void *xattr_value, size_t xattr_value_len) { + const struct evm_ima_xattr_data *xattr_data = xattr_value; + + if (strcmp(xattr_name, XATTR_NAME_EVM) == 0) { + if (!xattr_value_len) + return -EINVAL; + if (xattr_data->type != EVM_IMA_XATTR_DIGSIG) + return -EPERM; + } return evm_protect_xattr(dentry, xattr_name, xattr_value, xattr_value_len); } diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c index 1c03e8f1..4e1529e 100644 --- a/security/integrity/ima/ima_api.c +++ b/security/integrity/ima/ima_api.c @@ -140,6 +140,7 @@ int ima_must_measure(struct inode *inode, int mask, int function) int ima_collect_measurement(struct integrity_iint_cache *iint, struct file *file) { + const char *audit_cause = "failed"; struct inode *inode = file_inode(file); const char *filename = file->f_dentry->d_name.name; int result = 0; @@ -147,6 +148,11 @@ int ima_collect_measurement(struct integrity_iint_cache *iint, if (!(iint->flags & IMA_COLLECTED)) { u64 i_version = file_inode(file)->i_version; + if (file->f_flags & O_DIRECT) { + audit_cause = "failed(directio)"; + result = -EACCES; + goto out; + } iint->ima_xattr.type = IMA_XATTR_DIGEST; result = ima_calc_file_hash(file, iint->ima_xattr.digest); if (!result) { @@ -154,9 +160,10 @@ int ima_collect_measurement(struct integrity_iint_cache *iint, iint->flags |= IMA_COLLECTED; } } +out: if (result) integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode, - filename, "collect_data", "failed", + filename, "collect_data", audit_cause, result, 0); return result; } diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c index a02e079..9da974c 100644 --- a/security/integrity/ima/ima_crypto.c +++ b/security/integrity/ima/ima_crypto.c @@ -24,6 +24,36 @@ static struct crypto_shash *ima_shash_tfm; +/** + * ima_kernel_read - read file content + * + * This is a function for reading file content instead of kernel_read(). 
+ * It does not perform locking checks to ensure it cannot be blocked. + * It does not perform security checks because it is irrelevant for IMA. + * + */ +static int ima_kernel_read(struct file *file, loff_t offset, + char *addr, unsigned long count) +{ + mm_segment_t old_fs; + char __user *buf = addr; + ssize_t ret; + + if (!(file->f_mode & FMODE_READ)) + return -EBADF; + if (!file->f_op->read && !file->f_op->aio_read) + return -EINVAL; + + old_fs = get_fs(); + set_fs(get_ds()); + if (file->f_op->read) + ret = file->f_op->read(file, buf, count, &offset); + else + ret = do_sync_read(file, buf, count, &offset); + set_fs(old_fs); + return ret; +} + int ima_init_crypto(void) { long rc; @@ -70,7 +100,7 @@ int ima_calc_file_hash(struct file *file, char *digest) while (offset < i_size) { int rbuf_len; - rbuf_len = kernel_read(file, offset, rbuf, PAGE_SIZE); + rbuf_len = ima_kernel_read(file, offset, rbuf, PAGE_SIZE); if (rbuf_len < 0) { rc = rbuf_len; break; diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index e9508d5..03fb126 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -186,8 +186,11 @@ static int process_measurement(struct file *file, const char *filename, } rc = ima_collect_measurement(iint, file); - if (rc != 0) + if (rc != 0) { + if (file->f_flags & O_DIRECT) + rc = (iint->flags & IMA_PERMIT_DIRECTIO) ? 0 : -EACCES; goto out_digsig; + } pathname = !filename ? 
ima_d_path(&file->f_path, &pathbuf) : filename; if (!pathname) diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index a9c3d3c..085c496 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -351,7 +351,7 @@ enum { Opt_obj_user, Opt_obj_role, Opt_obj_type, Opt_subj_user, Opt_subj_role, Opt_subj_type, Opt_func, Opt_mask, Opt_fsmagic, Opt_uid, Opt_fowner, - Opt_appraise_type, Opt_fsuuid + Opt_appraise_type, Opt_fsuuid, Opt_permit_directio }; static match_table_t policy_tokens = { @@ -373,6 +373,7 @@ static match_table_t policy_tokens = { {Opt_uid, "uid=%s"}, {Opt_fowner, "fowner=%s"}, {Opt_appraise_type, "appraise_type=%s"}, + {Opt_permit_directio, "permit_directio"}, {Opt_err, NULL} }; @@ -621,6 +622,9 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) else result = -EINVAL; break; + case Opt_permit_directio: + entry->flags |= IMA_PERMIT_DIRECTIO; + break; case Opt_err: ima_log_string(ab, "UNKNOWN", p); result = -EINVAL; diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h index c42fb7a..ecbb6f2 100644 --- a/security/integrity/integrity.h +++ b/security/integrity/integrity.h @@ -30,6 +30,7 @@ #define IMA_ACTION_FLAGS 0xff000000 #define IMA_DIGSIG 0x01000000 #define IMA_DIGSIG_REQUIRED 0x02000000 +#define IMA_PERMIT_DIRECTIO 0x04000000 #define IMA_DO_MASK (IMA_MEASURE | IMA_APPRAISE | IMA_AUDIT | \ IMA_APPRAISE_SUBMASK) diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index 9e1e005..c4c8df4 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -1018,10 +1018,13 @@ static int __init init_encrypted(void) ret = encrypted_shash_alloc(); if (ret < 0) return ret; + ret = aes_get_sizes(); + if (ret < 0) + goto out; ret = register_key_type(&key_type_encrypted); if (ret < 0) goto out; - return aes_get_sizes(); + return 0; out: encrypted_shash_release(); return 
ret; diff --git a/security/keys/gc.c b/security/keys/gc.c index d67c97b..7978186 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c @@ -201,12 +201,12 @@ static noinline void key_gc_unused_keys(struct list_head *keys) if (test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) atomic_dec(&key->user->nikeys); - key_user_put(key->user); - /* now throw away the key memory */ if (key->type->destroy) key->type->destroy(key); + key_user_put(key->user); + kfree(key->description); #ifdef KEY_DEBUGGING diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 630b8ad..3ba608a 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -439,6 +439,7 @@ next_inode: list_entry(sbsec->isec_head.next, struct inode_security_struct, list); struct inode *inode = isec->inode; + list_del_init(&isec->list); spin_unlock(&sbsec->isec_lock); inode = igrab(inode); if (inode) { @@ -447,7 +448,6 @@ next_inode: iput(inode); } spin_lock(&sbsec->isec_lock); - list_del_init(&isec->list); goto next_inode; } spin_unlock(&sbsec->isec_lock); diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index ff42773..86f9694 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1190,7 +1190,7 @@ static void sel_remove_entries(struct dentry *de) spin_lock(&de->d_lock); node = de->d_subdirs.next; while (node != &de->d_subdirs) { - struct dentry *d = list_entry(node, struct dentry, d_u.d_child); + struct dentry *d = list_entry(node, struct dentry, d_child); spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); list_del_init(node); @@ -1664,12 +1664,12 @@ static void sel_remove_classes(void) list_for_each(class_node, &class_dir->d_subdirs) { struct dentry *class_subdir = list_entry(class_node, - struct dentry, d_u.d_child); + struct dentry, d_child); struct list_head *class_subdir_node; list_for_each(class_subdir_node, &class_subdir->d_subdirs) { struct dentry *d = list_entry(class_subdir_node, - struct dentry, d_u.d_child); + struct dentry, 
d_child); if (d->d_inode) if (d->d_inode->i_mode & S_IFDIR) diff --git a/sound/core/control.c b/sound/core/control.c index d8aa206..98a29b2 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -289,6 +289,10 @@ static bool snd_ctl_remove_numid_conflict(struct snd_card *card, { struct snd_kcontrol *kctl; + /* Make sure that the ids assigned to the control do not wrap around */ + if (card->last_numid >= UINT_MAX - count) + card->last_numid = 0; + list_for_each_entry(kctl, &card->controls, list) { if (kctl->id.numid < card->last_numid + 1 + count && kctl->id.numid + kctl->count > card->last_numid + 1) { @@ -331,6 +335,7 @@ int snd_ctl_add(struct snd_card *card, struct snd_kcontrol *kcontrol) { struct snd_ctl_elem_id id; unsigned int idx; + unsigned int count; int err = -EINVAL; if (! kcontrol) @@ -338,6 +343,9 @@ int snd_ctl_add(struct snd_card *card, struct snd_kcontrol *kcontrol) if (snd_BUG_ON(!card || !kcontrol->info)) goto error; id = kcontrol->id; + if (id.index > UINT_MAX - kcontrol->count) + goto error; + down_write(&card->controls_rwsem); if (snd_ctl_find_id(card, &id)) { up_write(&card->controls_rwsem); @@ -359,8 +367,9 @@ int snd_ctl_add(struct snd_card *card, struct snd_kcontrol *kcontrol) card->controls_count += kcontrol->count; kcontrol->id.numid = card->last_numid + 1; card->last_numid += kcontrol->count; + count = kcontrol->count; up_write(&card->controls_rwsem); - for (idx = 0; idx < kcontrol->count; idx++, id.index++, id.numid++) + for (idx = 0; idx < count; idx++, id.index++, id.numid++) snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_ADD, &id); return 0; @@ -389,6 +398,7 @@ int snd_ctl_replace(struct snd_card *card, struct snd_kcontrol *kcontrol, bool add_on_replace) { struct snd_ctl_elem_id id; + unsigned int count; unsigned int idx; struct snd_kcontrol *old; int ret; @@ -424,8 +434,9 @@ add: card->controls_count += kcontrol->count; kcontrol->id.numid = card->last_numid + 1; card->last_numid += kcontrol->count; + count = kcontrol->count; 
up_write(&card->controls_rwsem); - for (idx = 0; idx < kcontrol->count; idx++, id.index++, id.numid++) + for (idx = 0; idx < count; idx++, id.index++, id.numid++) snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_ADD, &id); return 0; @@ -898,9 +909,9 @@ static int snd_ctl_elem_write(struct snd_card *card, struct snd_ctl_file *file, result = kctl->put(kctl, control); } if (result > 0) { + struct snd_ctl_elem_id id = control->id; up_read(&card->controls_rwsem); - snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_VALUE, - &control->id); + snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_VALUE, &id); return 0; } } @@ -992,6 +1003,7 @@ static int snd_ctl_elem_unlock(struct snd_ctl_file *file, struct user_element { struct snd_ctl_elem_info info; + struct snd_card *card; void *elem_data; /* element data */ unsigned long elem_data_size; /* size of element data in bytes */ void *tlv_data; /* TLV data */ @@ -1035,7 +1047,9 @@ static int snd_ctl_elem_user_get(struct snd_kcontrol *kcontrol, { struct user_element *ue = kcontrol->private_data; + mutex_lock(&ue->card->user_ctl_lock); memcpy(&ucontrol->value, ue->elem_data, ue->elem_data_size); + mutex_unlock(&ue->card->user_ctl_lock); return 0; } @@ -1044,10 +1058,12 @@ static int snd_ctl_elem_user_put(struct snd_kcontrol *kcontrol, { int change; struct user_element *ue = kcontrol->private_data; - + + mutex_lock(&ue->card->user_ctl_lock); change = memcmp(&ucontrol->value, ue->elem_data, ue->elem_data_size) != 0; if (change) memcpy(ue->elem_data, &ucontrol->value, ue->elem_data_size); + mutex_unlock(&ue->card->user_ctl_lock); return change; } @@ -1067,19 +1083,32 @@ static int snd_ctl_elem_user_tlv(struct snd_kcontrol *kcontrol, new_data = memdup_user(tlv, size); if (IS_ERR(new_data)) return PTR_ERR(new_data); + mutex_lock(&ue->card->user_ctl_lock); change = ue->tlv_data_size != size; if (!change) change = memcmp(ue->tlv_data, new_data, size); kfree(ue->tlv_data); ue->tlv_data = new_data; ue->tlv_data_size = size; + 
mutex_unlock(&ue->card->user_ctl_lock); } else { - if (! ue->tlv_data_size || ! ue->tlv_data) - return -ENXIO; - if (size < ue->tlv_data_size) - return -ENOSPC; + int ret = 0; + + mutex_lock(&ue->card->user_ctl_lock); + if (!ue->tlv_data_size || !ue->tlv_data) { + ret = -ENXIO; + goto err_unlock; + } + if (size < ue->tlv_data_size) { + ret = -ENOSPC; + goto err_unlock; + } if (copy_to_user(tlv, ue->tlv_data, ue->tlv_data_size)) - return -EFAULT; + ret = -EFAULT; +err_unlock: + mutex_unlock(&ue->card->user_ctl_lock); + if (ret) + return ret; } return change; } @@ -1137,8 +1166,6 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file, struct user_element *ue; int idx, err; - if (!replace && card->user_ctl_count >= MAX_USER_CONTROLS) - return -ENOMEM; if (info->count < 1) return -EINVAL; access = info->access == 0 ? SNDRV_CTL_ELEM_ACCESS_READWRITE : @@ -1147,21 +1174,16 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file, SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE)); info->id.numid = 0; memset(&kctl, 0, sizeof(kctl)); - down_write(&card->controls_rwsem); - _kctl = snd_ctl_find_id(card, &info->id); - err = 0; - if (_kctl) { - if (replace) - err = snd_ctl_remove(card, _kctl); - else - err = -EBUSY; - } else { - if (replace) - err = -ENOENT; + + if (replace) { + err = snd_ctl_remove_user_ctl(file, &info->id); + if (err) + return err; } - up_write(&card->controls_rwsem); - if (err < 0) - return err; + + if (card->user_ctl_count >= MAX_USER_CONTROLS) + return -ENOMEM; + memcpy(&kctl.id, &info->id, sizeof(info->id)); kctl.count = info->owner ? 
info->owner : 1; access |= SNDRV_CTL_ELEM_ACCESS_USER; @@ -1211,6 +1233,7 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file, ue = kzalloc(sizeof(struct user_element) + private_size, GFP_KERNEL); if (ue == NULL) return -ENOMEM; + ue->card = card; ue->info = *info; ue->info.access = 0; ue->elem_data = (char *)ue + sizeof(*ue); @@ -1322,8 +1345,9 @@ static int snd_ctl_tlv_ioctl(struct snd_ctl_file *file, } err = kctl->tlv.c(kctl, op_flag, tlv.length, _tlv->tlv); if (err > 0) { + struct snd_ctl_elem_id id = kctl->id; up_read(&card->controls_rwsem); - snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_TLV, &kctl->id); + snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_TLV, &id); return 0; } } else { diff --git a/sound/core/info.c b/sound/core/info.c index e79baa1..08070e1 100644 --- a/sound/core/info.c +++ b/sound/core/info.c @@ -679,7 +679,7 @@ int snd_info_card_free(struct snd_card *card) * snd_info_get_line - read one line from the procfs buffer * @buffer: the procfs buffer * @line: the buffer to store - * @len: the max. buffer size - 1 + * @len: the max. buffer size * * Reads one line from the buffer and stores the string. 
* @@ -699,7 +699,7 @@ int snd_info_get_line(struct snd_info_buffer *buffer, char *line, int len) buffer->stop = 1; if (c == '\n') break; - if (len) { + if (len > 1) { len--; *line++ = c; } diff --git a/sound/core/init.c b/sound/core/init.c index d047851..b9268a5 100644 --- a/sound/core/init.c +++ b/sound/core/init.c @@ -215,6 +215,7 @@ int snd_card_create(int idx, const char *xid, INIT_LIST_HEAD(&card->devices); init_rwsem(&card->controls_rwsem); rwlock_init(&card->ctl_files_rwlock); + mutex_init(&card->user_ctl_lock); INIT_LIST_HEAD(&card->controls); INIT_LIST_HEAD(&card->ctl_files); spin_lock_init(&card->files_lock); diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c index af49721..c4ac3c1 100644 --- a/sound/core/pcm_compat.c +++ b/sound/core/pcm_compat.c @@ -206,6 +206,8 @@ static int snd_pcm_status_user_compat(struct snd_pcm_substream *substream, if (err < 0) return err; + if (clear_user(src, sizeof(*src))) + return -EFAULT; if (put_user(status.state, &src->state) || compat_put_timespec(&status.trigger_tstamp, &src->trigger_tstamp) || compat_put_timespec(&status.tstamp, &src->tstamp) || diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index f1fe6e6..6dd3089 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -1783,14 +1783,16 @@ static int snd_pcm_lib_ioctl_fifo_size(struct snd_pcm_substream *substream, { struct snd_pcm_hw_params *params = arg; snd_pcm_format_t format; - int channels, width; + int channels; + ssize_t frame_size; params->fifo_size = substream->runtime->hw.fifo_size; if (!(substream->runtime->hw.info & SNDRV_PCM_INFO_FIFO_IN_FRAMES)) { format = params_format(params); channels = params_channels(params); - width = snd_pcm_format_physical_width(format); - params->fifo_size /= width * channels; + frame_size = snd_pcm_format_size(format, channels); + if (frame_size > 0) + params->fifo_size /= (unsigned)frame_size; } return 0; } diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index a68d4c6..c882d07 100644 
--- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -3187,7 +3187,7 @@ static const struct vm_operations_struct snd_pcm_vm_ops_data_fault = { #ifndef ARCH_HAS_DMA_MMAP_COHERENT /* This should be defined / handled globally! */ -#ifdef CONFIG_ARM +#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) #define ARCH_HAS_DMA_MMAP_COHERENT #endif #endif diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig index cc9fd67..be6277c 100644 --- a/sound/pci/Kconfig +++ b/sound/pci/Kconfig @@ -858,8 +858,8 @@ config SND_VIRTUOSO select SND_JACK if INPUT=y || INPUT=SND help Say Y here to include support for sound cards based on the - Asus AV66/AV100/AV200 chips, i.e., Xonar D1, DX, D2, D2X, DS, - Essence ST (Deluxe), and Essence STX. + Asus AV66/AV100/AV200 chips, i.e., Xonar D1, DX, D2, D2X, DS, DSX, + Essence ST (Deluxe), and Essence STX (II). Support for the HDAV1.3 (Deluxe) and HDAV1.3 Slim is experimental; for the Xense, missing. diff --git a/sound/pci/emu10k1/emu10k1_callback.c b/sound/pci/emu10k1/emu10k1_callback.c index cae3659..0a34b5f 100644 --- a/sound/pci/emu10k1/emu10k1_callback.c +++ b/sound/pci/emu10k1/emu10k1_callback.c @@ -85,6 +85,8 @@ snd_emu10k1_ops_setup(struct snd_emux *emux) * get more voice for pcm * * terminate most inactive voice and give it as a pcm voice. + * + * voice_lock is already held. 
*/ int snd_emu10k1_synth_get_voice(struct snd_emu10k1 *hw) @@ -92,12 +94,10 @@ snd_emu10k1_synth_get_voice(struct snd_emu10k1 *hw) struct snd_emux *emu; struct snd_emux_voice *vp; struct best_voice best[V_END]; - unsigned long flags; int i; emu = hw->synth; - spin_lock_irqsave(&emu->voice_lock, flags); lookup_voices(emu, hw, best, 1); /* no OFF voices */ for (i = 0; i < V_END; i++) { if (best[i].voice >= 0) { @@ -113,11 +113,9 @@ snd_emu10k1_synth_get_voice(struct snd_emu10k1 *hw) vp->emu->num_voices--; vp->ch = -1; vp->state = SNDRV_EMUX_ST_OFF; - spin_unlock_irqrestore(&emu->voice_lock, flags); return ch; } } - spin_unlock_irqrestore(&emu->voice_lock, flags); /* not found */ return -ENOMEM; diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index e938a68..fed93cb 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -329,8 +329,10 @@ int snd_hda_get_sub_nodes(struct hda_codec *codec, hda_nid_t nid, unsigned int parm; parm = snd_hda_param_read(codec, nid, AC_PAR_NODE_COUNT); - if (parm == -1) + if (parm == -1) { + *start_id = 0; return 0; + } *start_id = (parm >> 16) & 0x7fff; return (int)(parm & 0x7fff); } diff --git a/sound/pci/hda/hda_i915.c b/sound/pci/hda/hda_i915.c index 76c13d5..9e136be 100644 --- a/sound/pci/hda/hda_i915.c +++ b/sound/pci/hda/hda_i915.c @@ -22,20 +22,28 @@ #include <drm/i915_powerwell.h> #include "hda_i915.h" -static void (*get_power)(void); -static void (*put_power)(void); +static int (*get_power)(void); +static int (*put_power)(void); +static int (*get_cdclk)(void); -void hda_display_power(bool enable) +int hda_display_power(bool enable) { if (!get_power || !put_power) - return; + return -ENODEV; snd_printdd("HDA display power %s \n", enable ? 
"Enable" : "Disable"); if (enable) - get_power(); + return get_power(); else - put_power(); + return put_power(); +} + +int haswell_get_cdclk(void) +{ + if (!get_cdclk) + return -EINVAL; + return get_cdclk(); } int hda_i915_init(void) @@ -55,6 +63,10 @@ int hda_i915_init(void) return -ENODEV; } + get_cdclk = symbol_request(i915_get_cdclk_freq); + if (!get_cdclk) /* may have abnormal BCLK and audio playback rate */ + snd_printd("hda-i915: get_cdclk symbol get fail\n"); + snd_printd("HDA driver get symbol successfully from i915 module\n"); return err; @@ -70,6 +82,10 @@ int hda_i915_exit(void) symbol_put(i915_release_power_well); put_power = NULL; } + if (get_cdclk) { + symbol_put(i915_get_cdclk_freq); + get_cdclk = NULL; + } return 0; } diff --git a/sound/pci/hda/hda_i915.h b/sound/pci/hda/hda_i915.h index 5a63da2..26869fa 100644 --- a/sound/pci/hda/hda_i915.h +++ b/sound/pci/hda/hda_i915.h @@ -17,11 +17,13 @@ #define __SOUND_HDA_I915_H #ifdef CONFIG_SND_HDA_I915 -void hda_display_power(bool enable); +int hda_display_power(bool enable); +int haswell_get_cdclk(void); int hda_i915_init(void); int hda_i915_exit(void); #else -static inline void hda_display_power(bool enable) {} +static inline int hda_display_power(bool enable) { return 0; } +static inline int haswell_get_cdclk(void) { return -EINVAL; } static inline int hda_i915_init(void) { return -ENODEV; diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index b5c4c2e..86e63b6 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -748,6 +748,54 @@ static inline void mark_runtime_wc(struct azx *chip, struct azx_dev *azx_dev, } #endif +#ifdef CONFIG_SND_HDA_I915 +/* Intel HSW/BDW display HDA controller Extended Mode registers. + * EM4 (M value) and EM5 (N Value) are used to convert CDClk (Core Display + * Clock) to 24MHz BCLK: BCLK = CDCLK * M / N + * The values will be lost when the display power well is disabled. 
+ */ +#define ICH6_REG_EM4 0x100c +#define ICH6_REG_EM5 0x1010 + +static void haswell_set_bclk(struct azx *chip) +{ + int cdclk_freq; + unsigned int bclk_m, bclk_n; + + cdclk_freq = haswell_get_cdclk(); + if (cdclk_freq < 0) + return; + + switch (cdclk_freq) { + case 337500: + bclk_m = 16; + bclk_n = 225; + break; + + case 450000: + default: /* default CDCLK 450MHz */ + bclk_m = 4; + bclk_n = 75; + break; + + case 540000: + bclk_m = 4; + bclk_n = 90; + break; + + case 675000: + bclk_m = 8; + bclk_n = 225; + break; + } + + azx_writew(chip, EM4, bclk_m); + azx_writew(chip, EM5, bclk_n); +} +#else +static inline void haswell_set_bclk(struct azx *chip) {} +#endif + static int azx_acquire_irq(struct azx *chip, int do_disconnect); static int azx_send_cmd(struct hda_bus *bus, unsigned int val); /* @@ -2917,7 +2965,7 @@ static int azx_suspend(struct device *dev) struct azx *chip = card->private_data; struct azx_pcm *p; - if (chip->disabled) + if (chip->disabled || chip->init_failed) return 0; snd_power_change_state(card, SNDRV_CTL_POWER_D3hot); @@ -2948,11 +2996,13 @@ static int azx_resume(struct device *dev) struct snd_card *card = dev_get_drvdata(dev); struct azx *chip = card->private_data; - if (chip->disabled) + if (chip->disabled || chip->init_failed) return 0; - if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL) + if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL) { hda_display_power(true); + haswell_set_bclk(chip); + } pci_set_power_state(pci, PCI_D0); pci_restore_state(pci); if (pci_enable_device(pci) < 0) { @@ -2983,7 +3033,7 @@ static int azx_runtime_suspend(struct device *dev) struct snd_card *card = dev_get_drvdata(dev); struct azx *chip = card->private_data; - if (chip->disabled) + if (chip->disabled || chip->init_failed) return 0; if (!(chip->driver_caps & AZX_DCAPS_PM_RUNTIME)) @@ -3009,14 +3059,16 @@ static int azx_runtime_resume(struct device *dev) struct hda_codec *codec; int status; - if (chip->disabled) + if (chip->disabled || chip->init_failed) return 0; 
if (!(chip->driver_caps & AZX_DCAPS_PM_RUNTIME)) return 0; - if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL) + if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL) { hda_display_power(true); + haswell_set_bclk(chip); + } /* Read STATESTS before controller reset */ status = azx_readw(chip, STATESTS); @@ -3044,7 +3096,7 @@ static int azx_runtime_idle(struct device *dev) struct snd_card *card = dev_get_drvdata(dev); struct azx *chip = card->private_data; - if (chip->disabled) + if (chip->disabled || chip->init_failed) return 0; if (!power_save_controller || @@ -3744,6 +3796,10 @@ static int azx_first_init(struct azx *chip) /* initialize chip */ azx_init_pci(chip); + + if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL) + haswell_set_bclk(chip); + azx_init_chip(chip, (probe_only[dev] & 2) == 0); /* codec detection */ @@ -3902,8 +3958,12 @@ static int azx_probe_continue(struct azx *chip) snd_printk(KERN_ERR SFX "Error request power-well from i915\n"); goto out_free; } + err = hda_display_power(true); + if (err < 0) { + snd_printk(KERN_ERR SFX "Cannot turn on display power on i915\n"); + goto out_free; + } #endif - hda_display_power(true); } err = azx_first_init(chip); @@ -3986,6 +4046,9 @@ static DEFINE_PCI_DEVICE_TABLE(azx_ids) = { /* Lynx Point */ { PCI_DEVICE(0x8086, 0x8c20), .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH }, + /* 9 Series */ + { PCI_DEVICE(0x8086, 0x8ca0), + .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH }, /* Wellsburg */ { PCI_DEVICE(0x8086, 0x8d20), .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH }, @@ -4022,6 +4085,9 @@ static DEFINE_PCI_DEVICE_TABLE(azx_ids) = { /* BayTrail */ { PCI_DEVICE(0x8086, 0x0f04), .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH_NOPM }, + /* Braswell */ + { PCI_DEVICE(0x8086, 0x2284), + .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH }, /* ICH */ { PCI_DEVICE(0x8086, 0x2668), .driver_data = AZX_DRIVER_ICH | AZX_DCAPS_OLD_SSYNC | diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c 
index 41ebdd8..10dc0c8 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -316,6 +316,8 @@ static const struct hda_fixup ad1986a_fixups[] = { static const struct snd_pci_quirk ad1986a_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x30af, "HP B2800", AD1986A_FIXUP_LAPTOP_IMIC), + SND_PCI_QUIRK(0x1043, 0x1443, "ASUS Z99He", AD1986A_FIXUP_EAPD), + SND_PCI_QUIRK(0x1043, 0x1447, "ASUS A8JN", AD1986A_FIXUP_EAPD), SND_PCI_QUIRK_MASK(0x1043, 0xff00, 0x8100, "ASUS P5", AD1986A_FIXUP_3STACK), SND_PCI_QUIRK_MASK(0x1043, 0xff00, 0x8200, "ASUS M2", AD1986A_FIXUP_3STACK), SND_PCI_QUIRK(0x10de, 0xcb84, "ASUS A8N-VM", AD1986A_FIXUP_3STACK), diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index a91ad74..8458b6e 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -4379,6 +4379,9 @@ static void ca0132_download_dsp(struct hda_codec *codec) return; /* NOP */ #endif + if (spec->dsp_state == DSP_DOWNLOAD_FAILED) + return; /* don't retry failures */ + chipio_enable_clocks(codec); spec->dsp_state = DSP_DOWNLOADING; if (!ca0132_download_dsp_images(codec)) @@ -4555,7 +4558,8 @@ static int ca0132_init(struct hda_codec *codec) struct auto_pin_cfg *cfg = &spec->autocfg; int i; - spec->dsp_state = DSP_DOWNLOAD_INIT; + if (spec->dsp_state != DSP_DOWNLOAD_FAILED) + spec->dsp_state = DSP_DOWNLOAD_INIT; spec->curr_chip_addx = INVALID_CHIP_ADDRESS; snd_hda_power_up(codec); @@ -4666,6 +4670,7 @@ static int patch_ca0132(struct hda_codec *codec) codec->spec = spec; spec->codec = codec; + spec->dsp_state = DSP_DOWNLOAD_INIT; spec->num_mixers = 1; spec->mixers[0] = ca0132_mixer; diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index fde381d..131d7d4 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -3232,6 +3232,7 @@ enum { CXT_FIXUP_HEADPHONE_MIC_PIN, CXT_FIXUP_HEADPHONE_MIC, CXT_FIXUP_GPIO1, + CXT_FIXUP_ASPIRE_DMIC, }; static void cxt_fixup_stereo_dmic(struct hda_codec 
*codec, @@ -3386,6 +3387,12 @@ static const struct hda_fixup cxt_fixups[] = { { } }, }, + [CXT_FIXUP_ASPIRE_DMIC] = { + .type = HDA_FIXUP_FUNC, + .v.func = cxt_fixup_stereo_dmic, + .chained = true, + .chain_id = CXT_FIXUP_GPIO1, + }, }; static const struct snd_pci_quirk cxt5051_fixups[] = { @@ -3395,7 +3402,7 @@ static const struct snd_pci_quirk cxt5051_fixups[] = { static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x1025, 0x0543, "Acer Aspire One 522", CXT_FIXUP_STEREO_DMIC), - SND_PCI_QUIRK(0x1025, 0x054c, "Acer Aspire 3830TG", CXT_FIXUP_GPIO1), + SND_PCI_QUIRK(0x1025, 0x054c, "Acer Aspire 3830TG", CXT_FIXUP_ASPIRE_DMIC), SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN), SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x215e, "Lenovo T410", CXT_PINCFG_LENOVO_TP410), diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 23e0bc6..14c5778 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -743,12 +743,12 @@ static struct channel_map_table map_tables[] = { { SNDRV_CHMAP_RC, RC }, { SNDRV_CHMAP_FLC, FLC }, { SNDRV_CHMAP_FRC, FRC }, - { SNDRV_CHMAP_FLH, FLH }, - { SNDRV_CHMAP_FRH, FRH }, + { SNDRV_CHMAP_TFL, FLH }, + { SNDRV_CHMAP_TFR, FRH }, { SNDRV_CHMAP_FLW, FLW }, { SNDRV_CHMAP_FRW, FRW }, { SNDRV_CHMAP_TC, TC }, - { SNDRV_CHMAP_FCH, FCH }, + { SNDRV_CHMAP_TFC, FCH }, {} /* terminator */ }; @@ -1023,8 +1023,10 @@ static void hdmi_setup_audio_infoframe(struct hda_codec *codec, AMP_OUT_UNMUTE); eld = &per_pin->sink_eld; - if (!eld->monitor_present) + if (!eld->monitor_present) { + hdmi_set_channel_count(codec, per_pin->cvt_nid, channels); return; + } if (!non_pcm && per_pin->chmap_set) ca = hdmi_manual_channel_allocation(channels, per_pin->chmap); @@ -1476,19 +1478,22 @@ static bool hdmi_present_sense(struct hdmi_spec_per_pin *per_pin, int repoll) } } - if (pin_eld->eld_valid && !eld->eld_valid) { - update_eld = true; + if 
(pin_eld->eld_valid != eld->eld_valid) eld_changed = true; - } + + if (pin_eld->eld_valid && !eld->eld_valid) + update_eld = true; + if (update_eld) { bool old_eld_valid = pin_eld->eld_valid; pin_eld->eld_valid = eld->eld_valid; - eld_changed = pin_eld->eld_size != eld->eld_size || + if (pin_eld->eld_size != eld->eld_size || memcmp(pin_eld->eld_buffer, eld->eld_buffer, - eld->eld_size) != 0; - if (eld_changed) + eld->eld_size) != 0) { memcpy(pin_eld->eld_buffer, eld->eld_buffer, eld->eld_size); + eld_changed = true; + } pin_eld->eld_size = eld->eld_size; pin_eld->info = eld->info; @@ -2852,6 +2857,7 @@ static const struct hda_codec_preset snd_hda_preset_hdmi[] = { { .id = 0x80862808, .name = "Broadwell HDMI", .patch = patch_generic_hdmi }, { .id = 0x80862880, .name = "CedarTrail HDMI", .patch = patch_generic_hdmi }, { .id = 0x80862882, .name = "Valleyview2 HDMI", .patch = patch_generic_hdmi }, +{ .id = 0x80862883, .name = "Braswell HDMI", .patch = patch_generic_hdmi }, { .id = 0x808629fb, .name = "Crestline HDMI", .patch = patch_generic_hdmi }, {} /* terminator */ }; @@ -2908,6 +2914,7 @@ MODULE_ALIAS("snd-hda-codec-id:80862807"); MODULE_ALIAS("snd-hda-codec-id:80862808"); MODULE_ALIAS("snd-hda-codec-id:80862880"); MODULE_ALIAS("snd-hda-codec-id:80862882"); +MODULE_ALIAS("snd-hda-codec-id:80862883"); MODULE_ALIAS("snd-hda-codec-id:808629fb"); MODULE_LICENSE("GPL"); diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 6a32c85..0919345 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -179,6 +179,8 @@ static void alc_fix_pll(struct hda_codec *codec) spec->pll_coef_idx); val = snd_hda_codec_read(codec, spec->pll_nid, 0, AC_VERB_GET_PROC_COEF, 0); + if (val == -1) + return; snd_hda_codec_write(codec, spec->pll_nid, 0, AC_VERB_SET_COEF_INDEX, spec->pll_coef_idx); snd_hda_codec_write(codec, spec->pll_nid, 0, AC_VERB_SET_PROC_COEF, @@ -321,6 +323,7 @@ static void alc_auto_init_amp(struct hda_codec *codec, int 
type) case 0x10ec0885: case 0x10ec0887: /*case 0x10ec0889:*/ /* this causes an SPDIF problem */ + case 0x10ec0900: alc889_coef_init(codec); break; case 0x10ec0888: @@ -944,6 +947,7 @@ static int alc_codec_rename_from_preset(struct hda_codec *codec) static const struct snd_pci_quirk beep_white_list[] = { SND_PCI_QUIRK(0x1043, 0x103c, "ASUS", 1), + SND_PCI_QUIRK(0x1043, 0x115d, "ASUS", 1), SND_PCI_QUIRK(0x1043, 0x829f, "ASUS", 1), SND_PCI_QUIRK(0x1043, 0x8376, "EeePC", 1), SND_PCI_QUIRK(0x1043, 0x83ce, "EeePC", 1), @@ -1596,12 +1600,10 @@ static const struct hda_fixup alc260_fixups[] = { [ALC260_FIXUP_COEF] = { .type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { - { 0x20, AC_VERB_SET_COEF_INDEX, 0x07 }, - { 0x20, AC_VERB_SET_PROC_COEF, 0x3040 }, + { 0x1a, AC_VERB_SET_COEF_INDEX, 0x07 }, + { 0x1a, AC_VERB_SET_PROC_COEF, 0x3040 }, { } }, - .chained = true, - .chain_id = ALC260_FIXUP_HP_PIN_0F, }, [ALC260_FIXUP_GPIO1] = { .type = HDA_FIXUP_VERBS, @@ -1616,8 +1618,8 @@ static const struct hda_fixup alc260_fixups[] = { [ALC260_FIXUP_REPLACER] = { .type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { - { 0x20, AC_VERB_SET_COEF_INDEX, 0x07 }, - { 0x20, AC_VERB_SET_PROC_COEF, 0x3050 }, + { 0x1a, AC_VERB_SET_COEF_INDEX, 0x07 }, + { 0x1a, AC_VERB_SET_PROC_COEF, 0x3050 }, { } }, .chained = true, @@ -2260,6 +2262,7 @@ static int patch_alc882(struct hda_codec *codec) switch (codec->vendor_id) { case 0x10ec0882: case 0x10ec0885: + case 0x10ec0900: break; default: /* ALC883 and variants */ @@ -2689,6 +2692,8 @@ static int alc269_parse_auto_config(struct hda_codec *codec) static void alc269vb_toggle_power_output(struct hda_codec *codec, int power_up) { int val = alc_read_coef_idx(codec, 0x04); + if (val == -1) + return; if (power_up) val |= 1 << 11; else @@ -2759,6 +2764,9 @@ static void alc283_shutup(struct hda_codec *codec) alc_write_coef_idx(codec, 0x43, 0x9004); + /*depop hp during suspend*/ + alc_write_coef_idx(codec, 0x06, 0x2100); + 
snd_hda_codec_write(codec, hp_pin, 0, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); @@ -2903,6 +2911,15 @@ static int alc269_resume(struct hda_codec *codec) snd_hda_codec_resume_cache(codec); alc_inv_dmic_sync(codec, true); hda_call_check_power_status(codec, 0x01); + + /* on some machine, the BIOS will clear the codec gpio data when enter + * suspend, and won't restore the data after resume, so we restore it + * in the driver. + */ + if (spec->gpio_led) + snd_hda_codec_write(codec, codec->afg, 0, AC_VERB_SET_GPIO_DATA, + spec->gpio_led); + if (spec->has_alc5505_dsp) alc5505_dsp_resume(codec); @@ -3049,8 +3066,9 @@ static void alc269_fixup_mic_mute_hook(void *private_data, int enabled) if (spec->mute_led_polarity) enabled = !enabled; - pinval = AC_PINCTL_IN_EN | - (enabled ? AC_PINCTL_VREF_HIZ : AC_PINCTL_VREF_80); + pinval = snd_hda_codec_get_pin_target(codec, spec->mute_led_nid); + pinval &= ~AC_PINCTL_VREFEN; + pinval |= enabled ? AC_PINCTL_VREF_HIZ : AC_PINCTL_VREF_80; if (spec->mute_led_nid) snd_hda_set_pin_ctl_cache(codec, spec->mute_led_nid, pinval); } @@ -3480,6 +3498,15 @@ static void alc_fixup_no_shutup(struct hda_codec *codec, } } +static void alc_fixup_auto_mute_via_amp(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + struct alc_spec *spec = codec->spec; + spec->gen.auto_mute_via_amp = 1; + } +} + static void alc_fixup_headset_mode_alc668(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -3680,6 +3707,7 @@ enum { ALC269_FIXUP_HEADSET_MIC, ALC269_FIXUP_QUANTA_MUTE, ALC269_FIXUP_LIFEBOOK, + ALC269_FIXUP_LIFEBOOK_EXTMIC, ALC269_FIXUP_AMIC, ALC269_FIXUP_DMIC, ALC269VB_FIXUP_AMIC, @@ -3797,6 +3825,13 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_QUANTA_MUTE }, + [ALC269_FIXUP_LIFEBOOK_EXTMIC] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x01a1903c }, /* headset mic, with jack detect */ + { } 
+ }, + }, [ALC269_FIXUP_AMIC] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -4096,6 +4131,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x104d, 0x9084, "Sony VAIO", ALC275_FIXUP_SONY_HWEQ), SND_PCI_QUIRK_VENDOR(0x104d, "Sony VAIO", ALC269_FIXUP_SONY_VAIO), SND_PCI_QUIRK(0x10cf, 0x1475, "Lifebook", ALC269_FIXUP_LIFEBOOK), + SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC), SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x17aa, 0x215e, "Thinkpad L512", ALC269_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x17aa, 0x21b8, "Thinkpad Edge 14", ALC269_FIXUP_SKU_IGNORE), @@ -4111,6 +4147,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x2212, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x2214, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x2215, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), SND_PCI_QUIRK(0x17aa, 0x3978, "IdeaPad Y410P", ALC269_FIXUP_NO_SHUTUP), SND_PCI_QUIRK(0x17aa, 0x5013, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x501a, "Thinkpad", ALC283_FIXUP_INT_MIC), @@ -4207,27 +4244,30 @@ static void alc269_fill_coef(struct hda_codec *codec) if ((alc_get_coef0(codec) & 0x00ff) == 0x017) { val = alc_read_coef_idx(codec, 0x04); /* Power up output pin */ - alc_write_coef_idx(codec, 0x04, val | (1<<11)); + if (val != -1) + alc_write_coef_idx(codec, 0x04, val | (1<<11)); } if ((alc_get_coef0(codec) & 0x00ff) == 0x018) { val = alc_read_coef_idx(codec, 0xd); - if ((val & 0x0c00) >> 10 != 0x1) { + if (val != -1 && (val & 0x0c00) >> 10 != 0x1) { /* Capless ramp up clock control */ alc_write_coef_idx(codec, 0xd, val | (1<<10)); } val = alc_read_coef_idx(codec, 0x17); - if ((val & 0x01c0) >> 6 != 0x4) { + if (val != -1 && (val & 0x01c0) >> 6 != 0x4) { /* Class D power on reset */ 
alc_write_coef_idx(codec, 0x17, val | (1<<7)); } } val = alc_read_coef_idx(codec, 0xd); /* Class D */ - alc_write_coef_idx(codec, 0xd, val | (1<<14)); + if (val != -1) + alc_write_coef_idx(codec, 0xd, val | (1<<14)); val = alc_read_coef_idx(codec, 0x4); /* HP */ - alc_write_coef_idx(codec, 0x4, val | (1<<11)); + if (val != -1) + alc_write_coef_idx(codec, 0x4, val | (1<<11)); } /* @@ -4299,6 +4339,7 @@ static int patch_alc269(struct hda_codec *codec) spec->codec_variant = ALC269_TYPE_ALC284; break; case 0x10ec0286: + case 0x10ec0288: spec->codec_variant = ALC269_TYPE_ALC286; break; case 0x10ec0255: @@ -4646,6 +4687,7 @@ enum { ALC662_FIXUP_BASS_CHMAP, ALC662_FIXUP_BASS_1A, ALC662_FIXUP_BASS_1A_CHMAP, + ALC668_FIXUP_AUTO_MUTE, }; static const struct hda_fixup alc662_fixups[] = { @@ -4806,6 +4848,12 @@ static const struct hda_fixup alc662_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_inv_dmic_0x12, }, + [ALC668_FIXUP_AUTO_MUTE] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_auto_mute_via_amp, + .chained = true, + .chain_id = ALC668_FIXUP_DELL_MIC_NO_PRESENCE + }, [ALC668_FIXUP_DELL_MIC_NO_PRESENCE] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -4851,11 +4899,13 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x038b, "Acer Aspire 8943G", ALC662_FIXUP_ASPIRE), SND_PCI_QUIRK(0x1028, 0x05d8, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x05db, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0x1028, 0x0623, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0x1028, 0x0624, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x0623, "Dell", ALC668_FIXUP_AUTO_MUTE), + SND_PCI_QUIRK(0x1028, 0x0624, "Dell", ALC668_FIXUP_AUTO_MUTE), SND_PCI_QUIRK(0x1028, 0x0625, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0626, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0x1028, 0x0628, "Dell", 
ALC668_FIXUP_DELL_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x0628, "Dell", ALC668_FIXUP_AUTO_MUTE), + SND_PCI_QUIRK(0x1028, 0x0696, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x0698, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800), SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_BASS_1A_CHMAP), SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_BASS_CHMAP), @@ -5095,6 +5145,7 @@ static const struct hda_codec_preset snd_hda_preset_realtek[] = { { .id = 0x10ec0283, .name = "ALC283", .patch = patch_alc269 }, { .id = 0x10ec0284, .name = "ALC284", .patch = patch_alc269 }, { .id = 0x10ec0286, .name = "ALC286", .patch = patch_alc269 }, + { .id = 0x10ec0288, .name = "ALC288", .patch = patch_alc269 }, { .id = 0x10ec0290, .name = "ALC290", .patch = patch_alc269 }, { .id = 0x10ec0292, .name = "ALC292", .patch = patch_alc269 }, { .id = 0x10ec0861, .rev = 0x100340, .name = "ALC660", @@ -5114,6 +5165,7 @@ static const struct hda_codec_preset snd_hda_preset_realtek[] = { { .id = 0x10ec0670, .name = "ALC670", .patch = patch_alc662 }, { .id = 0x10ec0671, .name = "ALC671", .patch = patch_alc662 }, { .id = 0x10ec0680, .name = "ALC680", .patch = patch_alc680 }, + { .id = 0x10ec0867, .name = "ALC891", .patch = patch_alc882 }, { .id = 0x10ec0880, .name = "ALC880", .patch = patch_alc880 }, { .id = 0x10ec0882, .name = "ALC882", .patch = patch_alc882 }, { .id = 0x10ec0883, .name = "ALC883", .patch = patch_alc882 }, diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index d761c0b..121336b 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -84,6 +84,7 @@ enum { STAC_DELL_EQ, STAC_ALIENWARE_M17X, STAC_92HD89XX_HP_FRONT_JACK, + STAC_92HD89XX_HP_Z1_G2_RIGHT_MIC_JACK, STAC_92HD73XX_MODELS }; @@ -102,6 +103,7 @@ enum { STAC_92HD83XXX_HEADSET_JACK, STAC_92HD83XXX_HP, STAC_HP_ENVY_BASS, + STAC_HP_BNB13_EQ, STAC_92HD83XXX_MODELS }; @@ 
-546,8 +548,8 @@ static void stac_init_power_map(struct hda_codec *codec) if (snd_hda_jack_tbl_get(codec, nid)) continue; if (def_conf == AC_JACK_PORT_COMPLEX && - !(spec->vref_mute_led_nid == nid || - is_jack_detectable(codec, nid))) { + spec->vref_mute_led_nid != nid && + is_jack_detectable(codec, nid)) { snd_hda_jack_detect_enable_callback(codec, nid, STAC_PWR_EVENT, jack_update_power); @@ -580,9 +582,9 @@ static void stac_store_hints(struct hda_codec *codec) spec->gpio_mask; } if (get_int_hint(codec, "gpio_dir", &spec->gpio_dir)) - spec->gpio_mask &= spec->gpio_mask; - if (get_int_hint(codec, "gpio_data", &spec->gpio_data)) spec->gpio_dir &= spec->gpio_mask; + if (get_int_hint(codec, "gpio_data", &spec->gpio_data)) + spec->gpio_data &= spec->gpio_mask; if (get_int_hint(codec, "eapd_mask", &spec->eapd_mask)) spec->eapd_mask &= spec->gpio_mask; if (get_int_hint(codec, "gpio_mute", &spec->gpio_mute)) @@ -1791,6 +1793,11 @@ static const struct hda_pintbl stac92hd89xx_hp_front_jack_pin_configs[] = { {} }; +static const struct hda_pintbl stac92hd89xx_hp_z1_g2_right_mic_jack_pin_configs[] = { + { 0x0e, 0x400000f0 }, + {} +}; + static void stac92hd73xx_fixup_ref(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -1913,6 +1920,10 @@ static const struct hda_fixup stac92hd73xx_fixups[] = { [STAC_92HD89XX_HP_FRONT_JACK] = { .type = HDA_FIXUP_PINS, .v.pins = stac92hd89xx_hp_front_jack_pin_configs, + }, + [STAC_92HD89XX_HP_Z1_G2_RIGHT_MIC_JACK] = { + .type = HDA_FIXUP_PINS, + .v.pins = stac92hd89xx_hp_z1_g2_right_mic_jack_pin_configs, } }; @@ -1973,6 +1984,8 @@ static const struct snd_pci_quirk stac92hd73xx_fixup_tbl[] = { "Alienware M17x", STAC_ALIENWARE_M17X), SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x0490, "Alienware M17x R3", STAC_DELL_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1927, + "HP Z1 G2", STAC_92HD89XX_HP_Z1_G2_RIGHT_MIC_JACK), SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x2b17, "unknown HP", STAC_92HD89XX_HP_FRONT_JACK), {} /* terminator */ @@ -2136,6 +2149,434 
@@ static void stac92hd83xxx_fixup_headset_jack(struct hda_codec *codec, spec->headset_jack = 1; } +static const struct hda_verb hp_bnb13_eq_verbs[] = { + /* 44.1KHz base */ + { 0x22, 0x7A6, 0x3E }, + { 0x22, 0x7A7, 0x68 }, + { 0x22, 0x7A8, 0x17 }, + { 0x22, 0x7A9, 0x3E }, + { 0x22, 0x7AA, 0x68 }, + { 0x22, 0x7AB, 0x17 }, + { 0x22, 0x7AC, 0x00 }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x83 }, + { 0x22, 0x7A7, 0x2F }, + { 0x22, 0x7A8, 0xD1 }, + { 0x22, 0x7A9, 0x83 }, + { 0x22, 0x7AA, 0x2F }, + { 0x22, 0x7AB, 0xD1 }, + { 0x22, 0x7AC, 0x01 }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x3E }, + { 0x22, 0x7A7, 0x68 }, + { 0x22, 0x7A8, 0x17 }, + { 0x22, 0x7A9, 0x3E }, + { 0x22, 0x7AA, 0x68 }, + { 0x22, 0x7AB, 0x17 }, + { 0x22, 0x7AC, 0x02 }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x7C }, + { 0x22, 0x7A7, 0xC6 }, + { 0x22, 0x7A8, 0x0C }, + { 0x22, 0x7A9, 0x7C }, + { 0x22, 0x7AA, 0xC6 }, + { 0x22, 0x7AB, 0x0C }, + { 0x22, 0x7AC, 0x03 }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0xC3 }, + { 0x22, 0x7A7, 0x25 }, + { 0x22, 0x7A8, 0xAF }, + { 0x22, 0x7A9, 0xC3 }, + { 0x22, 0x7AA, 0x25 }, + { 0x22, 0x7AB, 0xAF }, + { 0x22, 0x7AC, 0x04 }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x3E }, + { 0x22, 0x7A7, 0x85 }, + { 0x22, 0x7A8, 0x73 }, + { 0x22, 0x7A9, 0x3E }, + { 0x22, 0x7AA, 0x85 }, + { 0x22, 0x7AB, 0x73 }, + { 0x22, 0x7AC, 0x05 }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x85 }, + { 0x22, 0x7A7, 0x39 }, + { 0x22, 0x7A8, 0xC7 }, + { 0x22, 0x7A9, 0x85 }, + { 0x22, 0x7AA, 0x39 }, + { 0x22, 0x7AB, 0xC7 }, + { 0x22, 0x7AC, 0x06 }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x3C }, + { 0x22, 0x7A7, 0x90 }, + { 0x22, 0x7A8, 0xB0 }, + { 0x22, 0x7A9, 0x3C }, + { 0x22, 0x7AA, 0x90 }, + { 0x22, 0x7AB, 0xB0 }, + { 0x22, 0x7AC, 0x07 }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x7A }, + { 0x22, 0x7A7, 0xC6 }, + { 0x22, 0x7A8, 0x39 }, + { 0x22, 0x7A9, 0x7A }, + { 0x22, 0x7AA, 0xC6 }, + { 0x22, 0x7AB, 0x39 }, + { 0x22, 0x7AC, 0x08 }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0xC4 }, 
+ { 0x22, 0x7A7, 0xE9 }, + { 0x22, 0x7A8, 0xDC }, + { 0x22, 0x7A9, 0xC4 }, + { 0x22, 0x7AA, 0xE9 }, + { 0x22, 0x7AB, 0xDC }, + { 0x22, 0x7AC, 0x09 }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x3D }, + { 0x22, 0x7A7, 0xE1 }, + { 0x22, 0x7A8, 0x0D }, + { 0x22, 0x7A9, 0x3D }, + { 0x22, 0x7AA, 0xE1 }, + { 0x22, 0x7AB, 0x0D }, + { 0x22, 0x7AC, 0x0A }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x89 }, + { 0x22, 0x7A7, 0xB6 }, + { 0x22, 0x7A8, 0xEB }, + { 0x22, 0x7A9, 0x89 }, + { 0x22, 0x7AA, 0xB6 }, + { 0x22, 0x7AB, 0xEB }, + { 0x22, 0x7AC, 0x0B }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x39 }, + { 0x22, 0x7A7, 0x9D }, + { 0x22, 0x7A8, 0xFE }, + { 0x22, 0x7A9, 0x39 }, + { 0x22, 0x7AA, 0x9D }, + { 0x22, 0x7AB, 0xFE }, + { 0x22, 0x7AC, 0x0C }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x76 }, + { 0x22, 0x7A7, 0x49 }, + { 0x22, 0x7A8, 0x15 }, + { 0x22, 0x7A9, 0x76 }, + { 0x22, 0x7AA, 0x49 }, + { 0x22, 0x7AB, 0x15 }, + { 0x22, 0x7AC, 0x0D }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0xC8 }, + { 0x22, 0x7A7, 0x80 }, + { 0x22, 0x7A8, 0xF5 }, + { 0x22, 0x7A9, 0xC8 }, + { 0x22, 0x7AA, 0x80 }, + { 0x22, 0x7AB, 0xF5 }, + { 0x22, 0x7AC, 0x0E }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x40 }, + { 0x22, 0x7A7, 0x00 }, + { 0x22, 0x7A8, 0x00 }, + { 0x22, 0x7A9, 0x40 }, + { 0x22, 0x7AA, 0x00 }, + { 0x22, 0x7AB, 0x00 }, + { 0x22, 0x7AC, 0x0F }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x90 }, + { 0x22, 0x7A7, 0x68 }, + { 0x22, 0x7A8, 0xF1 }, + { 0x22, 0x7A9, 0x90 }, + { 0x22, 0x7AA, 0x68 }, + { 0x22, 0x7AB, 0xF1 }, + { 0x22, 0x7AC, 0x10 }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x34 }, + { 0x22, 0x7A7, 0x47 }, + { 0x22, 0x7A8, 0x6C }, + { 0x22, 0x7A9, 0x34 }, + { 0x22, 0x7AA, 0x47 }, + { 0x22, 0x7AB, 0x6C }, + { 0x22, 0x7AC, 0x11 }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x6F }, + { 0x22, 0x7A7, 0x97 }, + { 0x22, 0x7A8, 0x0F }, + { 0x22, 0x7A9, 0x6F }, + { 0x22, 0x7AA, 0x97 }, + { 0x22, 0x7AB, 0x0F }, + { 0x22, 0x7AC, 0x12 }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0xCB }, 
+ { 0x22, 0x7A7, 0xB8 }, + { 0x22, 0x7A8, 0x94 }, + { 0x22, 0x7A9, 0xCB }, + { 0x22, 0x7AA, 0xB8 }, + { 0x22, 0x7AB, 0x94 }, + { 0x22, 0x7AC, 0x13 }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x40 }, + { 0x22, 0x7A7, 0x00 }, + { 0x22, 0x7A8, 0x00 }, + { 0x22, 0x7A9, 0x40 }, + { 0x22, 0x7AA, 0x00 }, + { 0x22, 0x7AB, 0x00 }, + { 0x22, 0x7AC, 0x14 }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x95 }, + { 0x22, 0x7A7, 0x76 }, + { 0x22, 0x7A8, 0x5B }, + { 0x22, 0x7A9, 0x95 }, + { 0x22, 0x7AA, 0x76 }, + { 0x22, 0x7AB, 0x5B }, + { 0x22, 0x7AC, 0x15 }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x31 }, + { 0x22, 0x7A7, 0xAC }, + { 0x22, 0x7A8, 0x31 }, + { 0x22, 0x7A9, 0x31 }, + { 0x22, 0x7AA, 0xAC }, + { 0x22, 0x7AB, 0x31 }, + { 0x22, 0x7AC, 0x16 }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x6A }, + { 0x22, 0x7A7, 0x89 }, + { 0x22, 0x7A8, 0xA5 }, + { 0x22, 0x7A9, 0x6A }, + { 0x22, 0x7AA, 0x89 }, + { 0x22, 0x7AB, 0xA5 }, + { 0x22, 0x7AC, 0x17 }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0xCE }, + { 0x22, 0x7A7, 0x53 }, + { 0x22, 0x7A8, 0xCF }, + { 0x22, 0x7A9, 0xCE }, + { 0x22, 0x7AA, 0x53 }, + { 0x22, 0x7AB, 0xCF }, + { 0x22, 0x7AC, 0x18 }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x40 }, + { 0x22, 0x7A7, 0x00 }, + { 0x22, 0x7A8, 0x00 }, + { 0x22, 0x7A9, 0x40 }, + { 0x22, 0x7AA, 0x00 }, + { 0x22, 0x7AB, 0x00 }, + { 0x22, 0x7AC, 0x19 }, + { 0x22, 0x7AD, 0x80 }, + /* 48KHz base */ + { 0x22, 0x7A6, 0x3E }, + { 0x22, 0x7A7, 0x88 }, + { 0x22, 0x7A8, 0xDC }, + { 0x22, 0x7A9, 0x3E }, + { 0x22, 0x7AA, 0x88 }, + { 0x22, 0x7AB, 0xDC }, + { 0x22, 0x7AC, 0x1A }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x82 }, + { 0x22, 0x7A7, 0xEE }, + { 0x22, 0x7A8, 0x46 }, + { 0x22, 0x7A9, 0x82 }, + { 0x22, 0x7AA, 0xEE }, + { 0x22, 0x7AB, 0x46 }, + { 0x22, 0x7AC, 0x1B }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x3E }, + { 0x22, 0x7A7, 0x88 }, + { 0x22, 0x7A8, 0xDC }, + { 0x22, 0x7A9, 0x3E }, + { 0x22, 0x7AA, 0x88 }, + { 0x22, 0x7AB, 0xDC }, + { 0x22, 0x7AC, 0x1C }, + { 0x22, 0x7AD, 0x80 }, + { 
0x22, 0x7A6, 0x7D }, + { 0x22, 0x7A7, 0x09 }, + { 0x22, 0x7A8, 0x28 }, + { 0x22, 0x7A9, 0x7D }, + { 0x22, 0x7AA, 0x09 }, + { 0x22, 0x7AB, 0x28 }, + { 0x22, 0x7AC, 0x1D }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0xC2 }, + { 0x22, 0x7A7, 0xE5 }, + { 0x22, 0x7A8, 0xB4 }, + { 0x22, 0x7A9, 0xC2 }, + { 0x22, 0x7AA, 0xE5 }, + { 0x22, 0x7AB, 0xB4 }, + { 0x22, 0x7AC, 0x1E }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x3E }, + { 0x22, 0x7A7, 0xA3 }, + { 0x22, 0x7A8, 0x1F }, + { 0x22, 0x7A9, 0x3E }, + { 0x22, 0x7AA, 0xA3 }, + { 0x22, 0x7AB, 0x1F }, + { 0x22, 0x7AC, 0x1F }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x84 }, + { 0x22, 0x7A7, 0xCA }, + { 0x22, 0x7A8, 0xF1 }, + { 0x22, 0x7A9, 0x84 }, + { 0x22, 0x7AA, 0xCA }, + { 0x22, 0x7AB, 0xF1 }, + { 0x22, 0x7AC, 0x20 }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x3C }, + { 0x22, 0x7A7, 0xD5 }, + { 0x22, 0x7A8, 0x9C }, + { 0x22, 0x7A9, 0x3C }, + { 0x22, 0x7AA, 0xD5 }, + { 0x22, 0x7AB, 0x9C }, + { 0x22, 0x7AC, 0x21 }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x7B }, + { 0x22, 0x7A7, 0x35 }, + { 0x22, 0x7A8, 0x0F }, + { 0x22, 0x7A9, 0x7B }, + { 0x22, 0x7AA, 0x35 }, + { 0x22, 0x7AB, 0x0F }, + { 0x22, 0x7AC, 0x22 }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0xC4 }, + { 0x22, 0x7A7, 0x87 }, + { 0x22, 0x7A8, 0x45 }, + { 0x22, 0x7A9, 0xC4 }, + { 0x22, 0x7AA, 0x87 }, + { 0x22, 0x7AB, 0x45 }, + { 0x22, 0x7AC, 0x23 }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x3E }, + { 0x22, 0x7A7, 0x0A }, + { 0x22, 0x7A8, 0x78 }, + { 0x22, 0x7A9, 0x3E }, + { 0x22, 0x7AA, 0x0A }, + { 0x22, 0x7AB, 0x78 }, + { 0x22, 0x7AC, 0x24 }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x88 }, + { 0x22, 0x7A7, 0xE2 }, + { 0x22, 0x7A8, 0x05 }, + { 0x22, 0x7A9, 0x88 }, + { 0x22, 0x7AA, 0xE2 }, + { 0x22, 0x7AB, 0x05 }, + { 0x22, 0x7AC, 0x25 }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x3A }, + { 0x22, 0x7A7, 0x1A }, + { 0x22, 0x7A8, 0xA3 }, + { 0x22, 0x7A9, 0x3A }, + { 0x22, 0x7AA, 0x1A }, + { 0x22, 0x7AB, 0xA3 }, + { 0x22, 0x7AC, 0x26 }, + { 0x22, 0x7AD, 0x80 }, + { 
0x22, 0x7A6, 0x77 }, + { 0x22, 0x7A7, 0x1D }, + { 0x22, 0x7A8, 0xFB }, + { 0x22, 0x7A9, 0x77 }, + { 0x22, 0x7AA, 0x1D }, + { 0x22, 0x7AB, 0xFB }, + { 0x22, 0x7AC, 0x27 }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0xC7 }, + { 0x22, 0x7A7, 0xDA }, + { 0x22, 0x7A8, 0xE5 }, + { 0x22, 0x7A9, 0xC7 }, + { 0x22, 0x7AA, 0xDA }, + { 0x22, 0x7AB, 0xE5 }, + { 0x22, 0x7AC, 0x28 }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x40 }, + { 0x22, 0x7A7, 0x00 }, + { 0x22, 0x7A8, 0x00 }, + { 0x22, 0x7A9, 0x40 }, + { 0x22, 0x7AA, 0x00 }, + { 0x22, 0x7AB, 0x00 }, + { 0x22, 0x7AC, 0x29 }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x8E }, + { 0x22, 0x7A7, 0xD7 }, + { 0x22, 0x7A8, 0x22 }, + { 0x22, 0x7A9, 0x8E }, + { 0x22, 0x7AA, 0xD7 }, + { 0x22, 0x7AB, 0x22 }, + { 0x22, 0x7AC, 0x2A }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x35 }, + { 0x22, 0x7A7, 0x26 }, + { 0x22, 0x7A8, 0xC6 }, + { 0x22, 0x7A9, 0x35 }, + { 0x22, 0x7AA, 0x26 }, + { 0x22, 0x7AB, 0xC6 }, + { 0x22, 0x7AC, 0x2B }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x71 }, + { 0x22, 0x7A7, 0x28 }, + { 0x22, 0x7A8, 0xDE }, + { 0x22, 0x7A9, 0x71 }, + { 0x22, 0x7AA, 0x28 }, + { 0x22, 0x7AB, 0xDE }, + { 0x22, 0x7AC, 0x2C }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0xCA }, + { 0x22, 0x7A7, 0xD9 }, + { 0x22, 0x7A8, 0x3A }, + { 0x22, 0x7A9, 0xCA }, + { 0x22, 0x7AA, 0xD9 }, + { 0x22, 0x7AB, 0x3A }, + { 0x22, 0x7AC, 0x2D }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x40 }, + { 0x22, 0x7A7, 0x00 }, + { 0x22, 0x7A8, 0x00 }, + { 0x22, 0x7A9, 0x40 }, + { 0x22, 0x7AA, 0x00 }, + { 0x22, 0x7AB, 0x00 }, + { 0x22, 0x7AC, 0x2E }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x93 }, + { 0x22, 0x7A7, 0x5E }, + { 0x22, 0x7A8, 0xD8 }, + { 0x22, 0x7A9, 0x93 }, + { 0x22, 0x7AA, 0x5E }, + { 0x22, 0x7AB, 0xD8 }, + { 0x22, 0x7AC, 0x2F }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x32 }, + { 0x22, 0x7A7, 0xB7 }, + { 0x22, 0x7A8, 0xB1 }, + { 0x22, 0x7A9, 0x32 }, + { 0x22, 0x7AA, 0xB7 }, + { 0x22, 0x7AB, 0xB1 }, + { 0x22, 0x7AC, 0x30 }, + { 0x22, 0x7AD, 0x80 }, + { 
0x22, 0x7A6, 0x6C }, + { 0x22, 0x7A7, 0xA1 }, + { 0x22, 0x7A8, 0x28 }, + { 0x22, 0x7A9, 0x6C }, + { 0x22, 0x7AA, 0xA1 }, + { 0x22, 0x7AB, 0x28 }, + { 0x22, 0x7AC, 0x31 }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0xCD }, + { 0x22, 0x7A7, 0x48 }, + { 0x22, 0x7A8, 0x4F }, + { 0x22, 0x7A9, 0xCD }, + { 0x22, 0x7AA, 0x48 }, + { 0x22, 0x7AB, 0x4F }, + { 0x22, 0x7AC, 0x32 }, + { 0x22, 0x7AD, 0x80 }, + { 0x22, 0x7A6, 0x40 }, + { 0x22, 0x7A7, 0x00 }, + { 0x22, 0x7A8, 0x00 }, + { 0x22, 0x7A9, 0x40 }, + { 0x22, 0x7AA, 0x00 }, + { 0x22, 0x7AB, 0x00 }, + { 0x22, 0x7AC, 0x33 }, + { 0x22, 0x7AD, 0x80 }, + /* common */ + { 0x22, 0x782, 0xC1 }, + { 0x22, 0x771, 0x2C }, + { 0x22, 0x772, 0x2C }, + { 0x22, 0x788, 0x04 }, + { 0x01, 0x7B0, 0x08 }, + {} +}; + static const struct hda_fixup stac92hd83xxx_fixups[] = { [STAC_92HD83XXX_REF] = { .type = HDA_FIXUP_PINS, @@ -2210,6 +2651,12 @@ static const struct hda_fixup stac92hd83xxx_fixups[] = { {} }, }, + [STAC_HP_BNB13_EQ] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = hp_bnb13_eq_verbs, + .chained = true, + .chain_id = STAC_92HD83XXX_HP_MIC_LED, + }, }; static const struct hda_model_fixup stac92hd83xxx_models[] = { @@ -2225,6 +2672,7 @@ static const struct hda_model_fixup stac92hd83xxx_models[] = { { .id = STAC_92HD83XXX_HP_MIC_LED, .name = "hp-mic-led" }, { .id = STAC_92HD83XXX_HEADSET_JACK, .name = "headset-jack" }, { .id = STAC_HP_ENVY_BASS, .name = "hp-envy-bass" }, + { .id = STAC_HP_BNB13_EQ, .name = "hp-bnb13-eq" }, {} }; @@ -2273,7 +2721,101 @@ static const struct snd_pci_quirk stac92hd83xxx_fixup_tbl[] = { SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1899, "HP Folio 13", STAC_HP_LED_GPIO10), SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x18df, - "HP Folio", STAC_92HD83XXX_HP_MIC_LED), + "HP Folio", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x18F8, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1909, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x190A, + "HP bNB13", STAC_HP_BNB13_EQ), + 
SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1940, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1941, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1942, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1943, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1944, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1945, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1946, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1948, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1949, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x194A, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x194B, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x194C, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x194E, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x194F, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1950, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1951, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x195A, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x195B, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x195C, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1991, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x2103, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x2104, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x2105, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x2106, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x2107, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x2108, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x2109, + "HP 
bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x210A, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x210B, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x211C, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x211D, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x211E, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x211F, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x2120, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x2121, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x2122, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x2123, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x213E, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x213F, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x2140, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x21B2, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x21B3, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x21B5, + "HP bNB13", STAC_HP_BNB13_EQ), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x21B6, + "HP bNB13", STAC_HP_BNB13_EQ), SND_PCI_QUIRK_MASK(PCI_VENDOR_ID_HP, 0xff00, 0x1900, "HP", STAC_92HD83XXX_HP_MIC_LED), SND_PCI_QUIRK_MASK(PCI_VENDOR_ID_HP, 0xff00, 0x2000, @@ -3656,11 +4198,18 @@ static int stac_parse_auto_config(struct hda_codec *codec) return err; } - stac_init_power_map(codec); - return 0; } +static int stac_build_controls(struct hda_codec *codec) +{ + int err = snd_hda_gen_build_controls(codec); + + if (err < 0) + return err; + stac_init_power_map(codec); + return 0; +} static int stac_init(struct hda_codec *codec) { @@ -3794,7 +4343,7 @@ static void stac_set_power_state(struct hda_codec *codec, hda_nid_t fg, #endif /* CONFIG_PM */ static const struct hda_codec_ops stac_patch_ops = { - 
.build_controls = snd_hda_gen_build_controls, + .build_controls = stac_build_controls, .build_pcms = snd_hda_gen_build_pcms, .init = stac_init, .free = stac_free, diff --git a/sound/pci/ice1712/ice1712.c b/sound/pci/ice1712/ice1712.c index 28ec872..b6e278f 100644 --- a/sound/pci/ice1712/ice1712.c +++ b/sound/pci/ice1712/ice1712.c @@ -685,9 +685,10 @@ static snd_pcm_uframes_t snd_ice1712_playback_pointer(struct snd_pcm_substream * if (!(snd_ice1712_read(ice, ICE1712_IREG_PBK_CTRL) & 1)) return 0; ptr = runtime->buffer_size - inw(ice->ddma_port + 4); + ptr = bytes_to_frames(substream->runtime, ptr); if (ptr == runtime->buffer_size) ptr = 0; - return bytes_to_frames(substream->runtime, ptr); + return ptr; } static snd_pcm_uframes_t snd_ice1712_playback_ds_pointer(struct snd_pcm_substream *substream) @@ -704,9 +705,10 @@ static snd_pcm_uframes_t snd_ice1712_playback_ds_pointer(struct snd_pcm_substrea addr = ICE1712_DSC_ADDR0; ptr = snd_ice1712_ds_read(ice, substream->number * 2, addr) - ice->playback_con_virt_addr[substream->number]; + ptr = bytes_to_frames(substream->runtime, ptr); if (ptr == substream->runtime->buffer_size) ptr = 0; - return bytes_to_frames(substream->runtime, ptr); + return ptr; } static snd_pcm_uframes_t snd_ice1712_capture_pointer(struct snd_pcm_substream *substream) @@ -717,9 +719,10 @@ static snd_pcm_uframes_t snd_ice1712_capture_pointer(struct snd_pcm_substream *s if (!(snd_ice1712_read(ice, ICE1712_IREG_CAP_CTRL) & 1)) return 0; ptr = inl(ICEREG(ice, CONCAP_ADDR)) - ice->capture_con_virt_addr; + ptr = bytes_to_frames(substream->runtime, ptr); if (ptr == substream->runtime->buffer_size) ptr = 0; - return bytes_to_frames(substream->runtime, ptr); + return ptr; } static const struct snd_pcm_hardware snd_ice1712_playback = { @@ -1113,9 +1116,10 @@ static snd_pcm_uframes_t snd_ice1712_playback_pro_pointer(struct snd_pcm_substre if (!(inl(ICEMT(ice, PLAYBACK_CONTROL)) & ICE1712_PLAYBACK_START)) return 0; ptr = ice->playback_pro_size - 
(inw(ICEMT(ice, PLAYBACK_SIZE)) << 2); + ptr = bytes_to_frames(substream->runtime, ptr); if (ptr == substream->runtime->buffer_size) ptr = 0; - return bytes_to_frames(substream->runtime, ptr); + return ptr; } static snd_pcm_uframes_t snd_ice1712_capture_pro_pointer(struct snd_pcm_substream *substream) @@ -1126,9 +1130,10 @@ static snd_pcm_uframes_t snd_ice1712_capture_pro_pointer(struct snd_pcm_substrea if (!(inl(ICEMT(ice, PLAYBACK_CONTROL)) & ICE1712_CAPTURE_START_SHADOW)) return 0; ptr = ice->capture_pro_size - (inw(ICEMT(ice, CAPTURE_SIZE)) << 2); + ptr = bytes_to_frames(substream->runtime, ptr); if (ptr == substream->runtime->buffer_size) ptr = 0; - return bytes_to_frames(substream->runtime, ptr); + return ptr; } static const struct snd_pcm_hardware snd_ice1712_playback_pro = { diff --git a/sound/pci/oxygen/virtuoso.c b/sound/pci/oxygen/virtuoso.c index 64b9fda..dbbbacf 100644 --- a/sound/pci/oxygen/virtuoso.c +++ b/sound/pci/oxygen/virtuoso.c @@ -53,6 +53,7 @@ static DEFINE_PCI_DEVICE_TABLE(xonar_ids) = { { OXYGEN_PCI_SUBID(0x1043, 0x835e) }, { OXYGEN_PCI_SUBID(0x1043, 0x838e) }, { OXYGEN_PCI_SUBID(0x1043, 0x8522) }, + { OXYGEN_PCI_SUBID(0x1043, 0x85f4) }, { OXYGEN_PCI_SUBID_BROKEN_EEPROM }, { } }; diff --git a/sound/pci/oxygen/xonar_pcm179x.c b/sound/pci/oxygen/xonar_pcm179x.c index c8c7f2c..e026059 100644 --- a/sound/pci/oxygen/xonar_pcm179x.c +++ b/sound/pci/oxygen/xonar_pcm179x.c @@ -100,8 +100,8 @@ */ /* - * Xonar Essence ST (Deluxe)/STX - * ----------------------------- + * Xonar Essence ST (Deluxe)/STX (II) + * ---------------------------------- * * CMI8788: * @@ -1138,6 +1138,14 @@ int get_xonar_pcm179x_model(struct oxygen *chip, chip->model.resume = xonar_stx_resume; chip->model.set_dac_params = set_pcm1796_params; break; + case 0x85f4: + chip->model = model_xonar_st; + /* TODO: daughterboard support */ + chip->model.shortname = "Xonar STX II"; + chip->model.init = xonar_stx_init; + chip->model.resume = xonar_stx_resume; + chip->model.set_dac_params 
= set_pcm1796_params; + break; default: return -EINVAL; } diff --git a/sound/soc/blackfin/bf5xx-i2s-pcm.c b/sound/soc/blackfin/bf5xx-i2s-pcm.c index 9cb4a80..bc9983d 100644 --- a/sound/soc/blackfin/bf5xx-i2s-pcm.c +++ b/sound/soc/blackfin/bf5xx-i2s-pcm.c @@ -293,19 +293,19 @@ static int bf5xx_pcm_silence(struct snd_pcm_substream *substream, unsigned int sample_size = runtime->sample_bits / 8; void *buf = runtime->dma_area; struct bf5xx_i2s_pcm_data *dma_data; - unsigned int offset, size; + unsigned int offset, samples; dma_data = snd_soc_dai_get_dma_data(rtd->cpu_dai, substream); if (dma_data->tdm_mode) { offset = pos * 8 * sample_size; - size = count * 8 * sample_size; + samples = count * 8; } else { offset = frames_to_bytes(runtime, pos); - size = frames_to_bytes(runtime, count); + samples = count * runtime->channels; } - snd_pcm_format_set_silence(runtime->format, buf + offset, size); + snd_pcm_format_set_silence(runtime->format, buf + offset, samples); return 0; } diff --git a/sound/soc/codecs/adau1701.c b/sound/soc/codecs/adau1701.c index adee866..56bfc67 100644 --- a/sound/soc/codecs/adau1701.c +++ b/sound/soc/codecs/adau1701.c @@ -230,8 +230,10 @@ static int adau1701_reg_read(void *context, unsigned int reg, *value = 0; - for (i = 0; i < size; i++) - *value |= recv_buf[i] << (i * 8); + for (i = 0; i < size; i++) { + *value <<= 8; + *value |= recv_buf[i]; + } return 0; } diff --git a/sound/soc/codecs/cs42l51.c b/sound/soc/codecs/cs42l51.c index 1e0fa3b..e1dfebb 100644 --- a/sound/soc/codecs/cs42l51.c +++ b/sound/soc/codecs/cs42l51.c @@ -124,9 +124,8 @@ static int cs42l51_set_chan_mix(struct snd_kcontrol *kcontrol, static const DECLARE_TLV_DB_SCALE(adc_pcm_tlv, -5150, 50, 0); static const DECLARE_TLV_DB_SCALE(tone_tlv, -1050, 150, 0); -/* This is a lie. 
after -102 db, it stays at -102 */ -/* maybe a range would be better */ -static const DECLARE_TLV_DB_SCALE(aout_tlv, -11550, 50, 0); + +static const DECLARE_TLV_DB_SCALE(aout_tlv, -10200, 50, 0); static const DECLARE_TLV_DB_SCALE(boost_tlv, 1600, 1600, 0); static const char *chan_mix[] = { @@ -141,7 +140,7 @@ static const struct soc_enum cs42l51_chan_mix = static const struct snd_kcontrol_new cs42l51_snd_controls[] = { SOC_DOUBLE_R_SX_TLV("PCM Playback Volume", CS42L51_PCMA_VOL, CS42L51_PCMB_VOL, - 6, 0x19, 0x7F, adc_pcm_tlv), + 0, 0x19, 0x7F, adc_pcm_tlv), SOC_DOUBLE_R("PCM Playback Switch", CS42L51_PCMA_VOL, CS42L51_PCMB_VOL, 7, 1, 1), SOC_DOUBLE_R_SX_TLV("Analog Playback Volume", @@ -149,7 +148,7 @@ static const struct snd_kcontrol_new cs42l51_snd_controls[] = { 0, 0x34, 0xE4, aout_tlv), SOC_DOUBLE_R_SX_TLV("ADC Mixer Volume", CS42L51_ADCA_VOL, CS42L51_ADCB_VOL, - 6, 0x19, 0x7F, adc_pcm_tlv), + 0, 0x19, 0x7F, adc_pcm_tlv), SOC_DOUBLE_R("ADC Mixer Switch", CS42L51_ADCA_VOL, CS42L51_ADCB_VOL, 7, 1, 1), SOC_SINGLE("Playback Deemphasis Switch", CS42L51_DAC_CTL, 3, 1, 0), diff --git a/sound/soc/codecs/cs42l52.c b/sound/soc/codecs/cs42l52.c index be2ba1b..ab3ac7b 100644 --- a/sound/soc/codecs/cs42l52.c +++ b/sound/soc/codecs/cs42l52.c @@ -352,7 +352,7 @@ static const char * const right_swap_text[] = { static const unsigned int swap_values[] = { 0, 1, 3 }; static const struct soc_enum adca_swap_enum = - SOC_VALUE_ENUM_SINGLE(CS42L52_ADC_PCM_MIXER, 2, 1, + SOC_VALUE_ENUM_SINGLE(CS42L52_ADC_PCM_MIXER, 2, 3, ARRAY_SIZE(left_swap_text), left_swap_text, swap_values); @@ -361,7 +361,7 @@ static const struct snd_kcontrol_new adca_mixer = SOC_DAPM_ENUM("Route", adca_swap_enum); static const struct soc_enum pcma_swap_enum = - SOC_VALUE_ENUM_SINGLE(CS42L52_ADC_PCM_MIXER, 6, 1, + SOC_VALUE_ENUM_SINGLE(CS42L52_ADC_PCM_MIXER, 6, 3, ARRAY_SIZE(left_swap_text), left_swap_text, swap_values); @@ -370,7 +370,7 @@ static const struct snd_kcontrol_new pcma_mixer = SOC_DAPM_ENUM("Route", 
pcma_swap_enum); static const struct soc_enum adcb_swap_enum = - SOC_VALUE_ENUM_SINGLE(CS42L52_ADC_PCM_MIXER, 0, 1, + SOC_VALUE_ENUM_SINGLE(CS42L52_ADC_PCM_MIXER, 0, 3, ARRAY_SIZE(right_swap_text), right_swap_text, swap_values); @@ -379,7 +379,7 @@ static const struct snd_kcontrol_new adcb_mixer = SOC_DAPM_ENUM("Route", adcb_swap_enum); static const struct soc_enum pcmb_swap_enum = - SOC_VALUE_ENUM_SINGLE(CS42L52_ADC_PCM_MIXER, 4, 1, + SOC_VALUE_ENUM_SINGLE(CS42L52_ADC_PCM_MIXER, 4, 3, ARRAY_SIZE(right_swap_text), right_swap_text, swap_values); diff --git a/sound/soc/codecs/cs42l73.c b/sound/soc/codecs/cs42l73.c index 3b20c86..eade6e2 100644 --- a/sound/soc/codecs/cs42l73.c +++ b/sound/soc/codecs/cs42l73.c @@ -325,7 +325,7 @@ static const char * const cs42l73_mono_mix_texts[] = { static const unsigned int cs42l73_mono_mix_values[] = { 0, 1, 2 }; static const struct soc_enum spk_asp_enum = - SOC_VALUE_ENUM_SINGLE(CS42L73_MMIXCTL, 6, 1, + SOC_VALUE_ENUM_SINGLE(CS42L73_MMIXCTL, 6, 3, ARRAY_SIZE(cs42l73_mono_mix_texts), cs42l73_mono_mix_texts, cs42l73_mono_mix_values); @@ -343,7 +343,7 @@ static const struct snd_kcontrol_new spk_xsp_mixer = SOC_DAPM_ENUM("Route", spk_xsp_enum); static const struct soc_enum esl_asp_enum = - SOC_VALUE_ENUM_SINGLE(CS42L73_MMIXCTL, 2, 5, + SOC_VALUE_ENUM_SINGLE(CS42L73_MMIXCTL, 2, 3, ARRAY_SIZE(cs42l73_mono_mix_texts), cs42l73_mono_mix_texts, cs42l73_mono_mix_values); @@ -352,7 +352,7 @@ static const struct snd_kcontrol_new esl_asp_mixer = SOC_DAPM_ENUM("Route", esl_asp_enum); static const struct soc_enum esl_xsp_enum = - SOC_VALUE_ENUM_SINGLE(CS42L73_MMIXCTL, 0, 7, + SOC_VALUE_ENUM_SINGLE(CS42L73_MMIXCTL, 0, 3, ARRAY_SIZE(cs42l73_mono_mix_texts), cs42l73_mono_mix_texts, cs42l73_mono_mix_values); diff --git a/sound/soc/codecs/max98090.c b/sound/soc/codecs/max98090.c index 8bddf3f..9c20ef5 100644 --- a/sound/soc/codecs/max98090.c +++ b/sound/soc/codecs/max98090.c @@ -255,6 +255,7 @@ static struct reg_default max98090_reg[] = { static bool 
max98090_volatile_register(struct device *dev, unsigned int reg) { switch (reg) { + case M98090_REG_SOFTWARE_RESET: case M98090_REG_DEVICE_STATUS: case M98090_REG_JACK_STATUS: case M98090_REG_REVISION_ID: @@ -1377,8 +1378,8 @@ static const struct snd_soc_dapm_route max98090_dapm_routes[] = { {"STENL Mux", "Sidetone Left", "DMICL"}, {"STENR Mux", "Sidetone Right", "ADCR"}, {"STENR Mux", "Sidetone Right", "DMICR"}, - {"DACL", "NULL", "STENL Mux"}, - {"DACR", "NULL", "STENL Mux"}, + {"DACL", NULL, "STENL Mux"}, + {"DACR", NULL, "STENL Mux"}, {"AIFINL", NULL, "SHDN"}, {"AIFINR", NULL, "SHDN"}, @@ -2249,7 +2250,7 @@ static int max98090_probe(struct snd_soc_codec *codec) /* Register for interrupts */ dev_dbg(codec->dev, "irq = %d\n", max98090->irq); - ret = request_threaded_irq(max98090->irq, NULL, + ret = devm_request_threaded_irq(codec->dev, max98090->irq, NULL, max98090_interrupt, IRQF_TRIGGER_FALLING | IRQF_ONESHOT, "max98090_interrupt", codec); if (ret < 0) { @@ -2360,6 +2361,8 @@ static int max98090_runtime_resume(struct device *dev) regcache_cache_only(max98090->regmap, false); + max98090_reset(max98090); + regcache_sync(max98090->regmap); return 0; diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c index c26a8f8..aa5253a 100644 --- a/sound/soc/codecs/rt5640.c +++ b/sound/soc/codecs/rt5640.c @@ -2061,6 +2061,7 @@ static struct snd_soc_codec_driver soc_codec_dev_rt5640 = { static const struct regmap_config rt5640_regmap = { .reg_bits = 8, .val_bits = 16, + .use_single_rw = true, .max_register = RT5640_VENDOR_ID2 + 1 + (ARRAY_SIZE(rt5640_ranges) * RT5640_PR_SPACING), diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index 1f4093f..b76c6b6 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -1398,8 +1398,7 @@ static int sgtl5000_probe(struct snd_soc_codec *codec) /* enable small pop, introduce 400ms delay in turning off */ snd_soc_update_bits(codec, SGTL5000_CHIP_REF_CTRL, - SGTL5000_SMALL_POP, - 
SGTL5000_SMALL_POP); + SGTL5000_SMALL_POP, 1); /* disable short cut detector */ snd_soc_write(codec, SGTL5000_CHIP_SHORT_CTRL, 0); diff --git a/sound/soc/codecs/sgtl5000.h b/sound/soc/codecs/sgtl5000.h index 2f8c889..bd7a344 100644 --- a/sound/soc/codecs/sgtl5000.h +++ b/sound/soc/codecs/sgtl5000.h @@ -275,7 +275,7 @@ #define SGTL5000_BIAS_CTRL_MASK 0x000e #define SGTL5000_BIAS_CTRL_SHIFT 1 #define SGTL5000_BIAS_CTRL_WIDTH 3 -#define SGTL5000_SMALL_POP 0x0001 +#define SGTL5000_SMALL_POP 0 /* * SGTL5000_CHIP_MIC_CTRL diff --git a/sound/soc/codecs/sigmadsp.c b/sound/soc/codecs/sigmadsp.c index 4068f24..bb3878c 100644 --- a/sound/soc/codecs/sigmadsp.c +++ b/sound/soc/codecs/sigmadsp.c @@ -176,6 +176,13 @@ static int _process_sigma_firmware(struct device *dev, goto done; } + if (ssfw_head->version != 1) { + dev_err(dev, + "Failed to load firmware: Invalid version %d. Supported firmware versions: 1\n", + ssfw_head->version); + goto done; + } + crc = crc32(0, fw->data + sizeof(*ssfw_head), fw->size - sizeof(*ssfw_head)); pr_debug("%s: crc=%x\n", __func__, crc); diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c index 64ad84d..11c8d1f 100644 --- a/sound/soc/codecs/tlv320aic3x.c +++ b/sound/soc/codecs/tlv320aic3x.c @@ -164,7 +164,7 @@ static int snd_soc_dapm_put_volsw_aic3x(struct snd_kcontrol *kcontrol, mask <<= shift; val <<= shift; - change = snd_soc_test_bits(codec, val, mask, reg); + change = snd_soc_test_bits(codec, reg, mask, val); if (change) { update.kcontrol = kcontrol; update.reg = reg; diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index 871f851..ea16dc4 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -154,6 +154,7 @@ static struct reg_default wm8962_reg[] = { { 40, 0x0000 }, /* R40 - SPKOUTL volume */ { 41, 0x0000 }, /* R41 - SPKOUTR volume */ + { 49, 0x0010 }, /* R49 - Class D Control 1 */ { 51, 0x0003 }, /* R51 - Class D Control 2 */ { 56, 0x0506 }, /* R56 - Clocking 4 */ @@ -795,7 
+796,6 @@ static bool wm8962_volatile_register(struct device *dev, unsigned int reg) case WM8962_ALC2: case WM8962_THERMAL_SHUTDOWN_STATUS: case WM8962_ADDITIONAL_CONTROL_4: - case WM8962_CLASS_D_CONTROL_1: case WM8962_DC_SERVO_6: case WM8962_INTERRUPT_STATUS_1: case WM8962_INTERRUPT_STATUS_2: @@ -2901,13 +2901,22 @@ static int wm8962_set_fll(struct snd_soc_codec *codec, int fll_id, int source, static int wm8962_mute(struct snd_soc_dai *dai, int mute) { struct snd_soc_codec *codec = dai->codec; - int val; + int val, ret; if (mute) - val = WM8962_DAC_MUTE; + val = WM8962_DAC_MUTE | WM8962_DAC_MUTE_ALT; else val = 0; + /** + * The DAC mute bit is mirrored in two registers, update both to keep + * the register cache consistent. + */ + ret = snd_soc_update_bits(codec, WM8962_CLASS_D_CONTROL_1, + WM8962_DAC_MUTE_ALT, val); + if (ret < 0) + return ret; + return snd_soc_update_bits(codec, WM8962_ADC_DAC_CONTROL_1, WM8962_DAC_MUTE, val); } diff --git a/sound/soc/codecs/wm8962.h b/sound/soc/codecs/wm8962.h index a1a5d52..910aafd 100644 --- a/sound/soc/codecs/wm8962.h +++ b/sound/soc/codecs/wm8962.h @@ -1954,6 +1954,10 @@ #define WM8962_SPKOUTL_ENA_MASK 0x0040 /* SPKOUTL_ENA */ #define WM8962_SPKOUTL_ENA_SHIFT 6 /* SPKOUTL_ENA */ #define WM8962_SPKOUTL_ENA_WIDTH 1 /* SPKOUTL_ENA */ +#define WM8962_DAC_MUTE_ALT 0x0010 /* DAC_MUTE */ +#define WM8962_DAC_MUTE_ALT_MASK 0x0010 /* DAC_MUTE */ +#define WM8962_DAC_MUTE_ALT_SHIFT 4 /* DAC_MUTE */ +#define WM8962_DAC_MUTE_ALT_WIDTH 1 /* DAC_MUTE */ #define WM8962_SPKOUTL_PGA_MUTE 0x0002 /* SPKOUTL_PGA_MUTE */ #define WM8962_SPKOUTL_PGA_MUTE_MASK 0x0002 /* SPKOUTL_PGA_MUTE */ #define WM8962_SPKOUTL_PGA_MUTE_SHIFT 1 /* SPKOUTL_PGA_MUTE */ diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c index 86426a1..c9ce977 100644 --- a/sound/soc/codecs/wm8994.c +++ b/sound/soc/codecs/wm8994.c @@ -3492,6 +3492,7 @@ static irqreturn_t wm8994_mic_irq(int irq, void *data) return IRQ_HANDLED; } +/* Should be called with accdet_lock held 
*/ static void wm1811_micd_stop(struct snd_soc_codec *codec) { struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec); @@ -3499,14 +3500,10 @@ static void wm1811_micd_stop(struct snd_soc_codec *codec) if (!wm8994->jackdet) return; - mutex_lock(&wm8994->accdet_lock); - snd_soc_update_bits(codec, WM8958_MIC_DETECT_1, WM8958_MICD_ENA, 0); wm1811_jackdet_set_mode(codec, WM1811_JACKDET_MODE_JACK); - mutex_unlock(&wm8994->accdet_lock); - if (wm8994->wm8994->pdata.jd_ext_cap) snd_soc_dapm_disable_pin(&codec->dapm, "MICBIAS2"); @@ -3547,10 +3544,10 @@ static void wm8958_open_circuit_work(struct work_struct *work) open_circuit_work.work); struct device *dev = wm8994->wm8994->dev; - wm1811_micd_stop(wm8994->hubs.codec); - mutex_lock(&wm8994->accdet_lock); + wm1811_micd_stop(wm8994->hubs.codec); + dev_dbg(dev, "Reporting open circuit\n"); wm8994->jack_mic = false; diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 0d5de60..f0e97fc 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -1341,6 +1341,7 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp) file, blocks, pos - firmware->size); out_fw: + regmap_async_complete(regmap); release_firmware(firmware); wm_adsp_buf_free(&buf_list); out: @@ -1694,3 +1695,5 @@ int wm_adsp2_init(struct wm_adsp *adsp, bool dvfs) return 0; } EXPORT_SYMBOL_GPL(wm_adsp2_init); + +MODULE_LICENSE("GPL v2"); diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c index 32ddb7f..aab16a7 100644 --- a/sound/soc/davinci/davinci-mcasp.c +++ b/sound/soc/davinci/davinci-mcasp.c @@ -632,8 +632,17 @@ static int davinci_config_channel_size(struct davinci_audio_dev *dev, { u32 fmt; u32 tx_rotate = (word_length / 4) & 0x7; - u32 rx_rotate = (32 - word_length) / 4; u32 mask = (1ULL << word_length) - 1; + /* + * For captured data we should not rotate, inversion and masking is + * enough to get the data to the right position: + * Format data from bus after reverse (XRBUF) + * S16_LE: 
|LSB|MSB|xxx|xxx| |xxx|xxx|MSB|LSB| + * S24_3LE: |LSB|DAT|MSB|xxx| |xxx|MSB|DAT|LSB| + * S24_LE: |LSB|DAT|MSB|xxx| |xxx|MSB|DAT|LSB| + * S32_LE: |LSB|DAT|DAT|MSB| |MSB|DAT|DAT|LSB| + */ + u32 rx_rotate = 0; /* * if s BCLK-to-LRCLK ratio has been configured via the set_clkdiv() diff --git a/sound/soc/dwc/designware_i2s.c b/sound/soc/dwc/designware_i2s.c index 25c31f1..2f63575 100644 --- a/sound/soc/dwc/designware_i2s.c +++ b/sound/soc/dwc/designware_i2s.c @@ -263,6 +263,19 @@ static void dw_i2s_shutdown(struct snd_pcm_substream *substream, snd_soc_dai_set_dma_data(dai, substream, NULL); } +static int dw_i2s_prepare(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) +{ + struct dw_i2s_dev *dev = snd_soc_dai_get_drvdata(dai); + + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + i2s_write_reg(dev->i2s_base, TXFFR, 1); + else + i2s_write_reg(dev->i2s_base, RXFFR, 1); + + return 0; +} + static int dw_i2s_trigger(struct snd_pcm_substream *substream, int cmd, struct snd_soc_dai *dai) { @@ -294,6 +307,7 @@ static struct snd_soc_dai_ops dw_i2s_dai_ops = { .startup = dw_i2s_startup, .shutdown = dw_i2s_shutdown, .hw_params = dw_i2s_hw_params, + .prepare = dw_i2s_prepare, .trigger = dw_i2s_trigger, }; diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c index a3119a0..6c6b35e 100644 --- a/sound/soc/pxa/pxa-ssp.c +++ b/sound/soc/pxa/pxa-ssp.c @@ -725,7 +725,8 @@ static int pxa_ssp_probe(struct snd_soc_dai *dai) ssp_handle = of_parse_phandle(dev->of_node, "port", 0); if (!ssp_handle) { dev_err(dev, "unable to get 'port' phandle\n"); - return -ENODEV; + ret = -ENODEV; + goto err_priv; } priv->ssp = pxa_ssp_request_of(ssp_handle, "SoC audio"); @@ -766,9 +767,7 @@ static int pxa_ssp_remove(struct snd_soc_dai *dai) SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_64000 | \ SNDRV_PCM_RATE_88200 | SNDRV_PCM_RATE_96000) -#define PXA_SSP_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\ - SNDRV_PCM_FMTBIT_S24_LE | \ - SNDRV_PCM_FMTBIT_S32_LE) +#define PXA_SSP_FORMATS 
(SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S32_LE) static const struct snd_soc_dai_ops pxa_ssp_dai_ops = { .startup = pxa_ssp_startup, diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c index b302f3b..2ac8d88 100644 --- a/sound/soc/samsung/i2s.c +++ b/sound/soc/samsung/i2s.c @@ -922,11 +922,9 @@ static int i2s_suspend(struct snd_soc_dai *dai) { struct i2s_dai *i2s = to_info(dai); - if (dai->active) { - i2s->suspend_i2smod = readl(i2s->addr + I2SMOD); - i2s->suspend_i2scon = readl(i2s->addr + I2SCON); - i2s->suspend_i2spsr = readl(i2s->addr + I2SPSR); - } + i2s->suspend_i2smod = readl(i2s->addr + I2SMOD); + i2s->suspend_i2scon = readl(i2s->addr + I2SCON); + i2s->suspend_i2spsr = readl(i2s->addr + I2SPSR); return 0; } @@ -935,11 +933,9 @@ static int i2s_resume(struct snd_soc_dai *dai) { struct i2s_dai *i2s = to_info(dai); - if (dai->active) { - writel(i2s->suspend_i2scon, i2s->addr + I2SCON); - writel(i2s->suspend_i2smod, i2s->addr + I2SMOD); - writel(i2s->suspend_i2spsr, i2s->addr + I2SPSR); - } + writel(i2s->suspend_i2scon, i2s->addr + I2SCON); + writel(i2s->suspend_i2smod, i2s->addr + I2SMOD); + writel(i2s->suspend_i2spsr, i2s->addr + I2SPSR); return 0; } diff --git a/sound/soc/sh/fsi.c b/sound/soc/sh/fsi.c index b33ca7c..5dbf494 100644 --- a/sound/soc/sh/fsi.c +++ b/sound/soc/sh/fsi.c @@ -1775,8 +1775,7 @@ static const struct snd_soc_dai_ops fsi_dai_ops = { static struct snd_pcm_hardware fsi_pcm_hardware = { .info = SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_MMAP | - SNDRV_PCM_INFO_MMAP_VALID | - SNDRV_PCM_INFO_PAUSE, + SNDRV_PCM_INFO_MMAP_VALID, .formats = FSI_FMTS, .rates = FSI_RATES, .rate_min = 8000, diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index a357060..f6e45b1 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -662,8 +662,7 @@ static void rsnd_dai_remove(struct platform_device *pdev, static struct snd_pcm_hardware rsnd_pcm_hardware = { .info = SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_MMAP | - 
SNDRV_PCM_INFO_MMAP_VALID | - SNDRV_PCM_INFO_PAUSE, + SNDRV_PCM_INFO_MMAP_VALID, .formats = RSND_FMTS, .rates = RSND_RATES, .rate_min = 8000, diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index b2949ae..d3fa7b7 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -251,7 +251,6 @@ static int dapm_kcontrol_data_alloc(struct snd_soc_dapm_widget *widget, static void dapm_kcontrol_free(struct snd_kcontrol *kctl) { struct dapm_kcontrol_data *data = snd_kcontrol_chip(kctl); - kfree(data->widget); kfree(data->wlist); kfree(data); } @@ -676,9 +675,9 @@ static int dapm_create_or_share_mixmux_kcontrol(struct snd_soc_dapm_widget *w, int shared; struct snd_kcontrol *kcontrol; bool wname_in_long_name, kcname_in_long_name; - char *long_name; + char *long_name = NULL; const char *name; - int ret; + int ret = 0; if (dapm->codec) prefix = dapm->codec->name_prefix; @@ -743,15 +742,17 @@ static int dapm_create_or_share_mixmux_kcontrol(struct snd_soc_dapm_widget *w, kcontrol = snd_soc_cnew(&w->kcontrol_news[kci], NULL, name, prefix); - kfree(long_name); - if (!kcontrol) - return -ENOMEM; + if (!kcontrol) { + ret = -ENOMEM; + goto exit_free; + } + kcontrol->private_free = dapm_kcontrol_free; ret = dapm_kcontrol_data_alloc(w, kcontrol); if (ret) { snd_ctl_free_one(kcontrol); - return ret; + goto exit_free; } ret = snd_ctl_add(card, kcontrol); @@ -759,17 +760,18 @@ static int dapm_create_or_share_mixmux_kcontrol(struct snd_soc_dapm_widget *w, dev_err(dapm->dev, "ASoC: failed to add widget %s dapm kcontrol %s: %d\n", w->name, name, ret); - return ret; + goto exit_free; } } ret = dapm_kcontrol_add_widget(kcontrol, w); - if (ret) - return ret; + if (ret == 0) + w->kcontrols[kci] = kcontrol; - w->kcontrols[kci] = kcontrol; +exit_free: + kfree(long_name); - return 0; + return ret; } /* create new dapm mixer control */ diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 330c9a6..8457ebb 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1129,13 
+1129,36 @@ static void dpcm_set_fe_runtime(struct snd_pcm_substream *substream) } } +static int dpcm_fe_dai_do_trigger(struct snd_pcm_substream *substream, int cmd); + +/* Set FE's runtime_update state; the state is protected via PCM stream lock + * for avoiding the race with trigger callback. + * If the state is unset and a trigger is pending while the previous operation, + * process the pending trigger action here. + */ +static void dpcm_set_fe_update_state(struct snd_soc_pcm_runtime *fe, + int stream, enum snd_soc_dpcm_update state) +{ + struct snd_pcm_substream *substream = + snd_soc_dpcm_get_substream(fe, stream); + + snd_pcm_stream_lock_irq(substream); + if (state == SND_SOC_DPCM_UPDATE_NO && fe->dpcm[stream].trigger_pending) { + dpcm_fe_dai_do_trigger(substream, + fe->dpcm[stream].trigger_pending - 1); + fe->dpcm[stream].trigger_pending = 0; + } + fe->dpcm[stream].runtime_update = state; + snd_pcm_stream_unlock_irq(substream); +} + static int dpcm_fe_dai_startup(struct snd_pcm_substream *fe_substream) { struct snd_soc_pcm_runtime *fe = fe_substream->private_data; struct snd_pcm_runtime *runtime = fe_substream->runtime; int stream = fe_substream->stream, ret = 0; - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_FE); ret = dpcm_be_dai_startup(fe, fe_substream->stream); if (ret < 0) { @@ -1157,13 +1180,13 @@ static int dpcm_fe_dai_startup(struct snd_pcm_substream *fe_substream) dpcm_set_fe_runtime(fe_substream); snd_pcm_limit_hw_rates(runtime); - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_NO); return 0; unwind: dpcm_be_dai_startup_unwind(fe, fe_substream->stream); be_err: - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_NO); return ret; } @@ -1210,7 +1233,7 @@ static int dpcm_fe_dai_shutdown(struct snd_pcm_substream *substream) struct 
snd_soc_pcm_runtime *fe = substream->private_data; int stream = substream->stream; - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_FE); /* shutdown the BEs */ dpcm_be_dai_shutdown(fe, substream->stream); @@ -1224,7 +1247,7 @@ static int dpcm_fe_dai_shutdown(struct snd_pcm_substream *substream) dpcm_dapm_stream_event(fe, stream, SND_SOC_DAPM_STREAM_STOP); fe->dpcm[stream].state = SND_SOC_DPCM_STATE_CLOSE; - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_NO); return 0; } @@ -1272,7 +1295,7 @@ static int dpcm_fe_dai_hw_free(struct snd_pcm_substream *substream) int err, stream = substream->stream; mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME); - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_FE); dev_dbg(fe->dev, "ASoC: hw_free FE %s\n", fe->dai_link->name); @@ -1287,7 +1310,7 @@ static int dpcm_fe_dai_hw_free(struct snd_pcm_substream *substream) err = dpcm_be_dai_hw_free(fe, stream); fe->dpcm[stream].state = SND_SOC_DPCM_STATE_HW_FREE; - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_NO); mutex_unlock(&fe->card->mutex); return 0; @@ -1380,7 +1403,7 @@ static int dpcm_fe_dai_hw_params(struct snd_pcm_substream *substream, int ret, stream = substream->stream; mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME); - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_FE); memcpy(&fe->dpcm[substream->stream].hw_params, params, sizeof(struct snd_pcm_hw_params)); @@ -1403,7 +1426,7 @@ static int dpcm_fe_dai_hw_params(struct snd_pcm_substream *substream, fe->dpcm[stream].state = SND_SOC_DPCM_STATE_HW_PARAMS; out: - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + dpcm_set_fe_update_state(fe, 
stream, SND_SOC_DPCM_UPDATE_NO); mutex_unlock(&fe->card->mutex); return ret; } @@ -1517,7 +1540,7 @@ static int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream, } EXPORT_SYMBOL_GPL(dpcm_be_dai_trigger); -static int dpcm_fe_dai_trigger(struct snd_pcm_substream *substream, int cmd) +static int dpcm_fe_dai_do_trigger(struct snd_pcm_substream *substream, int cmd) { struct snd_soc_pcm_runtime *fe = substream->private_data; int stream = substream->stream, ret; @@ -1591,6 +1614,23 @@ out: return ret; } +static int dpcm_fe_dai_trigger(struct snd_pcm_substream *substream, int cmd) +{ + struct snd_soc_pcm_runtime *fe = substream->private_data; + int stream = substream->stream; + + /* if FE's runtime_update is already set, we're in race; + * process this trigger later at exit + */ + if (fe->dpcm[stream].runtime_update != SND_SOC_DPCM_UPDATE_NO) { + fe->dpcm[stream].trigger_pending = cmd + 1; + return 0; /* delayed, assuming it's successful */ + } + + /* we're alone, let's trigger */ + return dpcm_fe_dai_do_trigger(substream, cmd); +} + static int dpcm_be_dai_prepare(struct snd_soc_pcm_runtime *fe, int stream) { struct snd_soc_dpcm *dpcm; @@ -1634,7 +1674,7 @@ static int dpcm_fe_dai_prepare(struct snd_pcm_substream *substream) dev_dbg(fe->dev, "ASoC: prepare FE %s\n", fe->dai_link->name); - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_FE); /* there is no point preparing this FE if there are no BEs */ if (list_empty(&fe->dpcm[stream].be_clients)) { @@ -1661,7 +1701,7 @@ static int dpcm_fe_dai_prepare(struct snd_pcm_substream *substream) fe->dpcm[stream].state = SND_SOC_DPCM_STATE_PREPARE; out: - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_NO); mutex_unlock(&fe->card->mutex); return ret; @@ -1808,11 +1848,11 @@ static int dpcm_run_new_update(struct snd_soc_pcm_runtime *fe, int stream) { int ret; - 
fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_BE; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_BE); ret = dpcm_run_update_startup(fe, stream); if (ret < 0) dev_err(fe->dev, "ASoC: failed to startup some BEs\n"); - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_NO); return ret; } @@ -1821,11 +1861,11 @@ static int dpcm_run_old_update(struct snd_soc_pcm_runtime *fe, int stream) { int ret; - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_BE; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_BE); ret = dpcm_run_update_shutdown(fe, stream); if (ret < 0) dev_err(fe->dev, "ASoC: failed to shutdown some BEs\n"); - fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_NO; + dpcm_set_fe_update_state(fe, stream, SND_SOC_DPCM_UPDATE_NO); return ret; } @@ -1882,6 +1922,7 @@ int soc_dpcm_runtime_update(struct snd_soc_card *card) dpcm_be_disconnect(fe, SNDRV_PCM_STREAM_PLAYBACK); } + dpcm_path_put(&list); capture: /* skip if FE doesn't have capture capability */ if (!fe->cpu_dai->driver->capture.channels_min) diff --git a/sound/usb/card.c b/sound/usb/card.c index 64952e2..4476b90 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -307,6 +307,11 @@ static int snd_usb_create_streams(struct snd_usb_audio *chip, int ctrlif) static int snd_usb_audio_free(struct snd_usb_audio *chip) { + struct list_head *p, *n; + + list_for_each_safe(p, n, &chip->ep_list) + snd_usb_endpoint_free(p); + mutex_destroy(&chip->mutex); kfree(chip); return 0; @@ -583,27 +588,30 @@ static void snd_usb_audio_disconnect(struct usb_device *dev, struct snd_usb_audio *chip) { struct snd_card *card; - struct list_head *p, *n; + struct list_head *p; + bool was_shutdown; if (chip == (void *)-1L) return; card = chip->card; down_write(&chip->shutdown_rwsem); + was_shutdown = chip->shutdown; chip->shutdown = 1; up_write(&chip->shutdown_rwsem); mutex_lock(®ister_mutex); - chip->num_interfaces--; - if 
(chip->num_interfaces <= 0) { + if (!was_shutdown) { + struct snd_usb_endpoint *ep; + snd_card_disconnect(card); /* release the pcm resources */ list_for_each(p, &chip->pcm_list) { snd_usb_stream_disconnect(p); } /* release the endpoint resources */ - list_for_each_safe(p, n, &chip->ep_list) { - snd_usb_endpoint_free(p); + list_for_each_entry(ep, &chip->ep_list, list) { + snd_usb_endpoint_release(ep); } /* release the midi resources */ list_for_each(p, &chip->midi_list) { @@ -613,6 +621,10 @@ static void snd_usb_audio_disconnect(struct usb_device *dev, list_for_each(p, &chip->mixer_list) { snd_usb_mixer_disconnect(p); } + } + + chip->num_interfaces--; + if (chip->num_interfaces <= 0) { usb_chip[chip->index] = NULL; mutex_unlock(®ister_mutex); snd_card_free_when_closed(card); diff --git a/sound/usb/card.h b/sound/usb/card.h index 5ecacaa..2d30a9e6 100644 --- a/sound/usb/card.h +++ b/sound/usb/card.h @@ -91,6 +91,7 @@ struct snd_usb_endpoint { unsigned int curframesize; /* current packet size in frames (for capture) */ unsigned int syncmaxsize; /* sync endpoint packet size */ unsigned int fill_max:1; /* fill max packet size always */ + unsigned int udh01_fb_quirk:1; /* corrupted feedback data */ unsigned int datainterval; /* log_2 of data packet interval */ unsigned int syncinterval; /* P for adaptive mode, 0 otherwise */ unsigned char silence_value; diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index 93e970f..b0a0f20 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -470,6 +470,10 @@ struct snd_usb_endpoint *snd_usb_add_endpoint(struct snd_usb_audio *chip, ep->syncinterval = 3; ep->syncmaxsize = le16_to_cpu(get_endpoint(alts, 1)->wMaxPacketSize); + + if (chip->usb_id == USB_ID(0x0644, 0x8038) /* TEAC UD-H01 */ && + ep->syncmaxsize == 4) + ep->udh01_fb_quirk = 1; } list_add_tail(&ep->list, &chip->ep_list); @@ -956,19 +960,30 @@ int snd_usb_endpoint_deactivate(struct snd_usb_endpoint *ep) } /** + * snd_usb_endpoint_release: Tear down an 
snd_usb_endpoint + * + * @ep: the endpoint to release + * + * This function does not care for the endpoint's use count but will tear + * down all the streaming URBs immediately. + */ +void snd_usb_endpoint_release(struct snd_usb_endpoint *ep) +{ + release_urbs(ep, 1); +} + +/** * snd_usb_endpoint_free: Free the resources of an snd_usb_endpoint * * @ep: the list header of the endpoint to free * - * This function does not care for the endpoint's use count but will tear - * down all the streaming URBs immediately and free all resources. + * This free all resources of the given ep. */ void snd_usb_endpoint_free(struct list_head *head) { struct snd_usb_endpoint *ep; ep = list_entry(head, struct snd_usb_endpoint, list); - release_urbs(ep, 1); kfree(ep); } @@ -1078,7 +1093,16 @@ void snd_usb_handle_sync_urb(struct snd_usb_endpoint *ep, if (f == 0) return; - if (unlikely(ep->freqshift == INT_MIN)) { + if (unlikely(sender->udh01_fb_quirk)) { + /* + * The TEAC UD-H01 firmware sometimes changes the feedback value + * by +/- 0x1.0000. 
+ */ + if (f < ep->freqn - 0x8000) + f += 0x10000; + else if (f > ep->freqn + 0x8000) + f -= 0x10000; + } else if (unlikely(ep->freqshift == INT_MIN)) { /* * The first time we see a feedback value, determine its format * by shifting it left or right until it matches the nominal diff --git a/sound/usb/endpoint.h b/sound/usb/endpoint.h index 2287adf..fe65a38 100644 --- a/sound/usb/endpoint.h +++ b/sound/usb/endpoint.h @@ -21,6 +21,7 @@ void snd_usb_endpoint_stop(struct snd_usb_endpoint *ep); void snd_usb_endpoint_sync_pending_stop(struct snd_usb_endpoint *ep); int snd_usb_endpoint_activate(struct snd_usb_endpoint *ep); int snd_usb_endpoint_deactivate(struct snd_usb_endpoint *ep); +void snd_usb_endpoint_release(struct snd_usb_endpoint *ep); void snd_usb_endpoint_free(struct list_head *head); int snd_usb_endpoint_implicit_feedback_sink(struct snd_usb_endpoint *ep); diff --git a/sound/usb/midi.c b/sound/usb/midi.c index b901f46..c7aa71e 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -364,6 +364,8 @@ static void snd_usbmidi_error_timer(unsigned long data) if (in && in->error_resubmit) { in->error_resubmit = 0; for (j = 0; j < INPUT_URBS; ++j) { + if (atomic_read(&in->urbs[j]->use_count)) + continue; in->urbs[j]->dev = umidi->dev; snd_usbmidi_submit_urb(in->urbs[j], GFP_ATOMIC); } diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index be4db47..061be0e 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -886,6 +886,7 @@ static void volume_control_quirks(struct usb_mixer_elem_info *cval, case USB_ID(0x046d, 0x0807): /* Logitech Webcam C500 */ case USB_ID(0x046d, 0x0808): case USB_ID(0x046d, 0x0809): + case USB_ID(0x046d, 0x0819): /* Logitech Webcam C210 */ case USB_ID(0x046d, 0x081b): /* HD Webcam c310 */ case USB_ID(0x046d, 0x081d): /* HD Webcam c510 */ case USB_ID(0x046d, 0x0825): /* HD Webcam c270 */ diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c index 0339d46..4df31b0 100644 --- a/sound/usb/mixer_maps.c +++ b/sound/usb/mixer_maps.c @@ 
-322,8 +322,11 @@ static struct usbmix_name_map hercules_usb51_map[] = { { 0 } /* terminator */ }; -static const struct usbmix_name_map kef_x300a_map[] = { - { 10, NULL }, /* firmware locks up (?) when we try to access this FU */ +/* some (all?) SCMS USB3318 devices are affected by a firmware lock up + * when anything attempts to access FU 10 (control) + */ +static const struct usbmix_name_map scms_usb3318_map[] = { + { 10, NULL }, { 0 } }; @@ -415,8 +418,14 @@ static struct usbmix_ctl_map usbmix_ctl_maps[] = { .map = ebox44_map, }, { + /* KEF X300A */ .id = USB_ID(0x27ac, 0x1000), - .map = kef_x300a_map, + .map = scms_usb3318_map, + }, + { + /* Arcam rPAC */ + .id = USB_ID(0x25c4, 0x0003), + .map = scms_usb3318_map, }, { 0 } /* terminator */ }; diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index d42a584..ea4b9a8 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -802,6 +802,11 @@ static int snd_ftu_eff_switch_put(struct snd_kcontrol *kctl, return changed; } +static void kctl_private_value_free(struct snd_kcontrol *kctl) +{ + kfree((void *)kctl->private_value); +} + static int snd_ftu_create_effect_switch(struct usb_mixer_interface *mixer, int validx, int bUnitID) { @@ -836,6 +841,7 @@ static int snd_ftu_create_effect_switch(struct usb_mixer_interface *mixer, return -ENOMEM; } + kctl->private_free = kctl_private_value_free; err = snd_ctl_add(mixer->chip->card, kctl); if (err < 0) return err; diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index b375d58..98ca3540 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -1492,7 +1492,8 @@ static void retire_playback_urb(struct snd_usb_substream *subs, * on two reads of a counter updated every ms. 
*/ if (abs(est_delay - subs->last_delay) * 1000 > runtime->rate * 2) - snd_printk(KERN_DEBUG "delay: estimated %d, actual %d\n", + dev_dbg_ratelimited(&subs->dev->dev, + "delay: estimated %d, actual %d\n", est_delay, subs->last_delay); if (!subs->running) { diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index f5f0595..83bddbd 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -72,22 +72,21 @@ } }, -/* Creative/Toshiba Multimedia Center SB-0500 */ +/* Creative/E-Mu devices */ { - USB_DEVICE(0x041e, 0x3048), + USB_DEVICE(0x041e, 0x3010), .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) { - .vendor_name = "Toshiba", - .product_name = "SB-0500", + .vendor_name = "Creative Labs", + .product_name = "Sound Blaster MP3+", .ifnum = QUIRK_NO_INTERFACE } }, - -/* Creative/E-Mu devices */ +/* Creative/Toshiba Multimedia Center SB-0500 */ { - USB_DEVICE(0x041e, 0x3010), + USB_DEVICE(0x041e, 0x3048), .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) { - .vendor_name = "Creative Labs", - .product_name = "Sound Blaster MP3+", + .vendor_name = "Toshiba", + .product_name = "SB-0500", .ifnum = QUIRK_NO_INTERFACE } }, @@ -386,6 +385,36 @@ YAMAHA_DEVICE(0x105d, NULL), } }, { + USB_DEVICE(0x0499, 0x1509), + .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) { + /* .vendor_name = "Yamaha", */ + /* .product_name = "Steinberg UR22", */ + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = (const struct snd_usb_audio_quirk[]) { + { + .ifnum = 1, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + { + .ifnum = 2, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + { + .ifnum = 3, + .type = QUIRK_MIDI_YAMAHA + }, + { + .ifnum = 4, + .type = QUIRK_IGNORE_INTERFACE + }, + { + .ifnum = -1 + } + } + } +}, +{ USB_DEVICE(0x0499, 0x150a), .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) { /* .vendor_name = "Yamaha", */ @@ -1582,6 +1611,35 @@ YAMAHA_DEVICE(0x7010, "UB99"), } }, { + 
/* BOSS ME-25 */ + USB_DEVICE(0x0582, 0x0113), + .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) { + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = (const struct snd_usb_audio_quirk[]) { + { + .ifnum = 0, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + { + .ifnum = 1, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + { + .ifnum = 2, + .type = QUIRK_MIDI_FIXED_ENDPOINT, + .data = & (const struct snd_usb_midi_endpoint_info) { + .out_cables = 0x0001, + .in_cables = 0x0001 + } + }, + { + .ifnum = -1 + } + } + } +}, +{ /* only 44.1 kHz works at the moment */ USB_DEVICE(0x0582, 0x0120), .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) { @@ -2521,6 +2579,46 @@ YAMAHA_DEVICE(0x7010, "UB99"), } }, { + USB_DEVICE(0x1235, 0x0010), + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { + .vendor_name = "Focusrite", + .product_name = "Saffire 6 USB", + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = (const struct snd_usb_audio_quirk[]) { + { + .ifnum = 0, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S24_3LE, + .channels = 4, + .iface = 0, + .altsetting = 1, + .altset_idx = 1, + .attributes = UAC_EP_CS_ATTR_SAMPLE_RATE, + .endpoint = 0x01, + .ep_attr = USB_ENDPOINT_XFER_ISOC, + .rates = SNDRV_PCM_RATE_44100 | + SNDRV_PCM_RATE_48000, + .rate_min = 44100, + .rate_max = 48000, + .nr_rates = 2, + .rate_table = (unsigned int[]) { + 44100, 48000 + } + } + }, + { + .ifnum = 1, + .type = QUIRK_MIDI_RAW_BYTES + }, + { + .ifnum = -1 + } + } + } +}, +{ USB_DEVICE(0x1235, 0x0018), .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) { .vendor_name = "Novation", @@ -2569,6 +2667,57 @@ YAMAHA_DEVICE(0x7010, "UB99"), .type = QUIRK_MIDI_NOVATION } }, +{ + /* + * Focusrite Scarlett 18i6 + * + * Avoid mixer creation, which otherwise fails because some of + * the interface descriptor subtypes for interface 0 are + * unknown. 
That should be fixed or worked-around but this at + * least allows the device to be used successfully with a DAW + * and an external mixer. See comments below about other + * ignored interfaces. + */ + USB_DEVICE(0x1235, 0x8004), + .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) { + .vendor_name = "Focusrite", + .product_name = "Scarlett 18i6", + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = & (const struct snd_usb_audio_quirk[]) { + { + /* InterfaceSubClass 1 (Control Device) */ + .ifnum = 0, + .type = QUIRK_IGNORE_INTERFACE + }, + { + .ifnum = 1, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + { + .ifnum = 2, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + { + /* InterfaceSubClass 1 (Control Device) */ + .ifnum = 3, + .type = QUIRK_IGNORE_INTERFACE + }, + { + .ifnum = 4, + .type = QUIRK_MIDI_STANDARD_INTERFACE + }, + { + /* InterfaceSubClass 1 (Device Firmware Update) */ + .ifnum = 5, + .type = QUIRK_IGNORE_INTERFACE + }, + { + .ifnum = -1 + } + } + } +}, /* Access Music devices */ { @@ -2655,133 +2804,45 @@ YAMAHA_DEVICE(0x7010, "UB99"), } }, -/* Hauppauge HVR-950Q and HVR-850 */ -{ - USB_DEVICE_VENDOR_SPEC(0x2040, 0x7200), - .match_flags = USB_DEVICE_ID_MATCH_DEVICE | - USB_DEVICE_ID_MATCH_INT_CLASS | - USB_DEVICE_ID_MATCH_INT_SUBCLASS, - .bInterfaceClass = USB_CLASS_AUDIO, - .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL, - .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { - .vendor_name = "Hauppauge", - .product_name = "HVR-950Q", - .ifnum = QUIRK_ANY_INTERFACE, - .type = QUIRK_AUDIO_ALIGN_TRANSFER, - } -}, -{ - USB_DEVICE_VENDOR_SPEC(0x2040, 0x7240), - .match_flags = USB_DEVICE_ID_MATCH_DEVICE | - USB_DEVICE_ID_MATCH_INT_CLASS | - USB_DEVICE_ID_MATCH_INT_SUBCLASS, - .bInterfaceClass = USB_CLASS_AUDIO, - .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL, - .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { - .vendor_name = "Hauppauge", - .product_name = "HVR-850", - .ifnum = 
QUIRK_ANY_INTERFACE, - .type = QUIRK_AUDIO_ALIGN_TRANSFER, - } -}, -{ - USB_DEVICE_VENDOR_SPEC(0x2040, 0x7210), - .match_flags = USB_DEVICE_ID_MATCH_DEVICE | - USB_DEVICE_ID_MATCH_INT_CLASS | - USB_DEVICE_ID_MATCH_INT_SUBCLASS, - .bInterfaceClass = USB_CLASS_AUDIO, - .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL, - .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { - .vendor_name = "Hauppauge", - .product_name = "HVR-950Q", - .ifnum = QUIRK_ANY_INTERFACE, - .type = QUIRK_AUDIO_ALIGN_TRANSFER, - } -}, -{ - USB_DEVICE_VENDOR_SPEC(0x2040, 0x7217), - .match_flags = USB_DEVICE_ID_MATCH_DEVICE | - USB_DEVICE_ID_MATCH_INT_CLASS | - USB_DEVICE_ID_MATCH_INT_SUBCLASS, - .bInterfaceClass = USB_CLASS_AUDIO, - .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL, - .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { - .vendor_name = "Hauppauge", - .product_name = "HVR-950Q", - .ifnum = QUIRK_ANY_INTERFACE, - .type = QUIRK_AUDIO_ALIGN_TRANSFER, - } -}, -{ - USB_DEVICE_VENDOR_SPEC(0x2040, 0x721b), - .match_flags = USB_DEVICE_ID_MATCH_DEVICE | - USB_DEVICE_ID_MATCH_INT_CLASS | - USB_DEVICE_ID_MATCH_INT_SUBCLASS, - .bInterfaceClass = USB_CLASS_AUDIO, - .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL, - .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { - .vendor_name = "Hauppauge", - .product_name = "HVR-950Q", - .ifnum = QUIRK_ANY_INTERFACE, - .type = QUIRK_AUDIO_ALIGN_TRANSFER, - } -}, -{ - USB_DEVICE_VENDOR_SPEC(0x2040, 0x721e), - .match_flags = USB_DEVICE_ID_MATCH_DEVICE | - USB_DEVICE_ID_MATCH_INT_CLASS | - USB_DEVICE_ID_MATCH_INT_SUBCLASS, - .bInterfaceClass = USB_CLASS_AUDIO, - .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL, - .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { - .vendor_name = "Hauppauge", - .product_name = "HVR-950Q", - .ifnum = QUIRK_ANY_INTERFACE, - .type = QUIRK_AUDIO_ALIGN_TRANSFER, - } -}, -{ - USB_DEVICE_VENDOR_SPEC(0x2040, 0x721f), - .match_flags = USB_DEVICE_ID_MATCH_DEVICE | - 
USB_DEVICE_ID_MATCH_INT_CLASS | - USB_DEVICE_ID_MATCH_INT_SUBCLASS, - .bInterfaceClass = USB_CLASS_AUDIO, - .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL, - .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { - .vendor_name = "Hauppauge", - .product_name = "HVR-950Q", - .ifnum = QUIRK_ANY_INTERFACE, - .type = QUIRK_AUDIO_ALIGN_TRANSFER, - } -}, -{ - USB_DEVICE_VENDOR_SPEC(0x2040, 0x7280), - .match_flags = USB_DEVICE_ID_MATCH_DEVICE | - USB_DEVICE_ID_MATCH_INT_CLASS | - USB_DEVICE_ID_MATCH_INT_SUBCLASS, - .bInterfaceClass = USB_CLASS_AUDIO, - .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL, - .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { - .vendor_name = "Hauppauge", - .product_name = "HVR-950Q", - .ifnum = QUIRK_ANY_INTERFACE, - .type = QUIRK_AUDIO_ALIGN_TRANSFER, - } -}, -{ - USB_DEVICE_VENDOR_SPEC(0x0fd9, 0x0008), - .match_flags = USB_DEVICE_ID_MATCH_DEVICE | - USB_DEVICE_ID_MATCH_INT_CLASS | - USB_DEVICE_ID_MATCH_INT_SUBCLASS, - .bInterfaceClass = USB_CLASS_AUDIO, - .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL, - .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { - .vendor_name = "Hauppauge", - .product_name = "HVR-950Q", - .ifnum = QUIRK_ANY_INTERFACE, - .type = QUIRK_AUDIO_ALIGN_TRANSFER, - } -}, +/* + * Auvitek au0828 devices with audio interface. + * This should be kept in sync with drivers/media/usb/au0828/au0828-cards.c + * Please notice that some drivers are DVB only, and don't need to be + * here. That's the case, for example, of DVICO_FUSIONHDTV7. 
+ */ + +#define AU0828_DEVICE(vid, pid, vname, pname) { \ + USB_DEVICE_VENDOR_SPEC(vid, pid), \ + .match_flags = USB_DEVICE_ID_MATCH_DEVICE | \ + USB_DEVICE_ID_MATCH_INT_CLASS | \ + USB_DEVICE_ID_MATCH_INT_SUBCLASS, \ + .bInterfaceClass = USB_CLASS_AUDIO, \ + .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL, \ + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { \ + .vendor_name = vname, \ + .product_name = pname, \ + .ifnum = QUIRK_ANY_INTERFACE, \ + .type = QUIRK_AUDIO_ALIGN_TRANSFER, \ + } \ +} + +AU0828_DEVICE(0x2040, 0x7200, "Hauppauge", "HVR-950Q"), +AU0828_DEVICE(0x2040, 0x7240, "Hauppauge", "HVR-850"), +AU0828_DEVICE(0x2040, 0x7210, "Hauppauge", "HVR-950Q"), +AU0828_DEVICE(0x2040, 0x7217, "Hauppauge", "HVR-950Q"), +AU0828_DEVICE(0x2040, 0x721b, "Hauppauge", "HVR-950Q"), +AU0828_DEVICE(0x2040, 0x721e, "Hauppauge", "HVR-950Q"), +AU0828_DEVICE(0x2040, 0x721f, "Hauppauge", "HVR-950Q"), +AU0828_DEVICE(0x2040, 0x7280, "Hauppauge", "HVR-950Q"), +AU0828_DEVICE(0x0fd9, 0x0008, "Hauppauge", "HVR-950Q"), +AU0828_DEVICE(0x2040, 0x7201, "Hauppauge", "HVR-950Q-MXL"), +AU0828_DEVICE(0x2040, 0x7211, "Hauppauge", "HVR-950Q-MXL"), +AU0828_DEVICE(0x2040, 0x7281, "Hauppauge", "HVR-950Q-MXL"), +AU0828_DEVICE(0x05e1, 0x0480, "Hauppauge", "Woodbury"), +AU0828_DEVICE(0x2040, 0x8200, "Hauppauge", "Woodbury"), +AU0828_DEVICE(0x2040, 0x7260, "Hauppauge", "HVR-950Q"), +AU0828_DEVICE(0x2040, 0x7213, "Hauppauge", "HVR-950Q"), +AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), /* Digidesign Mbox */ { @@ -3054,58 +3115,6 @@ YAMAHA_DEVICE(0x7010, "UB99"), { /* - * Focusrite Scarlett 18i6 - * - * Avoid mixer creation, which otherwise fails because some of - * the interface descriptor subtypes for interface 0 are - * unknown. That should be fixed or worked-around but this at - * least allows the device to be used successfully with a DAW - * and an external mixer. See comments below about other - * ignored interfaces. 
- */ - USB_DEVICE(0x1235, 0x8004), - .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) { - .vendor_name = "Focusrite", - .product_name = "Scarlett 18i6", - .ifnum = QUIRK_ANY_INTERFACE, - .type = QUIRK_COMPOSITE, - .data = & (const struct snd_usb_audio_quirk[]) { - { - /* InterfaceSubClass 1 (Control Device) */ - .ifnum = 0, - .type = QUIRK_IGNORE_INTERFACE - }, - { - .ifnum = 1, - .type = QUIRK_AUDIO_STANDARD_INTERFACE - }, - { - .ifnum = 2, - .type = QUIRK_AUDIO_STANDARD_INTERFACE - }, - { - /* InterfaceSubClass 1 (Control Device) */ - .ifnum = 3, - .type = QUIRK_IGNORE_INTERFACE - }, - { - .ifnum = 4, - .type = QUIRK_MIDI_STANDARD_INTERFACE - }, - { - /* InterfaceSubClass 1 (Device Firmware Update) */ - .ifnum = 5, - .type = QUIRK_IGNORE_INTERFACE - }, - { - .ifnum = -1 - } - } - } -}, - -{ - /* * Some USB MIDI devices don't have an audio control interface, * so we have to grab MIDI streaming interfaces here. */ diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 0df9ede..8bea686 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -662,8 +662,9 @@ static int snd_usb_cm6206_boot_quirk(struct usb_device *dev) /* * Novation Twitch DJ controller + * Focusrite Novation Saffire 6 USB audio card */ -static int snd_usb_twitch_boot_quirk(struct usb_device *dev) +static int snd_usb_novation_boot_quirk(struct usb_device *dev) { /* preemptively set up the device because otherwise the * raw MIDI endpoints are not active */ @@ -972,9 +973,9 @@ int snd_usb_apply_boot_quirk(struct usb_device *dev, /* Digidesign Mbox 2 */ return snd_usb_mbox2_boot_quirk(dev); - case USB_ID(0x1235, 0x0018): - /* Focusrite Novation Twitch */ - return snd_usb_twitch_boot_quirk(dev); + case USB_ID(0x1235, 0x0010): /* Focusrite Novation Saffire 6 USB */ + case USB_ID(0x1235, 0x0018): /* Focusrite Novation Twitch */ + return snd_usb_novation_boot_quirk(dev); case USB_ID(0x133e, 0x0815): /* Access Music VirusTI Desktop */ @@ -1127,6 +1128,20 @@ void 
snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe, if ((le16_to_cpu(dev->descriptor.idVendor) == 0x23ba) && (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) mdelay(20); + + /* Marantz/Denon devices with USB DAC functionality need a delay + * after each class compliant request + */ + if ((le16_to_cpu(dev->descriptor.idVendor) == 0x154e) && + (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) { + + switch (le16_to_cpu(dev->descriptor.idProduct)) { + case 0x3005: /* Marantz HD-DAC1 */ + case 0x3006: /* Marantz SA-14S1 */ + mdelay(20); + break; + } + } } /* diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index ce8dc61..d326fec 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -35,6 +35,7 @@ struct events_stats { u32 nr_invalid_chains; u32 nr_unknown_id; u32 nr_unprocessable_samples; + u32 nr_unordered_events; }; enum hist_column { diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 9d78c70..532a6a3 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -681,8 +681,7 @@ int perf_session_queue_event(struct perf_session *s, union perf_event *event, return -ETIME; if (timestamp < s->ordered_samples.last_flush) { - printf("Warning: Timestamp below last timeslice flush\n"); - return -EINVAL; + s->stats.nr_unordered_events++; } if (!list_empty(sc)) { @@ -1168,6 +1167,8 @@ static void perf_session__warn_about_errors(const struct perf_session *session, "Do you have a KVM guest running and not using 'perf kvm'?\n", session->stats.nr_unprocessable_samples); } + if (session->stats.nr_unordered_events != 0) + ui__warning("%u out of order events recorded.\n", session->stats.nr_unordered_events); } volatile int session_done; diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 9f3eae2..2d9ab94 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -4,6 +4,7 @@ TARGETS += efivarfs TARGETS += kcmp TARGETS += memory-hotplug TARGETS += 
mqueue +TARGETS += mount TARGETS += net TARGETS += ptrace TARGETS += timers diff --git a/tools/testing/selftests/mount/Makefile b/tools/testing/selftests/mount/Makefile new file mode 100644 index 0000000..337d853 --- /dev/null +++ b/tools/testing/selftests/mount/Makefile @@ -0,0 +1,17 @@ +# Makefile for mount selftests. + +all: unprivileged-remount-test + +unprivileged-remount-test: unprivileged-remount-test.c + gcc -Wall -O2 unprivileged-remount-test.c -o unprivileged-remount-test + +# Allow specific tests to be selected. +test_unprivileged_remount: unprivileged-remount-test + @if [ -f /proc/self/uid_map ] ; then ./unprivileged-remount-test ; fi + +run_tests: all test_unprivileged_remount + +clean: + rm -f unprivileged-remount-test + +.PHONY: all test_unprivileged_remount diff --git a/tools/testing/selftests/mount/unprivileged-remount-test.c b/tools/testing/selftests/mount/unprivileged-remount-test.c new file mode 100644 index 0000000..5177850 --- /dev/null +++ b/tools/testing/selftests/mount/unprivileged-remount-test.c @@ -0,0 +1,370 @@ +#define _GNU_SOURCE +#include <sched.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <sys/types.h> +#include <sys/mount.h> +#include <sys/wait.h> +#include <sys/vfs.h> +#include <sys/statvfs.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <grp.h> +#include <stdbool.h> +#include <stdarg.h> + +#ifndef CLONE_NEWNS +# define CLONE_NEWNS 0x00020000 +#endif +#ifndef CLONE_NEWUTS +# define CLONE_NEWUTS 0x04000000 +#endif +#ifndef CLONE_NEWIPC +# define CLONE_NEWIPC 0x08000000 +#endif +#ifndef CLONE_NEWNET +# define CLONE_NEWNET 0x40000000 +#endif +#ifndef CLONE_NEWUSER +# define CLONE_NEWUSER 0x10000000 +#endif +#ifndef CLONE_NEWPID +# define CLONE_NEWPID 0x20000000 +#endif + +#ifndef MS_REC +# define MS_REC 16384 +#endif +#ifndef MS_RELATIME +# define MS_RELATIME (1 << 21) +#endif +#ifndef MS_STRICTATIME +# define MS_STRICTATIME (1 << 24) +#endif + +static void die(char *fmt, ...) 
+{ + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(EXIT_FAILURE); +} + +static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list ap) +{ + char buf[4096]; + int fd; + ssize_t written; + int buf_len; + + buf_len = vsnprintf(buf, sizeof(buf), fmt, ap); + if (buf_len < 0) { + die("vsnprintf failed: %s\n", + strerror(errno)); + } + if (buf_len >= sizeof(buf)) { + die("vsnprintf output truncated\n"); + } + + fd = open(filename, O_WRONLY); + if (fd < 0) { + if ((errno == ENOENT) && enoent_ok) + return; + die("open of %s failed: %s\n", + filename, strerror(errno)); + } + written = write(fd, buf, buf_len); + if (written != buf_len) { + if (written >= 0) { + die("short write to %s\n", filename); + } else { + die("write to %s failed: %s\n", + filename, strerror(errno)); + } + } + if (close(fd) != 0) { + die("close of %s failed: %s\n", + filename, strerror(errno)); + } +} + +static void maybe_write_file(char *filename, char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vmaybe_write_file(true, filename, fmt, ap); + va_end(ap); + +} + +static void write_file(char *filename, char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + vmaybe_write_file(false, filename, fmt, ap); + va_end(ap); + +} + +static int read_mnt_flags(const char *path) +{ + int ret; + struct statvfs stat; + int mnt_flags; + + ret = statvfs(path, &stat); + if (ret != 0) { + die("statvfs of %s failed: %s\n", + path, strerror(errno)); + } + if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV | \ + ST_NOEXEC | ST_NOATIME | ST_NODIRATIME | ST_RELATIME | \ + ST_SYNCHRONOUS | ST_MANDLOCK)) { + die("Unrecognized mount flags\n"); + } + mnt_flags = 0; + if (stat.f_flag & ST_RDONLY) + mnt_flags |= MS_RDONLY; + if (stat.f_flag & ST_NOSUID) + mnt_flags |= MS_NOSUID; + if (stat.f_flag & ST_NODEV) + mnt_flags |= MS_NODEV; + if (stat.f_flag & ST_NOEXEC) + mnt_flags |= MS_NOEXEC; + if (stat.f_flag & ST_NOATIME) + mnt_flags |= MS_NOATIME; + if (stat.f_flag & ST_NODIRATIME) + mnt_flags |= MS_NODIRATIME; + if (stat.f_flag & ST_RELATIME) + mnt_flags |= MS_RELATIME; + if (stat.f_flag & ST_SYNCHRONOUS) + mnt_flags |= MS_SYNCHRONOUS; + if (stat.f_flag & ST_MANDLOCK) + mnt_flags |= ST_MANDLOCK; + + return mnt_flags; +} + +static void create_and_enter_userns(void) +{ + uid_t uid; + gid_t gid; + + uid = getuid(); + gid = getgid(); + + if (unshare(CLONE_NEWUSER) !=0) { + die("unshare(CLONE_NEWUSER) failed: %s\n", + strerror(errno)); + } + + maybe_write_file("/proc/self/setgroups", "deny"); + write_file("/proc/self/uid_map", "0 %d 1", uid); + write_file("/proc/self/gid_map", "0 %d 1", gid); + + if (setgid(0) != 0) { + die ("setgid(0) failed %s\n", + strerror(errno)); + } + if (setuid(0) != 0) { + die("setuid(0) failed %s\n", + strerror(errno)); + } +} + +static +bool test_unpriv_remount(const char *fstype, const char *mount_options, + int mount_flags, int remount_flags, int invalid_flags) +{ + pid_t child; + + child = fork(); + if (child == -1) { + die("fork failed: %s\n", + strerror(errno)); + } + if (child != 0) { /* parent */ + pid_t pid; + int status; + pid = waitpid(child, &status, 0); + if (pid == -1) 
{ + die("waitpid failed: %s\n", + strerror(errno)); + } + if (pid != child) { + die("waited for %d got %d\n", + child, pid); + } + if (!WIFEXITED(status)) { + die("child did not terminate cleanly\n"); + } + return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false; + } + + create_and_enter_userns(); + if (unshare(CLONE_NEWNS) != 0) { + die("unshare(CLONE_NEWNS) failed: %s\n", + strerror(errno)); + } + + if (mount("testing", "/tmp", fstype, mount_flags, mount_options) != 0) { + die("mount of %s with options '%s' on /tmp failed: %s\n", + fstype, + mount_options? mount_options : "", + strerror(errno)); + } + + create_and_enter_userns(); + + if (unshare(CLONE_NEWNS) != 0) { + die("unshare(CLONE_NEWNS) failed: %s\n", + strerror(errno)); + } + + if (mount("/tmp", "/tmp", "none", + MS_REMOUNT | MS_BIND | remount_flags, NULL) != 0) { + /* system("cat /proc/self/mounts"); */ + die("remount of /tmp failed: %s\n", + strerror(errno)); + } + + if (mount("/tmp", "/tmp", "none", + MS_REMOUNT | MS_BIND | invalid_flags, NULL) == 0) { + /* system("cat /proc/self/mounts"); */ + die("remount of /tmp with invalid flags " + "succeeded unexpectedly\n"); + } + exit(EXIT_SUCCESS); +} + +static bool test_unpriv_remount_simple(int mount_flags) +{ + return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags, 0); +} + +static bool test_unpriv_remount_atime(int mount_flags, int invalid_flags) +{ + return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags, + invalid_flags); +} + +static bool test_priv_mount_unpriv_remount(void) +{ + pid_t child; + int ret; + const char *orig_path = "/dev"; + const char *dest_path = "/tmp"; + int orig_mnt_flags, remount_mnt_flags; + + child = fork(); + if (child == -1) { + die("fork failed: %s\n", + strerror(errno)); + } + if (child != 0) { /* parent */ + pid_t pid; + int status; + pid = waitpid(child, &status, 0); + if (pid == -1) { + die("waitpid failed: %s\n", + strerror(errno)); + } + if (pid != child) { + die("waited for %d got %d\n", + 
child, pid); + } + if (!WIFEXITED(status)) { + die("child did not terminate cleanly\n"); + } + return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false; + } + + orig_mnt_flags = read_mnt_flags(orig_path); + + create_and_enter_userns(); + ret = unshare(CLONE_NEWNS); + if (ret != 0) { + die("unshare(CLONE_NEWNS) failed: %s\n", + strerror(errno)); + } + + ret = mount(orig_path, dest_path, "bind", MS_BIND | MS_REC, NULL); + if (ret != 0) { + die("recursive bind mount of %s onto %s failed: %s\n", + orig_path, dest_path, strerror(errno)); + } + + ret = mount(dest_path, dest_path, "none", + MS_REMOUNT | MS_BIND | orig_mnt_flags , NULL); + if (ret != 0) { + /* system("cat /proc/self/mounts"); */ + die("remount of /tmp failed: %s\n", + strerror(errno)); + } + + remount_mnt_flags = read_mnt_flags(dest_path); + if (orig_mnt_flags != remount_mnt_flags) { + die("Mount flags unexpectedly changed during remount of %s originally mounted on %s\n", + dest_path, orig_path); + } + exit(EXIT_SUCCESS); +} + +int main(int argc, char **argv) +{ + if (!test_unpriv_remount_simple(MS_RDONLY)) { + die("MS_RDONLY malfunctions\n"); + } + if (!test_unpriv_remount("devpts", "newinstance", MS_NODEV, MS_NODEV, 0)) { + die("MS_NODEV malfunctions\n"); + } + if (!test_unpriv_remount_simple(MS_NOSUID)) { + die("MS_NOSUID malfunctions\n"); + } + if (!test_unpriv_remount_simple(MS_NOEXEC)) { + die("MS_NOEXEC malfunctions\n"); + } + if (!test_unpriv_remount_atime(MS_RELATIME, + MS_NOATIME)) + { + die("MS_RELATIME malfunctions\n"); + } + if (!test_unpriv_remount_atime(MS_STRICTATIME, + MS_NOATIME)) + { + die("MS_STRICTATIME malfunctions\n"); + } + if (!test_unpriv_remount_atime(MS_NOATIME, + MS_STRICTATIME)) + { + die("MS_NOATIME malfunctions\n"); + } + if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME, + MS_NOATIME)) + { + die("MS_RELATIME|MS_NODIRATIME malfunctions\n"); + } + if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME, + MS_NOATIME)) + { + die("MS_STRICTATIME|MS_NODIRATIME 
malfunctions\n"); + } + if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME, + MS_STRICTATIME)) + { + die("MS_NOATIME|MS_DIRATIME malfunctions\n"); + } + if (!test_unpriv_remount("ramfs", NULL, MS_STRICTATIME, 0, MS_NOATIME)) + { + die("Default atime malfunctions\n"); + } + if (!test_priv_mount_unpriv_remount()) { + die("Mount flags unexpectedly changed after remount\n"); + } + return EXIT_SUCCESS; +} diff --git a/tools/usb/ffs-test.c b/tools/usb/ffs-test.c index fe1e66b..a87e99f 100644 --- a/tools/usb/ffs-test.c +++ b/tools/usb/ffs-test.c @@ -116,8 +116,8 @@ static const struct { .header = { .magic = cpu_to_le32(FUNCTIONFS_DESCRIPTORS_MAGIC), .length = cpu_to_le32(sizeof descriptors), - .fs_count = 3, - .hs_count = 3, + .fs_count = cpu_to_le32(3), + .hs_count = cpu_to_le32(3), }, .fs_descs = { .intf = { diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 685fc72..b001dbf 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -751,6 +751,7 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) case 0: if (!target_cpus) return; + break; case 1: target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff; diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 39dc5bc..5eaf18f 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -203,10 +203,9 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap, spin_lock(&ioapic->lock); for (index = 0; index < IOAPIC_NUM_PINS; index++) { e = &ioapic->redirtbl[index]; - if (!e->fields.mask && - (e->fields.trig_mode == IOAPIC_LEVEL_TRIG || - kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, - index) || index == RTC_GSI)) { + if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG || + kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index) || + index == RTC_GSI) { if (kvm_apic_match_dest(vcpu, NULL, 0, e->fields.dest_id, e->fields.dest_mode)) { __set_bit(e->fields.vector, diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index c329c8f..a650aa4 100644 --- a/virt/kvm/iommu.c +++ 
b/virt/kvm/iommu.c @@ -43,13 +43,13 @@ static void kvm_iommu_put_pages(struct kvm *kvm, gfn_t base_gfn, unsigned long npages); static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn, - unsigned long size) + unsigned long npages) { gfn_t end_gfn; pfn_t pfn; pfn = gfn_to_pfn_memslot(slot, gfn); - end_gfn = gfn + (size >> PAGE_SHIFT); + end_gfn = gfn + npages; gfn += 1; if (is_error_noslot_pfn(pfn)) @@ -61,6 +61,14 @@ static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn, return pfn; } +static void kvm_unpin_pages(struct kvm *kvm, pfn_t pfn, unsigned long npages) +{ + unsigned long i; + + for (i = 0; i < npages; ++i) + kvm_release_pfn_clean(pfn + i); +} + int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) { gfn_t gfn, end_gfn; @@ -111,7 +119,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) * Pin all pages we are about to map in memory. This is * important because we unmap and unpin in 4kb steps later. */ - pfn = kvm_pin_pages(slot, gfn, page_size); + pfn = kvm_pin_pages(slot, gfn, page_size >> PAGE_SHIFT); if (is_error_noslot_pfn(pfn)) { gfn += 1; continue; @@ -123,6 +131,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) if (r) { printk(KERN_ERR "kvm_iommu_map_address:" "iommu failed to map pfn=%llx\n", pfn); + kvm_unpin_pages(kvm, pfn, page_size >> PAGE_SHIFT); goto unmap_pages; } @@ -134,7 +143,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) return 0; unmap_pages: - kvm_iommu_put_pages(kvm, slot->base_gfn, gfn); + kvm_iommu_put_pages(kvm, slot->base_gfn, gfn - slot->base_gfn); return r; } @@ -272,14 +281,6 @@ out_unlock: return r; } -static void kvm_unpin_pages(struct kvm *kvm, pfn_t pfn, unsigned long npages) -{ - unsigned long i; - - for (i = 0; i < npages; ++i) - kvm_release_pfn_clean(pfn + i); -} - static void kvm_iommu_put_pages(struct kvm *kvm, gfn_t base_gfn, unsigned long npages) { diff --git a/virt/kvm/kvm_main.c 
b/virt/kvm/kvm_main.c index aac732d..b9bf294 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -52,6 +52,7 @@ #include <asm/processor.h> #include <asm/io.h> +#include <asm/ioctl.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -673,8 +674,7 @@ static void sort_memslots(struct kvm_memslots *slots) slots->id_to_index[slots->memslots[i].id] = i; } -void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new, - u64 last_generation) +void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new) { if (new) { int id = new->id; @@ -685,8 +685,6 @@ void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new, if (new->npages != npages) sort_memslots(slots); } - - slots->generation = last_generation + 1; } static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) @@ -708,10 +706,24 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, { struct kvm_memslots *old_memslots = kvm->memslots; - update_memslots(slots, new, kvm->memslots->generation); + /* + * Set the low bit in the generation, which disables SPTE caching + * until the end of synchronize_srcu_expedited. + */ + WARN_ON(old_memslots->generation & 1); + slots->generation = old_memslots->generation + 1; + + update_memslots(slots, new); rcu_assign_pointer(kvm->memslots, slots); synchronize_srcu_expedited(&kvm->srcu); + /* + * Increment the new memslot generation a second time. This prevents + * vm exits that race with memslot updates from caching a memslot + * generation that will (potentially) be valid forever. + */ + slots->generation++; + kvm_arch_memslots_updated(kvm); return old_memslots; @@ -1970,6 +1982,9 @@ static long kvm_vcpu_ioctl(struct file *filp, if (vcpu->kvm->mm != current->mm) return -EIO; + if (unlikely(_IOC_TYPE(ioctl) != KVMIO)) + return -EINVAL; + #if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS) /* * Special cases: vcpu ioctls that are asynchronous to vcpu execution, |