From a6e2f029ae34f41adb6ae3812c32c5d326e1abd2 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 29 Apr 2015 12:48:40 -0400 Subject: Make asm/word-at-a-time.h available on all architectures Added the x86 implementation of word-at-a-time to the generic version, which previously only supported big-endian. Omitted the x86-specific load_unaligned_zeropad(), which in any case is also not present for the existing BE-only implementation of a word-at-a-time, and is only used under CONFIG_DCACHE_WORD_ACCESS. Added as a "generic-y" to the Kbuilds of all architectures that didn't previously have it. Signed-off-by: Chris Metcalf diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index 1a80cc9..c8f07ce 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -47,4 +47,5 @@ generic-y += types.h generic-y += ucontext.h generic-y += user.h generic-y += vga.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild index 1d66afd..519810d 100644 --- a/arch/avr32/include/asm/Kbuild +++ b/arch/avr32/include/asm/Kbuild @@ -19,4 +19,5 @@ generic-y += sections.h generic-y += topology.h generic-y += trace_clock.h generic-y += vga.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild index 07051a6..c80181e 100644 --- a/arch/blackfin/include/asm/Kbuild +++ b/arch/blackfin/include/asm/Kbuild @@ -45,4 +45,5 @@ generic-y += types.h generic-y += ucontext.h generic-y += unaligned.h generic-y += user.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index 7aeb322..2f697020 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -58,4 +58,5 @@ generic-y += types.h generic-y += ucontext.h generic-y += user.h generic-y += vga.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index d294f6a..09916c8 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -25,4 +25,5 @@ generic-y += sections.h generic-y += topology.h generic-y += trace_clock.h generic-y += vga.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild index 30edce3..2c987dc 100644 --- a/arch/frv/include/asm/Kbuild +++ b/arch/frv/include/asm/Kbuild @@ -6,3 +6,4 @@ generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += trace_clock.h +generic-y += word-at-a-time.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index 5ade4a1..0988816 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -57,4 +57,5 @@ generic-y += types.h generic-y += ucontext.h generic-y += unaligned.h generic-y += vga.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild index ccff13d..dc05773 100644 --- a/arch/ia64/include/asm/Kbuild +++ b/arch/ia64/include/asm/Kbuild @@ -7,3 +7,4 @@ generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += trace_clock.h generic-y += vtime.h +generic-y += word-at-a-time.h diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild index ba1cdc0..219e54b 100644 --- a/arch/m32r/include/asm/Kbuild +++ b/arch/m32r/include/asm/Kbuild @@ -8,3 +8,4 @@ generic-y += module.h generic-y += preempt.h generic-y += sections.h generic-y += trace_clock.h +generic-y += word-at-a-time.h diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild index 199320f..611c0df 100644 --- a/arch/metag/include/asm/Kbuild +++ b/arch/metag/include/asm/Kbuild @@ -53,4 +53,5 @@ generic-y += ucontext.h generic-y += unaligned.h generic-y += user.h generic-y += vga.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index 9989ddb..cefeaba 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -9,3 +9,4 @@ generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += syscalls.h generic-y += trace_clock.h +generic-y += word-at-a-time.h diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 7fe5c61..dee810f 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -17,4 +17,5 @@ generic-y += serial.h generic-y += trace_clock.h generic-y += ucontext.h generic-y += user.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild index de30b0c..27cbc02 100644 --- a/arch/mn10300/include/asm/Kbuild +++ b/arch/mn10300/include/asm/Kbuild @@ -8,3 +8,4 @@ generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += sections.h generic-y += trace_clock.h +generic-y += word-at-a-time.h diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild index 434639d..e224789 100644 --- a/arch/nios2/include/asm/Kbuild +++ b/arch/nios2/include/asm/Kbuild @@ -60,4 +60,5 @@ generic-y += types.h generic-y += unaligned.h generic-y += user.h generic-y += vga.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 050712e..d9c0b44 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -8,3 +8,4 @@ generic-y += preempt.h generic-y += rwsem.h generic-y += trace_clock.h generic-y += vtime.h +generic-y += word-at-a-time.h diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index dc5385e..9e8089b 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -5,3 +5,4 @@ generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += trace_clock.h +generic-y += word-at-a-time.h diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild index 138fb3d..ff19975 100644 --- a/arch/score/include/asm/Kbuild +++ b/arch/score/include/asm/Kbuild @@ -12,3 +12,4 @@ generic-y += sections.h generic-y += trace_clock.h generic-y += xor.h generic-y += serial.h +generic-y += word-at-a-time.h diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index d536544..ea59330 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -38,4 +38,5 @@ generic-y += termbits.h generic-y += termios.h generic-y += trace_clock.h generic-y += types.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 3d63ff6..33c1d3e 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -24,4 +24,5 @@ generic-y += preempt.h generic-y += switch_to.h generic-y += topology.h generic-y += trace_clock.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild index d12b377..932070c 100644 --- a/arch/unicore32/include/asm/Kbuild +++ b/arch/unicore32/include/asm/Kbuild @@ -61,4 +61,5 @@ generic-y += ucontext.h generic-y += unaligned.h generic-y += user.h generic-y += vga.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index 14d15bf..85acffd 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -28,4 +28,5 @@ generic-y += statfs.h generic-y += termios.h generic-y += topology.h generic-y += trace_clock.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/include/asm-generic/word-at-a-time.h b/include/asm-generic/word-at-a-time.h index 94f9ea8..011dde0 100644 --- a/include/asm-generic/word-at-a-time.h +++ b/include/asm-generic/word-at-a-time.h @@ -1,15 +1,10 @@ #ifndef _ASM_WORD_AT_A_TIME_H #define _ASM_WORD_AT_A_TIME_H -/* - * This says "generic", but it's actually big-endian only. - * Little-endian can use more efficient versions of these - * interfaces, see for example - * arch/x86/include/asm/word-at-a-time.h - * for those. - */ - #include +#include + +#ifdef __BIG_ENDIAN struct word_at_a_time { const unsigned long high_bits, low_bits; @@ -53,4 +48,73 @@ static inline bool has_zero(unsigned long val, unsigned long *data, const struct #define zero_bytemask(mask) (~1ul << __fls(mask)) #endif +#else + +/* + * The optimal byte mask counting is probably going to be something + * that is architecture-specific. If you have a reliably fast + * bit count instruction, that might be better than the multiply + * and shift, for example. + */ +struct word_at_a_time { + const unsigned long one_bits, high_bits; +}; + +#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) } + +#ifdef CONFIG_64BIT + +/* + * Jan Achrenius on G+: microoptimized version of + * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56" + * that works for the bytemasks without having to + * mask them first. + */ +static inline long count_masked_bytes(unsigned long mask) +{ + return mask*0x0001020304050608ul >> 56; +} + +#else /* 32-bit case */ + +/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */ +static inline long count_masked_bytes(long mask) +{ + /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */ + long a = (0x0ff0001+mask) >> 23; + /* Fix the 1 for 00 case */ + return a & mask; +} + +#endif + +/* Return nonzero if it has a zero */ +static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c) +{ + unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits; + *bits = mask; + return mask; +} + +static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c) +{ + return bits; +} + +static inline unsigned long create_zero_mask(unsigned long bits) +{ + bits = (bits - 1) & ~bits; + return bits >> 7; +} + +/* The mask we created is directly usable as a bytemask */ +#define zero_bytemask(mask) (mask) + +static inline unsigned long find_zero(unsigned long mask) +{ + return count_masked_bytes(mask); +} + +#endif /* __BIG_ENDIAN */ + #endif /* _ASM_WORD_AT_A_TIME_H */ -- cgit v0.10.2 From 30035e45753b708e7d47a98398500ca005e02b86 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 29 Apr 2015 12:52:04 -0400 Subject: string: provide strscpy() The strscpy() API is intended to be used instead of strlcpy(), and instead of most uses of strncpy(). - Unlike strlcpy(), it doesn't read from memory beyond (src + size). - Unlike strlcpy() or strncpy(), the API provides an easy way to check for destination buffer overflow: an -E2BIG error return value. - The provided implementation is robust in the face of the source buffer being asynchronously changed during the copy, unlike the current implementation of strlcpy(). - Unlike strncpy(), the destination buffer will be NUL-terminated if the string in the source buffer is too long. - Also unlike strncpy(), the destination buffer will not be updated beyond the NUL termination, avoiding strncpy's behavior of zeroing the entire tail end of the destination buffer. (A memset() after the strscpy() can be used if this behavior is desired.) - The implementation should be reasonably performant on all platforms since it uses the asm/word-at-a-time.h API rather than simple byte copy. Kernel-to-kernel string copy is not considered to be performance critical in any case. Signed-off-by: Chris Metcalf diff --git a/include/linux/string.h b/include/linux/string.h index a8d90db..9ef7795 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -25,6 +25,9 @@ extern char * strncpy(char *,const char *, __kernel_size_t); #ifndef __HAVE_ARCH_STRLCPY size_t strlcpy(char *, const char *, size_t); #endif +#ifndef __HAVE_ARCH_STRSCPY +ssize_t __must_check strscpy(char *, const char *, size_t); +#endif #ifndef __HAVE_ARCH_STRCAT extern char * strcat(char *, const char *); #endif diff --git a/lib/string.c b/lib/string.c index 13d1e84..8dbb7b1 100644 --- a/lib/string.c +++ b/lib/string.c @@ -27,6 +27,10 @@ #include #include +#include +#include +#include + #ifndef __HAVE_ARCH_STRNCASECMP /** * strncasecmp - Case insensitive, length-limited string comparison @@ -146,6 +150,90 @@ size_t strlcpy(char *dest, const char *src, size_t size) EXPORT_SYMBOL(strlcpy); #endif +#ifndef __HAVE_ARCH_STRSCPY +/** + * strscpy - Copy a C-string into a sized buffer + * @dest: Where to copy the string to + * @src: Where to copy the string from + * @count: Size of destination buffer + * + * Copy the string, or as much of it as fits, into the dest buffer. + * The routine returns the number of characters copied (not including + * the trailing NUL) or -E2BIG if the destination buffer wasn't big enough. + * The behavior is undefined if the string buffers overlap. + * The destination buffer is always NUL terminated, unless it's zero-sized. + * + * Preferred to strlcpy() since the API doesn't require reading memory + * from the src string beyond the specified "count" bytes, and since + * the return value is easier to error-check than strlcpy()'s. + * In addition, the implementation is robust to the string changing out + * from underneath it, unlike the current strlcpy() implementation. + * + * Preferred to strncpy() since it always returns a valid string, and + * doesn't unnecessarily force the tail of the destination buffer to be + * zeroed. If the zeroing is desired, it's likely cleaner to use strscpy() + * with an overflow test, then just memset() the tail of the dest buffer. + */ +ssize_t strscpy(char *dest, const char *src, size_t count) +{ + const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; + size_t max = count; + long res = 0; + + if (count == 0) + return -E2BIG; + +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + /* + * If src is unaligned, don't cross a page boundary, + * since we don't know if the next page is mapped. + */ + if ((long)src & (sizeof(long) - 1)) { + size_t limit = PAGE_SIZE - ((long)src & (PAGE_SIZE - 1)); + if (limit < max) + max = limit; + } +#else + /* If src or dest is unaligned, don't do word-at-a-time. */ + if (((long) dest | (long) src) & (sizeof(long) - 1)) + max = 0; +#endif + + while (max >= sizeof(unsigned long)) { + unsigned long c, data; + + c = *(unsigned long *)(src+res); + *(unsigned long *)(dest+res) = c; + if (has_zero(c, &data, &constants)) { + data = prep_zero_mask(c, data, &constants); + data = create_zero_mask(data); + return res + find_zero(data); + } + res += sizeof(unsigned long); + count -= sizeof(unsigned long); + max -= sizeof(unsigned long); + } + + while (count) { + char c; + + c = src[res]; + dest[res] = c; + if (!c) + return res; + res++; + count--; + } + + /* Hit buffer length without finding a NUL; force NUL-termination. */ + if (res) + dest[res-1] = '\0'; + + return -E2BIG; +} +EXPORT_SYMBOL(strscpy); +#endif + #ifndef __HAVE_ARCH_STRCAT /** * strcat - Append one %NUL-terminated string to another -- cgit v0.10.2 From 30059d494a72603d066baf55c748803df968aa08 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 29 Apr 2015 13:07:38 -0400 Subject: tile: use global strscpy() rather than private copy Now that strscpy() is a standard API, remove the local copy. Signed-off-by: Chris Metcalf diff --git a/arch/tile/gxio/mpipe.c b/arch/tile/gxio/mpipe.c index ee186e1..f102048 100644 --- a/arch/tile/gxio/mpipe.c +++ b/arch/tile/gxio/mpipe.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -29,32 +30,6 @@ /* HACK: Avoid pointless "shadow" warnings. */ #define link link_shadow -/** - * strscpy - Copy a C-string into a sized buffer, but only if it fits - * @dest: Where to copy the string to - * @src: Where to copy the string from - * @size: size of destination buffer - * - * Use this routine to avoid copying too-long strings. - * The routine returns the total number of bytes copied - * (including the trailing NUL) or zero if the buffer wasn't - * big enough. To ensure that programmers pay attention - * to the return code, the destination has a single NUL - * written at the front (if size is non-zero) when the - * buffer is not big enough. - */ -static size_t strscpy(char *dest, const char *src, size_t size) -{ - size_t len = strnlen(src, size) + 1; - if (len > size) { - if (size) - dest[0] = '\0'; - return 0; - } - memcpy(dest, src, len); - return len; -} - int gxio_mpipe_init(gxio_mpipe_context_t *context, unsigned int mpipe_index) { char file[32]; @@ -540,7 +515,7 @@ int gxio_mpipe_link_instance(const char *link_name) if (!context) return GXIO_ERR_NO_DEVICE; - if (strscpy(name.name, link_name, sizeof(name.name)) == 0) + if (strscpy(name.name, link_name, sizeof(name.name)) < 0) return GXIO_ERR_NO_DEVICE; return gxio_mpipe_info_instance_aux(context, name); @@ -559,7 +534,7 @@ int gxio_mpipe_link_enumerate_mac(int idx, char *link_name, uint8_t *link_mac) rv = gxio_mpipe_info_enumerate_aux(context, idx, &name, &mac); if (rv >= 0) { - if (strscpy(link_name, name.name, sizeof(name.name)) == 0) + if (strscpy(link_name, name.name, sizeof(name.name)) < 0) return GXIO_ERR_INVAL_MEMORY_SIZE; memcpy(link_mac, mac.mac, sizeof(mac.mac)); } @@ -576,7 +551,7 @@ int gxio_mpipe_link_open(gxio_mpipe_link_t *link, _gxio_mpipe_link_name_t name; int rv; - if (strscpy(name.name, link_name, sizeof(name.name)) == 0) + if (strscpy(name.name, link_name, sizeof(name.name)) < 0) return GXIO_ERR_NO_DEVICE; rv = gxio_mpipe_link_open_aux(context, name, flags); -- cgit v0.10.2