From 78b7280cce23293f7570ad52c1ffe1485c6d9669 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 11 Mar 2011 17:57:23 +0000 Subject: KEYS: Improve /proc/keys Improve /proc/keys by: (1) Don't attempt to summarise the payload of a negated key. It won't have one. To this end, a helper function - key_is_instantiated() has been added that allows the caller to find out whether the key is positively instantiated (as opposed to being uninstantiated or negatively instantiated). (2) Do show keys that are negative, expired or revoked rather than hiding them. This requires an override flag (no_state_check) to be passed to search_my_process_keyrings() and keyring_search_aux() to suppress this check. Without this, keys that are possessed by the caller, but only grant permissions to the caller if possessed are skipped as the possession check fails. Keys that are visible due to user, group or other checks are visible with or without this patch. Signed-off-by: David Howells Signed-off-by: James Morris diff --git a/include/linux/key.h b/include/linux/key.h index b2bb017..ef19b99 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -276,6 +276,19 @@ static inline key_serial_t key_serial(struct key *key) return key ? key->serial : 0; } +/** + * key_is_instantiated - Determine if a key has been positively instantiated + * @key: The key to check. + * + * Return true if the specified key has been positively instantiated, false + * otherwise. + */ +static inline bool key_is_instantiated(const struct key *key) +{ + return test_bit(KEY_FLAG_INSTANTIATED, &key->flags) && + !test_bit(KEY_FLAG_NEGATIVE, &key->flags); +} + #define rcu_dereference_key(KEY) \ (rcu_dereference_protected((KEY)->payload.rcudata, \ rwsem_is_locked(&((struct key *)(KEY))->sem))) diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index cfa7a5e..fa000d2 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -212,10 +212,12 @@ static void dns_resolver_describe(const struct key *key, struct seq_file *m) int err = key->type_data.x[0]; seq_puts(m, key->description); - if (err) - seq_printf(m, ": %d", err); - else - seq_printf(m, ": %u", key->datalen); + if (key_is_instantiated(key)) { + if (err) + seq_printf(m, ": %d", err); + else + seq_printf(m, ": %u", key->datalen); + } } /* diff --git a/security/keys/internal.h b/security/keys/internal.h index 07a025f..f375152 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -109,11 +109,13 @@ extern key_ref_t keyring_search_aux(key_ref_t keyring_ref, const struct cred *cred, struct key_type *type, const void *description, - key_match_func_t match); + key_match_func_t match, + bool no_state_check); extern key_ref_t search_my_process_keyrings(struct key_type *type, const void *description, key_match_func_t match, + bool no_state_check, const struct cred *cred); extern key_ref_t search_process_keyrings(struct key_type *type, const void *description, diff --git a/security/keys/keyring.c b/security/keys/keyring.c index cdd2f3f..a06ffab 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -176,13 +176,15 @@ static void keyring_describe(const struct key *keyring, struct seq_file *m) else seq_puts(m, "[anon]"); - rcu_read_lock(); - klist = rcu_dereference(keyring->payload.subscriptions); - if (klist) - seq_printf(m, ": %u/%u", klist->nkeys, klist->maxkeys); - else - seq_puts(m, ": empty"); - rcu_read_unlock(); + if (key_is_instantiated(keyring)) { + rcu_read_lock(); + klist = rcu_dereference(keyring->payload.subscriptions); + 
if (klist) + seq_printf(m, ": %u/%u", klist->nkeys, klist->maxkeys); + else + seq_puts(m, ": empty"); + rcu_read_unlock(); + } } /* @@ -271,6 +273,7 @@ struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid, * @type: The type of key to search for. * @description: Parameter for @match. * @match: Function to rule on whether or not a key is the one required. + * @no_state_check: Don't check if a matching key is bad * * Search the supplied keyring tree for a key that matches the criteria given. * The root keyring and any linked keyrings must grant Search permission to the @@ -303,7 +306,8 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref, const struct cred *cred, struct key_type *type, const void *description, - key_match_func_t match) + key_match_func_t match, + bool no_state_check) { struct { struct keyring_list *keylist; @@ -345,6 +349,8 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref, kflags = keyring->flags; if (keyring->type == type && match(keyring, description)) { key = keyring; + if (no_state_check) + goto found; /* check it isn't negative and hasn't expired or been * revoked */ @@ -384,11 +390,13 @@ descend: continue; /* skip revoked keys and expired keys */ - if (kflags & (1 << KEY_FLAG_REVOKED)) - continue; + if (!no_state_check) { + if (kflags & (1 << KEY_FLAG_REVOKED)) + continue; - if (key->expiry && now.tv_sec >= key->expiry) - continue; + if (key->expiry && now.tv_sec >= key->expiry) + continue; + } /* keys that don't match */ if (!match(key, description)) @@ -399,6 +407,9 @@ descend: cred, KEY_SEARCH) < 0) continue; + if (no_state_check) + goto found; + /* we set a different error code if we pass a negative key */ if (kflags & (1 << KEY_FLAG_NEGATIVE)) { err = key->type_data.reject_error; @@ -478,7 +489,7 @@ key_ref_t keyring_search(key_ref_t keyring, return ERR_PTR(-ENOKEY); return keyring_search_aux(keyring, current->cred, - type, description, type->match); + type, description, type->match, false); } EXPORT_SYMBOL(keyring_search); diff --git a/security/keys/proc.c b/security/keys/proc.c index 525cf8a..49bbc97 100644 --- a/security/keys/proc.c +++ b/security/keys/proc.c @@ -199,7 +199,7 @@ static int proc_keys_show(struct seq_file *m, void *v) if (key->perm & KEY_POS_VIEW) { skey_ref = search_my_process_keyrings(key->type, key, lookup_user_key_possessed, - cred); + true, cred); if (!IS_ERR(skey_ref)) { key_ref_put(skey_ref); key_ref = make_key_ref(key, 1); diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 930634e..6c0480d 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -331,6 +331,7 @@ void key_fsgid_changed(struct task_struct *tsk) key_ref_t search_my_process_keyrings(struct key_type *type, const void *description, key_match_func_t match, + bool no_state_check, const struct cred *cred) { key_ref_t key_ref, ret, err; @@ -350,7 +351,7 @@ key_ref_t search_my_process_keyrings(struct key_type *type, if (cred->thread_keyring) { key_ref = keyring_search_aux( make_key_ref(cred->thread_keyring, 1), - cred, type, description, match); + cred, type, description, match, no_state_check); if (!IS_ERR(key_ref)) goto found; @@ -371,7 +372,7 @@ key_ref_t search_my_process_keyrings(struct key_type *type, if (cred->tgcred->process_keyring) { key_ref = keyring_search_aux( make_key_ref(cred->tgcred->process_keyring, 1), - cred, type, description, match); + cred, type, description, match, no_state_check); if (!IS_ERR(key_ref)) goto found; @@ -395,7 +396,7 @@ key_ref_t search_my_process_keyrings(struct key_type 
*type, make_key_ref(rcu_dereference( cred->tgcred->session_keyring), 1), - cred, type, description, match); + cred, type, description, match, no_state_check); rcu_read_unlock(); if (!IS_ERR(key_ref)) @@ -417,7 +418,7 @@ key_ref_t search_my_process_keyrings(struct key_type *type, else if (cred->user->session_keyring) { key_ref = keyring_search_aux( make_key_ref(cred->user->session_keyring, 1), - cred, type, description, match); + cred, type, description, match, no_state_check); if (!IS_ERR(key_ref)) goto found; @@ -459,7 +460,8 @@ key_ref_t search_process_keyrings(struct key_type *type, might_sleep(); - key_ref = search_my_process_keyrings(type, description, match, cred); + key_ref = search_my_process_keyrings(type, description, match, + false, cred); if (!IS_ERR(key_ref)) goto found; err = key_ref; diff --git a/security/keys/request_key.c b/security/keys/request_key.c index df3c041..b18a717 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -530,8 +530,7 @@ struct key *request_key_and_link(struct key_type *type, dest_keyring, flags); /* search all the process keyrings for a key */ - key_ref = search_process_keyrings(type, description, type->match, - cred); + key_ref = search_process_keyrings(type, description, type->match, cred); if (!IS_ERR(key_ref)) { key = key_ref_to_ptr(key_ref); diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 6816403..f6337c9 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -59,7 +59,8 @@ static void request_key_auth_describe(const struct key *key, seq_puts(m, "key:"); seq_puts(m, key->description); - seq_printf(m, " pid:%d ci:%zu", rka->pid, rka->callout_len); + if (key_is_instantiated(key)) + seq_printf(m, " pid:%d ci:%zu", rka->pid, rka->callout_len); } /* diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c index c6ca866..63bb1aa 100644 --- a/security/keys/user_defined.c +++ b/security/keys/user_defined.c @@ -169,8 +169,8 @@ EXPORT_SYMBOL_GPL(user_destroy); void user_describe(const struct key *key, struct seq_file *m) { seq_puts(m, key->description); - - seq_printf(m, ": %u", key->datalen); + if (key_is_instantiated(key)) + seq_printf(m, ": %u", key->datalen); } EXPORT_SYMBOL_GPL(user_describe); -- cgit v0.10.2 From 4aab1e896a0a9d57420ff2867caa5a369123d8cb Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 11 Mar 2011 17:57:33 +0000 Subject: KEYS: Make request_key() and co. return an error for a negative key Make request_key() and co. return an error for a negative or rejected key. If the key was simply negated, then return ENOKEY, otherwise return the error with which it was rejected. 
Without this patch, the following command returns a key number (with the latest keyutils): [root@andromeda ~]# keyctl request2 user debug:foo rejected @s 586569904 Trying to print the key merely gets you a permission denied error: [root@andromeda ~]# keyctl print 586569904 keyctl_read_alloc: Permission denied Doing another request_key() call does get you the error, as long as it hasn't expired yet: [root@andromeda ~]# keyctl request user debug:foo request_key: Key was rejected by service Signed-off-by: David Howells Signed-off-by: James Morris diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 427fddc..eca5191 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -206,8 +206,14 @@ SYSCALL_DEFINE4(request_key, const char __user *, _type, goto error5; } + /* wait for the key to finish being constructed */ + ret = wait_for_key_construction(key, 1); + if (ret < 0) + goto error6; + ret = key->serial; +error6: key_put(key); error5: key_type_put(ktype); -- cgit v0.10.2 From 5806896019ceaa0a1e808182afb4bba33c948ad6 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Mon, 14 Mar 2011 19:32:21 -0400 Subject: security: select correct default LSM_MMAP_MIN_ADDR on ARM. The default for this is universally set to 64k, but the help says: For most ia64, ppc64 and x86 users with lots of address space a value of 65536 is reasonable and should cause no problems. On arm and other archs it should not be higher than 32768. The text is right, in that we are seeing selinux-enabled ARM targets that fail to launch /sbin/init because selinux blocks a memory map. So select the right value if we know we are building ARM. Signed-off-by: Paul Gortmaker Signed-off-by: James Morris diff --git a/security/Kconfig b/security/Kconfig index 95accd4..e0f08b5 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -167,6 +167,7 @@ config INTEL_TXT config LSM_MMAP_MIN_ADDR int "Low address space for LSM to protect from user allocation" depends on SECURITY && SECURITY_SELINUX + default 32768 if ARM default 65536 help This is the portion of low virtual memory which should be protected -- cgit v0.10.2 From b299eb5cde1a91706c450804006c6559b0826df8 Mon Sep 17 00:00:00 2001 From: Durgadoss R Date: Thu, 3 Mar 2011 04:30:13 +0530 Subject: ACPI:Fix goto flows in thermal-sys This patch fixes two minor bugs in thermal_sys: (a) The flow of goto's in thermal_hwmon_add_sysfs. (b) Remove the temp*_crit only if there is a get_crit_temp defined, in thermal_remove_hwmon_sysfs. 
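For reference, the unwind ordering this fix restores is the usual reverse-order goto pattern; a minimal self-contained sketch follows (setup_a/b/c and teardown_a/b are hypothetical stand-ins for the device_create_file()/device_remove_file() pairs, not code from thermal_sys.c):

    #include <stdio.h>

    /* Hypothetical stand-ins for the three sysfs attributes (name, temp_input, temp_crit). */
    static int setup_a(void) { return 0; }
    static int setup_b(void) { return 0; }
    static int setup_c(void) { return -1; }          /* simulate the last step failing */
    static void teardown_a(void) { puts("undo a"); }
    static void teardown_b(void) { puts("undo b"); }

    static int add_sysfs_example(void)
    {
            int err;

            err = setup_a();
            if (err)
                    goto out;               /* nothing created yet */

            err = setup_b();
            if (err)
                    goto undo_a;            /* undo only what already succeeded */

            err = setup_c();
            if (err)
                    goto undo_b;

            return 0;

    undo_b:
            teardown_b();
    undo_a:
            teardown_a();
    out:
            return err;
    }

    int main(void)
    {
            return add_sysfs_example() ? 1 : 0;
    }

Each error label undoes only the steps that have already succeeded, which is exactly what the relabelled gotos in the patch achieve.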
Signed-off-by: Durgadoss R Signed-off-by: Len Brown diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c index 713b7ea..0b1c82a 100644 --- a/drivers/thermal/thermal_sys.c +++ b/drivers/thermal/thermal_sys.c @@ -499,7 +499,7 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) dev_set_drvdata(hwmon->device, hwmon); result = device_create_file(hwmon->device, &dev_attr_name); if (result) - goto unregister_hwmon_device; + goto free_mem; register_sys_interface: tz->hwmon = hwmon; @@ -513,7 +513,7 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) sysfs_attr_init(&tz->temp_input.attr.attr); result = device_create_file(hwmon->device, &tz->temp_input.attr); if (result) - goto unregister_hwmon_device; + goto unregister_name; if (tz->ops->get_crit_temp) { unsigned long temperature; @@ -527,7 +527,7 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) result = device_create_file(hwmon->device, &tz->temp_crit.attr); if (result) - goto unregister_hwmon_device; + goto unregister_input; } } @@ -539,9 +539,9 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) return 0; - unregister_hwmon_device: - device_remove_file(hwmon->device, &tz->temp_crit.attr); + unregister_input: device_remove_file(hwmon->device, &tz->temp_input.attr); + unregister_name: if (new_hwmon_device) { device_remove_file(hwmon->device, &dev_attr_name); hwmon_device_unregister(hwmon->device); @@ -560,7 +560,8 @@ thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz) tz->hwmon = NULL; device_remove_file(hwmon->device, &tz->temp_input.attr); - device_remove_file(hwmon->device, &tz->temp_crit.attr); + if (tz->ops->get_crit_temp) + device_remove_file(hwmon->device, &tz->temp_crit.attr); mutex_lock(&thermal_list_lock); list_del(&tz->hwmon_node); -- cgit v0.10.2 From 0415b00d175e0d8945e6785aad21b5f157976ce0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 24 Mar 2011 18:50:09 +0100 Subject: percpu: Always align percpu output section to PAGE_SIZE Percpu allocator honors alignment request upto PAGE_SIZE and both the percpu addresses in the percpu address space and the translated kernel addresses should be aligned accordingly. The calculation of the former depends on the alignment of percpu output section in the kernel image. The linker script macros PERCPU_VADDR() and PERCPU() are used to define this output section and the latter takes @align parameter. Several architectures are using @align smaller than PAGE_SIZE breaking percpu memory alignment. This patch removes @align parameter from PERCPU(), renames it to PERCPU_SECTION() and makes it always align to PAGE_SIZE. While at it, add PCPU_SETUP_BUG_ON() checks such that alignment problems are reliably detected and remove percpu alignment comment recently added in workqueue.c as the condition would trigger BUG way before reaching there. For um, this patch raises the alignment of percpu area. As the area is in .init, there shouldn't be any noticeable difference. This problem was discovered by David Howells while debugging boot failure on mn10300. 
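The page-alignment test added by the new PCPU_SETUP_BUG_ON() lines is the usual offset-within-page mask; a tiny userspace sketch, where EXAMPLE_PAGE_SIZE is an assumed 4 KiB constant rather than the kernel's PAGE_SIZE:

    #include <stdbool.h>
    #include <stdint.h>

    #define EXAMPLE_PAGE_SIZE 4096UL
    #define EXAMPLE_PAGE_MASK (~(EXAMPLE_PAGE_SIZE - 1))

    /* addr & ~PAGE_MASK keeps only the offset inside the page; a non-zero
     * result means the address is not page aligned, which is what the new
     * checks trap for __per_cpu_start and base_addr. */
    static bool page_aligned(uintptr_t addr)
    {
            return (addr & ~EXAMPLE_PAGE_MASK) == 0;
    }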
Signed-off-by: Tejun Heo Acked-by: Mike Frysinger Cc: uclinux-dist-devel@blackfin.uclinux.org Cc: David Howells Cc: Jeff Dike Cc: user-mode-linux-devel@lists.sourceforge.net diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S index 433be2a..8d57948 100644 --- a/arch/alpha/kernel/vmlinux.lds.S +++ b/arch/alpha/kernel/vmlinux.lds.S @@ -39,7 +39,7 @@ SECTIONS __init_begin = ALIGN(PAGE_SIZE); INIT_TEXT_SECTION(PAGE_SIZE) INIT_DATA_SECTION(16) - PERCPU(L1_CACHE_BYTES, PAGE_SIZE) + PERCPU_SECTION(L1_CACHE_BYTES) /* Align to THREAD_SIZE rather than PAGE_SIZE here so any padding page needed for the THREAD_SIZE aligned init_task gets freed after init */ . = ALIGN(THREAD_SIZE); diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index b4348e6..e5287f2 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -82,7 +82,7 @@ SECTIONS #endif } - PERCPU(32, PAGE_SIZE) + PERCPU_SECTION(32) #ifndef CONFIG_XIP_KERNEL . = ALIGN(PAGE_SIZE); diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S index 854fa49..8d85c8c 100644 --- a/arch/blackfin/kernel/vmlinux.lds.S +++ b/arch/blackfin/kernel/vmlinux.lds.S @@ -136,7 +136,7 @@ SECTIONS . = ALIGN(16); INIT_DATA_SECTION(16) - PERCPU(32, PAGE_SIZE) + PERCPU_SECTION(32) .exit.data : { diff --git a/arch/cris/kernel/vmlinux.lds.S b/arch/cris/kernel/vmlinux.lds.S index 728bbd9..a6990cb 100644 --- a/arch/cris/kernel/vmlinux.lds.S +++ b/arch/cris/kernel/vmlinux.lds.S @@ -102,7 +102,7 @@ SECTIONS #endif __vmlinux_end = .; /* Last address of the physical file. */ #ifdef CONFIG_ETRAX_ARCH_V32 - PERCPU(32, PAGE_SIZE) + PERCPU_SECTION(32) .init.ramfs : { INIT_RAM_FS diff --git a/arch/frv/kernel/vmlinux.lds.S b/arch/frv/kernel/vmlinux.lds.S index 0daae8a..7e958d8 100644 --- a/arch/frv/kernel/vmlinux.lds.S +++ b/arch/frv/kernel/vmlinux.lds.S @@ -37,7 +37,7 @@ SECTIONS _einittext = .; INIT_DATA_SECTION(8) - PERCPU(L1_CACHE_BYTES, 4096) + PERCPU_SECTION(L1_CACHE_BYTES) . = ALIGN(PAGE_SIZE); __init_end = .; diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S index c194d64..2e7ccf7 100644 --- a/arch/m32r/kernel/vmlinux.lds.S +++ b/arch/m32r/kernel/vmlinux.lds.S @@ -53,7 +53,7 @@ SECTIONS __init_begin = .; INIT_TEXT_SECTION(PAGE_SIZE) INIT_DATA_SECTION(16) - PERCPU(32, PAGE_SIZE) + PERCPU_SECTION(32) . = ALIGN(PAGE_SIZE); __init_end = .; /* freed after init ends here */ diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 832afbb..8616709 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -115,7 +115,7 @@ SECTIONS EXIT_DATA } - PERCPU(1 << CONFIG_MIPS_L1_CACHE_SHIFT, PAGE_SIZE) + PERCPU_SECTION(1 << CONFIG_MIPS_L1_CACHE_SHIFT) . = ALIGN(PAGE_SIZE); __init_end = .; /* freed after init ends here */ diff --git a/arch/mn10300/kernel/vmlinux.lds.S b/arch/mn10300/kernel/vmlinux.lds.S index 968bcd2..6f702a6 100644 --- a/arch/mn10300/kernel/vmlinux.lds.S +++ b/arch/mn10300/kernel/vmlinux.lds.S @@ -70,7 +70,7 @@ SECTIONS .exit.text : { EXIT_TEXT; } .exit.data : { EXIT_DATA; } - PERCPU(32, PAGE_SIZE) + PERCPU_SECTION(32) . = ALIGN(PAGE_SIZE); __init_end = .; /* freed after init ends here */ diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 8f1e4ef..85b8661 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -145,7 +145,7 @@ SECTIONS EXIT_DATA } - PERCPU(L1_CACHE_BYTES, PAGE_SIZE) + PERCPU_SECTION(L1_CACHE_BYTES) . 
= ALIGN(PAGE_SIZE); __init_end = .; /* freed after init ends here */ diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index b9150f0..920276c 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -160,7 +160,7 @@ SECTIONS INIT_RAM_FS } - PERCPU(L1_CACHE_BYTES, PAGE_SIZE) + PERCPU_SECTION(L1_CACHE_BYTES) . = ALIGN(8); .machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) { diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 1bc18cd..56fe6bc 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -77,7 +77,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); INIT_DATA_SECTION(0x100) - PERCPU(0x100, PAGE_SIZE) + PERCPU_SECTION(0x100) . = ALIGN(PAGE_SIZE); __init_end = .; /* freed after init ends here */ diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S index af4d461..731c10c 100644 --- a/arch/sh/kernel/vmlinux.lds.S +++ b/arch/sh/kernel/vmlinux.lds.S @@ -66,7 +66,7 @@ SECTIONS __machvec_end = .; } - PERCPU(L1_CACHE_BYTES, PAGE_SIZE) + PERCPU_SECTION(L1_CACHE_BYTES) /* * .exit.text is discarded at runtime, not link time, to deal with diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 92b557a..c022075 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -108,7 +108,7 @@ SECTIONS __sun4v_2insn_patch_end = .; } - PERCPU(SMP_CACHE_BYTES, PAGE_SIZE) + PERCPU_SECTION(SMP_CACHE_BYTES) . = ALIGN(PAGE_SIZE); __init_end = .; diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S index 38f64fa..631f10d 100644 --- a/arch/tile/kernel/vmlinux.lds.S +++ b/arch/tile/kernel/vmlinux.lds.S @@ -60,7 +60,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); VMLINUX_SYMBOL(_sinitdata) = .; INIT_DATA_SECTION(16) :data =0 - PERCPU(L2_CACHE_BYTES, PAGE_SIZE) + PERCPU_SECTION(L2_CACHE_BYTES) . = ALIGN(PAGE_SIZE); VMLINUX_SYMBOL(_einitdata) = .; diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S index 34bede8..4938de5 100644 --- a/arch/um/include/asm/common.lds.S +++ b/arch/um/include/asm/common.lds.S @@ -42,7 +42,7 @@ INIT_SETUP(0) } - PERCPU(32, 32) + PERCPU_SECTION(32) .initcall.init : { INIT_CALLS diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 624a201..3e9fb5d 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -319,7 +319,7 @@ SECTIONS } #if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) - PERCPU(INTERNODE_CACHE_BYTES, PAGE_SIZE) + PERCPU_SECTION(INTERNODE_CACHE_BYTES) #endif . = ALIGN(PAGE_SIZE); diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index a282006..88ecea3 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -155,7 +155,7 @@ SECTIONS INIT_RAM_FS } - PERCPU(XCHAL_ICACHE_LINESIZE, PAGE_SIZE) + PERCPU_SECTION(XCHAL_ICACHE_LINESIZE) /* We need this dummy segment here */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 32c45e5..f301cea 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -15,7 +15,7 @@ * HEAD_TEXT_SECTION * INIT_TEXT_SECTION(PAGE_SIZE) * INIT_DATA_SECTION(...) - * PERCPU(CACHELINE_SIZE, PAGE_SIZE) + * PERCPU_SECTION(CACHELINE_SIZE) * __init_end = .; * * _stext = .; @@ -709,7 +709,7 @@ * * Note that this macros defines __per_cpu_load as an absolute symbol. * If there is no need to put the percpu section at a predetermined - * address, use PERCPU(). 
+ * address, use PERCPU_SECTION. */ #define PERCPU_VADDR(cacheline, vaddr, phdr) \ VMLINUX_SYMBOL(__per_cpu_load) = .; \ @@ -729,20 +729,19 @@ . = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data..percpu); /** - * PERCPU - define output section for percpu area, simple version + * PERCPU_SECTION - define output section for percpu area, simple version * @cacheline: cacheline size - * @align: required alignment * - * Align to @align and outputs output section for percpu area. This macro - * doesn't manipulate @vaddr or @phdr and __per_cpu_load and + * Align to PAGE_SIZE and outputs output section for percpu area. This + * macro doesn't manipulate @vaddr or @phdr and __per_cpu_load and * __per_cpu_start will be identical. * - * This macro is equivalent to ALIGN(@align); PERCPU_VADDR(@cacheline,,) + * This macro is equivalent to ALIGN(PAGE_SIZE); PERCPU_VADDR(@cacheline,,) * except that __per_cpu_load is defined as a relative symbol against * .data..percpu which is required for relocatable x86_32 configuration. */ -#define PERCPU(cacheline, align) \ - . = ALIGN(align); \ +#define PERCPU_SECTION(cacheline) \ + . = ALIGN(PAGE_SIZE); \ .data..percpu : AT(ADDR(.data..percpu) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__per_cpu_load) = .; \ VMLINUX_SYMBOL(__per_cpu_start) = .; \ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 04ef830..d30a502 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2860,9 +2860,7 @@ static int alloc_cwqs(struct workqueue_struct *wq) } } - /* just in case, make sure it's actually aligned - * - this is affected by PERCPU() alignment in vmlinux.lds.S - */ + /* just in case, make sure it's actually aligned */ BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align)); return wq->cpu_wq.v ? 0 : -ENOMEM; } diff --git a/mm/percpu.c b/mm/percpu.c index 3f93001..c5feb79 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1216,8 +1216,10 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, PCPU_SETUP_BUG_ON(ai->nr_groups <= 0); #ifdef CONFIG_SMP PCPU_SETUP_BUG_ON(!ai->static_size); + PCPU_SETUP_BUG_ON((unsigned long)__per_cpu_start & ~PAGE_MASK); #endif PCPU_SETUP_BUG_ON(!base_addr); + PCPU_SETUP_BUG_ON((unsigned long)base_addr & ~PAGE_MASK); PCPU_SETUP_BUG_ON(ai->unit_size < size_sum); PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK); PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE); -- cgit v0.10.2 From 9f63b88bd7a1ac1afbb4358772a39abaeddbdd13 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Wed, 23 Mar 2011 17:26:34 +0800 Subject: ACPI: osl, add acpi_os_create_lock interface Signed-off-by: Lin Ming Signed-off-by: Len Brown diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index c90c76a..cf750a7 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -883,14 +883,6 @@ void acpi_os_wait_events_complete(void *context) EXPORT_SYMBOL(acpi_os_wait_events_complete); -/* - * Deallocate the memory for a spinlock. - */ -void acpi_os_delete_lock(acpi_spinlock handle) -{ - return; -} - acpi_status acpi_os_create_semaphore(u32 max_units, u32 initial_units, acpi_handle * handle) { @@ -1322,6 +1314,31 @@ int acpi_resources_are_enforced(void) EXPORT_SYMBOL(acpi_resources_are_enforced); /* + * Create and initialize a spinlock. + */ +acpi_status +acpi_os_create_lock(acpi_spinlock *out_handle) +{ + spinlock_t *lock; + + lock = ACPI_ALLOCATE(sizeof(spinlock_t)); + if (!lock) + return AE_NO_MEMORY; + spin_lock_init(lock); + *out_handle = lock; + + return AE_OK; +} + +/* + * Deallocate the memory for a spinlock. 
+ */ +void acpi_os_delete_lock(acpi_spinlock handle) +{ + ACPI_FREE(handle); +} + +/* * Acquire a spinlock. * * handle is a pointer to the spinlock_t. diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h index a3252a5..a756bc8 100644 --- a/include/acpi/acpiosxf.h +++ b/include/acpi/acpiosxf.h @@ -98,6 +98,9 @@ acpi_os_table_override(struct acpi_table_header *existing_table, /* * Spinlock primitives */ +acpi_status +acpi_os_create_lock(acpi_spinlock *out_handle); + void acpi_os_delete_lock(acpi_spinlock handle); acpi_cpu_flags acpi_os_acquire_lock(acpi_spinlock handle); -- cgit v0.10.2 From 3854c8e32f46ffa6ee0bf2eb01137f5a48b2754f Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Wed, 23 Mar 2011 17:26:35 +0800 Subject: ACPICA: Use acpi_os_create_lock interface Replace spin_lock_init with acpi_os_create_lock. Signed-off-by: Lin Ming Signed-off-by: Len Brown diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index d69750b..6d512fc 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -226,12 +226,9 @@ ACPI_EXTERN u8 acpi_gbl_global_lock_present; * Spinlocks are used for interfaces that can be possibly called at * interrupt level */ -ACPI_EXTERN spinlock_t _acpi_gbl_gpe_lock; /* For GPE data structs and registers */ -ACPI_EXTERN spinlock_t _acpi_gbl_hardware_lock; /* For ACPI H/W except GPE registers */ -ACPI_EXTERN spinlock_t _acpi_ev_global_lock_pending_lock; /* For global lock */ -#define acpi_gbl_gpe_lock &_acpi_gbl_gpe_lock -#define acpi_gbl_hardware_lock &_acpi_gbl_hardware_lock -#define acpi_ev_global_lock_pending_lock &_acpi_ev_global_lock_pending_lock +ACPI_EXTERN acpi_spinlock acpi_gbl_gpe_lock; /* For GPE data structs and registers */ +ACPI_EXTERN acpi_spinlock acpi_gbl_hardware_lock; /* For ACPI H/W except GPE registers */ +ACPI_EXTERN acpi_spinlock acpi_ev_global_lock_pending_lock; /* For global lock */ /***************************************************************************** * diff --git a/drivers/acpi/acpica/utmutex.c b/drivers/acpi/acpica/utmutex.c index a946c68..519d4ee 100644 --- a/drivers/acpi/acpica/utmutex.c +++ b/drivers/acpi/acpica/utmutex.c @@ -83,9 +83,20 @@ acpi_status acpi_ut_mutex_initialize(void) /* Create the spinlocks for use at interrupt level */ - spin_lock_init(acpi_gbl_gpe_lock); - spin_lock_init(acpi_gbl_hardware_lock); - spin_lock_init(acpi_ev_global_lock_pending_lock); + status = acpi_os_create_lock (&acpi_gbl_gpe_lock); + if (ACPI_FAILURE (status)) { + return_ACPI_STATUS (status); + } + + status = acpi_os_create_lock (&acpi_gbl_hardware_lock); + if (ACPI_FAILURE (status)) { + return_ACPI_STATUS (status); + } + + status = acpi_os_create_lock (&acpi_ev_global_lock_pending_lock); + if (ACPI_FAILURE (status)) { + return_ACPI_STATUS (status); + } /* Mutex for _OSI support */ status = acpi_os_create_mutex(&acpi_gbl_osi_mutex); -- cgit v0.10.2 From 749c27639b95c5c4a8185e02a4efb189034944ed Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Wed, 23 Mar 2011 17:26:36 +0800 Subject: ACPICA: Fix code divergence of global lock handling Commit 9cd0314(ACPI / ACPICA: Fix global lock acquisition) was backported into ACPICA code base, and some divergence was introduced. This patch fixed it, - rename acpi_ev_global_lock_pending/acpi_ev_global_lock_pending_lock to acpi_gbl_global_lock_pending/acpi_gbl_global_lock_pending_lock. - move the initialization of acpi_gbl_global_lock_pending_lock from acpi_ut_mutex_initialize to acpi_ev_init_global_lock_handler. Signed-off-by: Lin Ming Reviewed-by: Rafael J. 
Wysocki Signed-off-by: Len Brown diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index 6d512fc..73863d86 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -214,13 +214,16 @@ ACPI_EXTERN struct acpi_mutex_info acpi_gbl_mutex_info[ACPI_NUM_MUTEX]; /* * Global lock mutex is an actual AML mutex object - * Global lock semaphore works in conjunction with the HW global lock + * Global lock semaphore works in conjunction with the actual global lock + * Global lock spinlock is used for "pending" handshake */ ACPI_EXTERN union acpi_operand_object *acpi_gbl_global_lock_mutex; ACPI_EXTERN acpi_semaphore acpi_gbl_global_lock_semaphore; +ACPI_EXTERN acpi_spinlock acpi_gbl_global_lock_pending_lock; ACPI_EXTERN u16 acpi_gbl_global_lock_handle; ACPI_EXTERN u8 acpi_gbl_global_lock_acquired; ACPI_EXTERN u8 acpi_gbl_global_lock_present; +ACPI_EXTERN u8 acpi_gbl_global_lock_pending; /* * Spinlocks are used for interfaces that can be possibly called at @@ -228,7 +231,6 @@ ACPI_EXTERN u8 acpi_gbl_global_lock_present; */ ACPI_EXTERN acpi_spinlock acpi_gbl_gpe_lock; /* For GPE data structs and registers */ ACPI_EXTERN acpi_spinlock acpi_gbl_hardware_lock; /* For ACPI H/W except GPE registers */ -ACPI_EXTERN acpi_spinlock acpi_ev_global_lock_pending_lock; /* For global lock */ /***************************************************************************** * diff --git a/drivers/acpi/acpica/evmisc.c b/drivers/acpi/acpica/evmisc.c index 7dc8094..69a3b4a 100644 --- a/drivers/acpi/acpica/evmisc.c +++ b/drivers/acpi/acpica/evmisc.c @@ -284,39 +284,41 @@ static void ACPI_SYSTEM_XFACE acpi_ev_notify_dispatch(void *context) * RETURN: ACPI_INTERRUPT_HANDLED * * DESCRIPTION: Invoked directly from the SCI handler when a global lock - * release interrupt occurs. If there's a thread waiting for - * the global lock, signal it. - * - * NOTE: Assumes that the semaphore can be signaled from interrupt level. If - * this is not possible for some reason, a separate thread will have to be - * scheduled to do this. + * release interrupt occurs. If there is actually a pending + * request for the lock, signal the waiting thread. * ******************************************************************************/ -static u8 acpi_ev_global_lock_pending; static u32 acpi_ev_global_lock_handler(void *context) { acpi_status status; acpi_cpu_flags flags; - flags = acpi_os_acquire_lock(acpi_ev_global_lock_pending_lock); + flags = acpi_os_acquire_lock(acpi_gbl_global_lock_pending_lock); - if (!acpi_ev_global_lock_pending) { - goto out; + /* + * If a request for the global lock is not actually pending, + * we are done. This handles "spurious" global lock interrupts + * which are possible (and have been seen) with bad BIOSs. + */ + if (!acpi_gbl_global_lock_pending) { + goto cleanup_and_exit; } - /* Send a unit to the semaphore */ - + /* + * Send a unit to the global lock semaphore. The actual acquisition + * of the global lock will be performed by the waiting thread. 
+ */ status = acpi_os_signal_semaphore(acpi_gbl_global_lock_semaphore, 1); if (ACPI_FAILURE(status)) { ACPI_ERROR((AE_INFO, "Could not signal Global Lock semaphore")); } - acpi_ev_global_lock_pending = FALSE; + acpi_gbl_global_lock_pending = FALSE; - out: - acpi_os_release_lock(acpi_ev_global_lock_pending_lock, flags); +cleanup_and_exit: + acpi_os_release_lock(acpi_gbl_global_lock_pending_lock, flags); return (ACPI_INTERRUPT_HANDLED); } @@ -350,14 +352,20 @@ acpi_status acpi_ev_init_global_lock_handler(void) * Map to AE_OK, but mark global lock as not present. Any attempt to * actually use the global lock will be flagged with an error. */ + acpi_gbl_global_lock_present = FALSE; if (status == AE_NO_HARDWARE_RESPONSE) { ACPI_ERROR((AE_INFO, "No response from Global Lock hardware, disabling lock")); - acpi_gbl_global_lock_present = FALSE; return_ACPI_STATUS(AE_OK); } + status = acpi_os_create_lock(&acpi_gbl_global_lock_pending_lock); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + + acpi_gbl_global_lock_pending = FALSE; acpi_gbl_global_lock_present = TRUE; return_ACPI_STATUS(status); } @@ -414,7 +422,7 @@ static int acpi_ev_global_lock_acquired; acpi_status acpi_ev_acquire_global_lock(u16 timeout) { acpi_cpu_flags flags; - acpi_status status = AE_OK; + acpi_status status; u8 acquired = FALSE; ACPI_FUNCTION_TRACE(ev_acquire_global_lock); @@ -458,15 +466,15 @@ acpi_status acpi_ev_acquire_global_lock(u16 timeout) } /* - * Make sure that a global lock actually exists. If not, just treat the - * lock as a standard mutex. + * Make sure that a global lock actually exists. If not, just + * treat the lock as a standard mutex. */ if (!acpi_gbl_global_lock_present) { acpi_gbl_global_lock_acquired = TRUE; return_ACPI_STATUS(AE_OK); } - flags = acpi_os_acquire_lock(acpi_ev_global_lock_pending_lock); + flags = acpi_os_acquire_lock(acpi_gbl_global_lock_pending_lock); do { @@ -475,20 +483,19 @@ acpi_status acpi_ev_acquire_global_lock(u16 timeout) ACPI_ACQUIRE_GLOBAL_LOCK(acpi_gbl_FACS, acquired); if (acquired) { acpi_gbl_global_lock_acquired = TRUE; - ACPI_DEBUG_PRINT((ACPI_DB_EXEC, "Acquired hardware Global Lock\n")); break; } - acpi_ev_global_lock_pending = TRUE; - - acpi_os_release_lock(acpi_ev_global_lock_pending_lock, flags); - /* - * Did not get the lock. The pending bit was set above, and we - * must wait until we get the global lock released interrupt. + * Did not get the lock. The pending bit was set above, and + * we must now wait until we receive the global lock + * released interrupt. */ + acpi_gbl_global_lock_pending = TRUE; + acpi_os_release_lock(acpi_gbl_global_lock_pending_lock, flags); + ACPI_DEBUG_PRINT((ACPI_DB_EXEC, "Waiting for hardware Global Lock\n")); @@ -496,17 +503,16 @@ acpi_status acpi_ev_acquire_global_lock(u16 timeout) * Wait for handshake with the global lock interrupt handler. * This interface releases the interpreter if we must wait. 
*/ - status = acpi_ex_system_wait_semaphore( - acpi_gbl_global_lock_semaphore, - ACPI_WAIT_FOREVER); + status = + acpi_ex_system_wait_semaphore + (acpi_gbl_global_lock_semaphore, ACPI_WAIT_FOREVER); - flags = acpi_os_acquire_lock(acpi_ev_global_lock_pending_lock); + flags = acpi_os_acquire_lock(acpi_gbl_global_lock_pending_lock); } while (ACPI_SUCCESS(status)); - acpi_ev_global_lock_pending = FALSE; - - acpi_os_release_lock(acpi_ev_global_lock_pending_lock, flags); + acpi_gbl_global_lock_pending = FALSE; + acpi_os_release_lock(acpi_gbl_global_lock_pending_lock, flags); return_ACPI_STATUS(status); } diff --git a/drivers/acpi/acpica/utmutex.c b/drivers/acpi/acpica/utmutex.c index 519d4ee..7d797e2 100644 --- a/drivers/acpi/acpica/utmutex.c +++ b/drivers/acpi/acpica/utmutex.c @@ -93,11 +93,6 @@ acpi_status acpi_ut_mutex_initialize(void) return_ACPI_STATUS (status); } - status = acpi_os_create_lock (&acpi_ev_global_lock_pending_lock); - if (ACPI_FAILURE (status)) { - return_ACPI_STATUS (status); - } - /* Mutex for _OSI support */ status = acpi_os_create_mutex(&acpi_gbl_osi_mutex); if (ACPI_FAILURE(status)) { -- cgit v0.10.2 From 4bcad6c1ef53a9a0224f4654ceb3b9030d0769ec Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Thu, 24 Mar 2011 13:56:47 +0000 Subject: dlm: Remove superfluous call to recalc_sigpending() recalc_sigpending() is called within sigprocmask(), so there is no need call it again after sigprocmask() has returned. Signed-off-by: Matt Fleming Signed-off-by: David Teigland diff --git a/fs/dlm/user.c b/fs/dlm/user.c index d5ab3fe..e96bf3e 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -611,7 +611,6 @@ static ssize_t device_write(struct file *file, const char __user *buf, out_sig: sigprocmask(SIG_SETMASK, &tmpsig, NULL); - recalc_sigpending(); out_free: kfree(kbuf); return error; -- cgit v0.10.2 From 787e5b06a80e7fc9dc02d9b53a9d8d2ac63b7ace Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Wed, 23 Mar 2011 08:23:52 +0100 Subject: percpu: Cast away printk format warning On 32-bit systems which don't happen to implicitly define or cast VMALLOC_START and/or VMALLOC_END to long in their arch headers, the printk in the percpu code will cause a warning to be emitted: mm/percpu.c: In function 'pcpu_embed_first_chunk': mm/percpu.c:1648: warning: format '%lx' expects type 'long unsigned int', but argument 3 has type 'unsigned int' So add an explicit cast to unsigned long here. Signed-off-by: Mike Frysinger Signed-off-by: Tejun Heo diff --git a/mm/percpu.c b/mm/percpu.c index 3f93001..8a11cd2 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1646,8 +1646,8 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size, /* warn if maximum distance is further than 75% of vmalloc space */ if (max_distance > (VMALLOC_END - VMALLOC_START) * 3 / 4) { pr_warning("PERCPU: max_distance=0x%zx too large for vmalloc " - "space 0x%lx\n", - max_distance, VMALLOC_END - VMALLOC_START); + "space 0x%lx\n", max_distance, + (unsigned long)(VMALLOC_END - VMALLOC_START)); #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK /* and fail if we have fallback */ rc = -EINVAL; -- cgit v0.10.2 From 5f55924deaa62d6df687c131fb92aebe071ec787 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 28 Mar 2011 18:06:58 +0200 Subject: percpu: Avoid extra NOP in percpu_cmpxchg16b_double percpu_cmpxchg16b_double() uses alternative_io() and looks like : e8 .. .. .. .. 
call this_cpu_cmpxchg16b_emu X bytes NOPX or, once patched (if cpu supports native instruction) on SMP build : 65 48 0f c7 0e cmpxchg16b %gs:(%rsi) 0f 94 c0 sete %al on !SMP build : 48 0f c7 0e cmpxchg16b (%rsi) 0f 94 c0 sete %al Therefore, NOPX should be : P6_NOP3 on SMP P6_NOP2 on !SMP Signed-off-by: Eric Dumazet Acked-by: Christoph Lameter Cc: Ingo Molnar Cc: Pekka Enberg Signed-off-by: Tejun Heo diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index d475b43..d68fca6 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -509,6 +509,11 @@ do { \ * it in software. The address used in the cmpxchg16 instruction must be * aligned to a 16 byte boundary. */ +#ifdef CONFIG_SMP +#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" P6_NOP3 +#else +#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" P6_NOP2 +#endif #define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) \ ({ \ char __ret; \ @@ -517,7 +522,7 @@ do { \ typeof(o2) __o2 = o2; \ typeof(o2) __n2 = n2; \ typeof(o2) __dummy; \ - alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4, \ + alternative_io(CMPXCHG16B_EMU_CALL, \ "cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t", \ X86_FEATURE_CX16, \ ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \ -- cgit v0.10.2 From 8023976cf4627d9f1d82ad468ec40e32eb87d211 Mon Sep 17 00:00:00 2001 From: Harry Ciao Date: Fri, 25 Mar 2011 13:51:56 +0800 Subject: SELinux: Add class support to the role_trans structure If kernel policy version is >= 26, then the binary representation of the role_trans structure supports specifying the class for the current subject or the newly created object. If kernel policy version is < 26, then the class field would be default to the process class. Signed-off-by: Harry Ciao Acked-by: Stephen Smalley Signed-off-by: Eric Paris diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index 348eb00..bfc5218 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -30,13 +30,14 @@ #define POLICYDB_VERSION_PERMISSIVE 23 #define POLICYDB_VERSION_BOUNDARY 24 #define POLICYDB_VERSION_FILENAME_TRANS 25 +#define POLICYDB_VERSION_ROLETRANS 26 /* Range of policy versions we understand*/ #define POLICYDB_VERSION_MIN POLICYDB_VERSION_BASE #ifdef CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX #define POLICYDB_VERSION_MAX CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX_VALUE #else -#define POLICYDB_VERSION_MAX POLICYDB_VERSION_FILENAME_TRANS +#define POLICYDB_VERSION_MAX POLICYDB_VERSION_ROLETRANS #endif /* Mask for just the mount related flags */ diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index e7b850a..fd62c50 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -128,6 +128,11 @@ static struct policydb_compat_info policydb_compat[] = { .sym_num = SYM_NUM, .ocon_num = OCON_NUM, }, + { + .version = POLICYDB_VERSION_ROLETRANS, + .sym_num = SYM_NUM, + .ocon_num = OCON_NUM, + }, }; static struct policydb_compat_info *policydb_lookup_compat(int version) @@ -2302,8 +2307,17 @@ int policydb_read(struct policydb *p, void *fp) tr->role = le32_to_cpu(buf[0]); tr->type = le32_to_cpu(buf[1]); tr->new_role = le32_to_cpu(buf[2]); + if (p->policyvers >= POLICYDB_VERSION_ROLETRANS) { + rc = next_entry(buf, fp, sizeof(u32)); + if (rc) + goto bad; + tr->tclass = le32_to_cpu(buf[0]); + } else + tr->tclass = p->process_class; + if (!policydb_role_isvalid(p, tr->role) || !policydb_type_isvalid(p, tr->type) || + 
!policydb_class_isvalid(p, tr->tclass) || !policydb_role_isvalid(p, tr->new_role)) goto bad; ltr = tr; diff --git a/security/selinux/ss/policydb.h b/security/selinux/ss/policydb.h index 732ea4a..801175f 100644 --- a/security/selinux/ss/policydb.h +++ b/security/selinux/ss/policydb.h @@ -72,7 +72,8 @@ struct role_datum { struct role_trans { u32 role; /* current role */ - u32 type; /* program executable type */ + u32 type; /* program executable type, or new object type */ + u32 tclass; /* process class, or new object class */ u32 new_role; /* new role */ struct role_trans *next; }; -- cgit v0.10.2 From 63a312ca55d09a3f6526919df495fff1073c88f4 Mon Sep 17 00:00:00 2001 From: Harry Ciao Date: Fri, 25 Mar 2011 13:51:58 +0800 Subject: SELinux: Compute role in newcontext for all classes Apply role_transition rules for all kinds of classes. Signed-off-by: Harry Ciao Acked-by: Stephen Smalley Signed-off-by: Eric Paris diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 3e7544d..03f7a47 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1484,17 +1484,15 @@ static int security_compute_sid(u32 ssid, tcontext->type, tclass, qstr); /* Check for class-specific changes. */ - if (tclass == policydb.process_class) { - if (specified & AVTAB_TRANSITION) { - /* Look for a role transition rule. */ - for (roletr = policydb.role_tr; roletr; - roletr = roletr->next) { - if (roletr->role == scontext->role && - roletr->type == tcontext->type) { - /* Use the role transition rule. */ - newcontext.role = roletr->new_role; - break; - } + if (specified & AVTAB_TRANSITION) { + /* Look for a role transition rule. */ + for (roletr = policydb.role_tr; roletr; roletr = roletr->next) { + if ((roletr->role == scontext->role) && + (roletr->type == tcontext->type) && + (roletr->tclass == tclass)) { + /* Use the role transition rule. */ + newcontext.role = roletr->new_role; + break; } } } -- cgit v0.10.2 From c900ff323d761753a56d8d6a67b034ceee277b6e Mon Sep 17 00:00:00 2001 From: Harry Ciao Date: Fri, 25 Mar 2011 13:52:00 +0800 Subject: SELinux: Write class field in role_trans_write. If kernel policy version is >= 26, then write the class field of the role_trans structure into the binary reprensentation. 
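The resulting on-disk record is three le32 words (role, type, new_role) followed by a fourth (tclass) only when the policy is new enough; a self-contained userspace sketch of that layout (struct and function names here are illustrative, not the kernel's):

    #include <endian.h>
    #include <stdint.h>
    #include <stdio.h>

    #define EXAMPLE_POLICYDB_VERSION_ROLETRANS 26

    struct role_trans_example {
            uint32_t role, type, new_role, tclass;
    };

    /* Emit one record; tclass is appended only for policy version >= 26,
     * mirroring what role_trans_write() and policydb_read() expect. */
    static int write_role_trans(FILE *fp, const struct role_trans_example *tr,
                                int policyvers)
    {
            uint32_t buf[4] = {
                    htole32(tr->role), htole32(tr->type),
                    htole32(tr->new_role), htole32(tr->tclass)
            };
            size_t nel = (policyvers >= EXAMPLE_POLICYDB_VERSION_ROLETRANS) ? 4 : 3;

            return fwrite(buf, sizeof(uint32_t), nel, fp) == nel ? 0 : -1;
    }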
Signed-off-by: Harry Ciao Acked-by: Stephen Smalley Signed-off-by: Eric Paris diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index fd62c50..a493eae 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -2535,8 +2535,9 @@ static int cat_write(void *vkey, void *datum, void *ptr) return 0; } -static int role_trans_write(struct role_trans *r, void *fp) +static int role_trans_write(struct policydb *p, void *fp) { + struct role_trans *r = p->role_tr; struct role_trans *tr; u32 buf[3]; size_t nel; @@ -2556,6 +2557,12 @@ static int role_trans_write(struct role_trans *r, void *fp) rc = put_entry(buf, sizeof(u32), 3, fp); if (rc) return rc; + if (p->policyvers >= POLICYDB_VERSION_ROLETRANS) { + buf[0] = cpu_to_le32(tr->tclass); + rc = put_entry(buf, sizeof(u32), 1, fp); + if (rc) + return rc; + } } return 0; @@ -3267,7 +3274,7 @@ int policydb_write(struct policydb *p, void *fp) if (rc) return rc; - rc = role_trans_write(p->role_tr, fp); + rc = role_trans_write(p, fp); if (rc) return rc; -- cgit v0.10.2 -- cgit v0.10.2 From cfc64fd91fabed099a4c3df58559f4b7efe9bcce Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Thu, 31 Mar 2011 00:27:32 +0900 Subject: tomoyo: fix memory leak in tomoyo_commit_ok() When memory used for policy exceeds the quota, tomoyo_memory_ok() return false. In this case, tomoyo_commit_ok() must call kfree() before returning NULL. This bug exists since 2.6.35. Signed-off-by: Xiaochen Wang Acked-by: Tetsuo Handa Signed-off-by: James Morris diff --git a/security/tomoyo/memory.c b/security/tomoyo/memory.c index 2976126..42a7b1b 100644 --- a/security/tomoyo/memory.c +++ b/security/tomoyo/memory.c @@ -75,6 +75,7 @@ void *tomoyo_commit_ok(void *data, const unsigned int size) memset(data, 0, size); return ptr; } + kfree(ptr); return NULL; } -- cgit v0.10.2 From de6efe0a966cf86b3c4039a610b2d4157db707f2 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 30 Mar 2011 23:09:52 +0800 Subject: spi/dw_spi: unify the low level read/write routines The original version has many duplicated codes for null/u8/u16 case, so unify them to make it cleaner Signed-off-by: Feng Tang Signed-off-by: Grant Likely diff --git a/drivers/spi/dw_spi.c b/drivers/spi/dw_spi.c index 9a61964..c4fca3d 100644 --- a/drivers/spi/dw_spi.c +++ b/drivers/spi/dw_spi.c @@ -58,8 +58,6 @@ struct chip_data { u8 bits_per_word; u16 clk_div; /* baud rate divider */ u32 speed_hz; /* baud rate */ - int (*write)(struct dw_spi *dws); - int (*read)(struct dw_spi *dws); void (*cs_control)(u32 command); }; @@ -185,80 +183,45 @@ static void flush(struct dw_spi *dws) wait_till_not_busy(dws); } -static int null_writer(struct dw_spi *dws) -{ - u8 n_bytes = dws->n_bytes; - - if (!(dw_readw(dws, sr) & SR_TF_NOT_FULL) - || (dws->tx == dws->tx_end)) - return 0; - dw_writew(dws, dr, 0); - dws->tx += n_bytes; - wait_till_not_busy(dws); - return 1; -} - -static int null_reader(struct dw_spi *dws) +static int dw_writer(struct dw_spi *dws) { - u8 n_bytes = dws->n_bytes; + u16 txw = 0; - while ((dw_readw(dws, sr) & SR_RF_NOT_EMPT) - && (dws->rx < dws->rx_end)) { - dw_readw(dws, dr); - dws->rx += n_bytes; - } - wait_till_not_busy(dws); - return dws->rx == dws->rx_end; -} - -static int u8_writer(struct dw_spi *dws) -{ if (!(dw_readw(dws, sr) & SR_TF_NOT_FULL) || (dws->tx == dws->tx_end)) return 0; - dw_writew(dws, dr, *(u8 *)(dws->tx)); - ++dws->tx; - - wait_till_not_busy(dws); - return 1; -} - -static int u8_reader(struct dw_spi *dws) -{ - while ((dw_readw(dws, sr) & SR_RF_NOT_EMPT) - && (dws->rx 
< dws->rx_end)) { - *(u8 *)(dws->rx) = dw_readw(dws, dr); - ++dws->rx; + /* Set the tx word if the transfer's original "tx" is not null */ + if (dws->tx_end - dws->len) { + if (dws->n_bytes == 1) + txw = *(u8 *)(dws->tx); + else + txw = *(u16 *)(dws->tx); } - wait_till_not_busy(dws); - return dws->rx == dws->rx_end; -} - -static int u16_writer(struct dw_spi *dws) -{ - if (!(dw_readw(dws, sr) & SR_TF_NOT_FULL) - || (dws->tx == dws->tx_end)) - return 0; - - dw_writew(dws, dr, *(u16 *)(dws->tx)); - dws->tx += 2; + dw_writew(dws, dr, txw); + dws->tx += dws->n_bytes; wait_till_not_busy(dws); return 1; } -static int u16_reader(struct dw_spi *dws) +static int dw_reader(struct dw_spi *dws) { - u16 temp; + u16 rxw; while ((dw_readw(dws, sr) & SR_RF_NOT_EMPT) && (dws->rx < dws->rx_end)) { - temp = dw_readw(dws, dr); - *(u16 *)(dws->rx) = temp; - dws->rx += 2; + rxw = dw_readw(dws, dr); + /* Care rx only if the transfer's original "rx" is not null */ + if (dws->rx_end - dws->len) { + if (dws->n_bytes == 1) + *(u8 *)(dws->rx) = rxw; + else + *(u16 *)(dws->rx) = rxw; + } + dws->rx += dws->n_bytes; } wait_till_not_busy(dws); @@ -383,8 +346,8 @@ static irqreturn_t interrupt_transfer(struct dw_spi *dws) left = (left > int_level) ? int_level : left; while (left--) - dws->write(dws); - dws->read(dws); + dw_writer(dws); + dw_reader(dws); /* Re-enable the IRQ if there is still data left to tx */ if (dws->tx_end > dws->tx) @@ -417,13 +380,13 @@ static irqreturn_t dw_spi_irq(int irq, void *dev_id) /* Must be called inside pump_transfers() */ static void poll_transfer(struct dw_spi *dws) { - while (dws->write(dws)) - dws->read(dws); + while (dw_writer(dws)) + dw_reader(dws); /* * There is a possibility that the last word of a transaction * will be lost if data is not ready. Re-read to solve this issue. */ - dws->read(dws); + dw_reader(dws); dw_spi_xfer_done(dws); } @@ -483,8 +446,6 @@ static void pump_transfers(unsigned long data) dws->tx_end = dws->tx + transfer->len; dws->rx = transfer->rx_buf; dws->rx_end = dws->rx + transfer->len; - dws->write = dws->tx ? chip->write : null_writer; - dws->read = dws->rx ? chip->read : null_reader; dws->cs_change = transfer->cs_change; dws->len = dws->cur_transfer->len; if (chip != dws->prev_chip) @@ -520,18 +481,10 @@ static void pump_transfers(unsigned long data) case 8: dws->n_bytes = 1; dws->dma_width = 1; - dws->read = (dws->read != null_reader) ? - u8_reader : null_reader; - dws->write = (dws->write != null_writer) ? - u8_writer : null_writer; break; case 16: dws->n_bytes = 2; dws->dma_width = 2; - dws->read = (dws->read != null_reader) ? - u16_reader : null_reader; - dws->write = (dws->write != null_writer) ? 
- u16_writer : null_writer; break; default: printk(KERN_ERR "MRST SPI0: unsupported bits:" @@ -733,13 +686,9 @@ static int dw_spi_setup(struct spi_device *spi) if (spi->bits_per_word <= 8) { chip->n_bytes = 1; chip->dma_width = 1; - chip->read = u8_reader; - chip->write = u8_writer; } else if (spi->bits_per_word <= 16) { chip->n_bytes = 2; chip->dma_width = 2; - chip->read = u16_reader; - chip->write = u16_writer; } else { /* Never take >16b case for MRST SPIC */ dev_err(&spi->dev, "invalid wordsize\n"); diff --git a/drivers/spi/dw_spi.h b/drivers/spi/dw_spi.h index fb0bce5..d8aac1f 100644 --- a/drivers/spi/dw_spi.h +++ b/drivers/spi/dw_spi.h @@ -137,8 +137,6 @@ struct dw_spi { u8 max_bits_per_word; /* maxim is 16b */ u32 dma_width; int cs_change; - int (*write)(struct dw_spi *dws); - int (*read)(struct dw_spi *dws); irqreturn_t (*transfer_handler)(struct dw_spi *dws); void (*cs_control)(u32 command); -- cgit v0.10.2 From 8a33a373e5ffb6040c58ff41ea48ba21d5f8b5e9 Mon Sep 17 00:00:00 2001 From: Alek Du Date: Wed, 30 Mar 2011 23:09:53 +0800 Subject: spi/dw_spi: remove the un-necessary flush() The flush() is used to drain all the left data in rx fifo, currently is is always called together with disabling hw. But from spec, disabling hw will also reset all the fifo, so flush() is not needed. Signed-off-by: Alek Du Signed-off-by: Feng Tang Signed-off-by: Grant Likely diff --git a/drivers/spi/dw_spi.c b/drivers/spi/dw_spi.c index c4fca3d..d3aaf8d 100644 --- a/drivers/spi/dw_spi.c +++ b/drivers/spi/dw_spi.c @@ -173,17 +173,6 @@ static void wait_till_not_busy(struct dw_spi *dws) "DW SPI: Status keeps busy for 5000us after a read/write!\n"); } -static void flush(struct dw_spi *dws) -{ - while (dw_readw(dws, sr) & SR_RF_NOT_EMPT) { - dw_readw(dws, dr); - cpu_relax(); - } - - wait_till_not_busy(dws); -} - - static int dw_writer(struct dw_spi *dws) { u16 txw = 0; @@ -297,8 +286,7 @@ static void giveback(struct dw_spi *dws) static void int_error_stop(struct dw_spi *dws, const char *msg) { - /* Stop and reset hw */ - flush(dws); + /* Stop the hw */ spi_enable_chip(dws, 0); dev_err(&dws->master->dev, "%s\n", msg); @@ -800,7 +788,6 @@ static void spi_hw_init(struct dw_spi *dws) spi_enable_chip(dws, 0); spi_mask_intr(dws, 0xff); spi_enable_chip(dws, 1); - flush(dws); /* * Try to detect the FIFO depth if not set by interface driver, -- cgit v0.10.2 From 2ff271bf6505038d8c937e73438ea3c80c387439 Mon Sep 17 00:00:00 2001 From: Alek Du Date: Wed, 30 Mar 2011 23:09:54 +0800 Subject: spi/dw_spi: change poll mode transfer from byte ops to batch ops Current poll transfer will read/write one word, then wait till the hw is non-busy, it's not efficient. This patch will try to read/write as many words as permitted by hardware FIFO depth. 
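The per-iteration budget computed by the new tx_max()/rx_max() helpers boils down to the smallest of three limits; a simplified sketch with the register reads replaced by plain parameters (all names here are illustrative):

    #include <stdint.h>

    static inline uint32_t min_u32(uint32_t a, uint32_t b)
    {
            return a < b ? a : b;
    }

    /* tx_left:   words still to send for this transfer
     * tx_room:   free slots in the TX FIFO (fifo_len - txflr)
     * in_flight: words already written to TX but not yet drained from RX
     * fifo_len:  hardware FIFO depth
     */
    static uint32_t example_tx_budget(uint32_t tx_left, uint32_t tx_room,
                                      uint32_t in_flight, uint32_t fifo_len)
    {
            /* Cap the batch so the RX FIFO cannot overflow while we
             * keep stuffing the TX side. */
            return min_u32(min_u32(tx_left, tx_room), fifo_len - in_flight);
    }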
Signed-off-by: Alek Du Signed-off-by: Feng Tang Signed-off-by: Grant Likely diff --git a/drivers/spi/dw_spi.c b/drivers/spi/dw_spi.c index d3aaf8d..7a2a722 100644 --- a/drivers/spi/dw_spi.c +++ b/drivers/spi/dw_spi.c @@ -160,6 +160,37 @@ static inline void mrst_spi_debugfs_remove(struct dw_spi *dws) } #endif /* CONFIG_DEBUG_FS */ +/* Return the max entries we can fill into tx fifo */ +static inline u32 tx_max(struct dw_spi *dws) +{ + u32 tx_left, tx_room, rxtx_gap; + + tx_left = (dws->tx_end - dws->tx) / dws->n_bytes; + tx_room = dws->fifo_len - dw_readw(dws, txflr); + + /* + * Another concern is about the tx/rx mismatch, we + * though to use (dws->fifo_len - rxflr - txflr) as + * one maximum value for tx, but it doesn't cover the + * data which is out of tx/rx fifo and inside the + * shift registers. So a control from sw point of + * view is taken. + */ + rxtx_gap = ((dws->rx_end - dws->rx) - (dws->tx_end - dws->tx)) + / dws->n_bytes; + + return min3(tx_left, tx_room, (u32) (dws->fifo_len - rxtx_gap)); +} + +/* Return the max entries we should read out of rx fifo */ +static inline u32 rx_max(struct dw_spi *dws) +{ + u32 rx_left = (dws->rx_end - dws->rx) / dws->n_bytes; + + return min(rx_left, (u32)dw_readw(dws, rxflr)); +} + + static void wait_till_not_busy(struct dw_spi *dws) { unsigned long end = jiffies + 1 + usecs_to_jiffies(5000); @@ -175,33 +206,30 @@ static void wait_till_not_busy(struct dw_spi *dws) static int dw_writer(struct dw_spi *dws) { + u32 max = tx_max(dws); u16 txw = 0; - if (!(dw_readw(dws, sr) & SR_TF_NOT_FULL) - || (dws->tx == dws->tx_end)) - return 0; - - /* Set the tx word if the transfer's original "tx" is not null */ - if (dws->tx_end - dws->len) { - if (dws->n_bytes == 1) - txw = *(u8 *)(dws->tx); - else - txw = *(u16 *)(dws->tx); + while (max--) { + /* Set the tx word if the transfer's original "tx" is not null */ + if (dws->tx_end - dws->len) { + if (dws->n_bytes == 1) + txw = *(u8 *)(dws->tx); + else + txw = *(u16 *)(dws->tx); + } + dw_writew(dws, dr, txw); + dws->tx += dws->n_bytes; } - dw_writew(dws, dr, txw); - dws->tx += dws->n_bytes; - - wait_till_not_busy(dws); return 1; } static int dw_reader(struct dw_spi *dws) { + u32 max = rx_max(dws); u16 rxw; - while ((dw_readw(dws, sr) & SR_RF_NOT_EMPT) - && (dws->rx < dws->rx_end)) { + while (max--) { rxw = dw_readw(dws, dr); /* Care rx only if the transfer's original "rx" is not null */ if (dws->rx_end - dws->len) { @@ -213,7 +241,6 @@ static int dw_reader(struct dw_spi *dws) dws->rx += dws->n_bytes; } - wait_till_not_busy(dws); return dws->rx == dws->rx_end; } @@ -368,13 +395,11 @@ static irqreturn_t dw_spi_irq(int irq, void *dev_id) /* Must be called inside pump_transfers() */ static void poll_transfer(struct dw_spi *dws) { - while (dw_writer(dws)) + do { + dw_writer(dws); dw_reader(dws); - /* - * There is a possibility that the last word of a transaction - * will be lost if data is not ready. Re-read to solve this issue. 
- */ - dw_reader(dws); + cpu_relax(); + } while (dws->rx_end > dws->rx); dw_spi_xfer_done(dws); } -- cgit v0.10.2 From 3b8a4dd3ebfcc647260ad5c39ef4f73eb3a6b155 Mon Sep 17 00:00:00 2001 From: Alek Du Date: Wed, 30 Mar 2011 23:09:55 +0800 Subject: spi/dw_spi: improve the interrupt mode with the batch ops leverage the performance gain by change in low level read/write batch operations Signed-off-by: Alek Du Signed-off-by: Feng Tang Signed-off-by: Grant Likely diff --git a/drivers/spi/dw_spi.c b/drivers/spi/dw_spi.c index 7a2a722..855ac4a 100644 --- a/drivers/spi/dw_spi.c +++ b/drivers/spi/dw_spi.c @@ -190,21 +190,7 @@ static inline u32 rx_max(struct dw_spi *dws) return min(rx_left, (u32)dw_readw(dws, rxflr)); } - -static void wait_till_not_busy(struct dw_spi *dws) -{ - unsigned long end = jiffies + 1 + usecs_to_jiffies(5000); - - while (time_before(jiffies, end)) { - if (!(dw_readw(dws, sr) & SR_BUSY)) - return; - cpu_relax(); - } - dev_err(&dws->master->dev, - "DW SPI: Status keeps busy for 5000us after a read/write!\n"); -} - -static int dw_writer(struct dw_spi *dws) +static void dw_writer(struct dw_spi *dws) { u32 max = tx_max(dws); u16 txw = 0; @@ -220,11 +206,9 @@ static int dw_writer(struct dw_spi *dws) dw_writew(dws, dr, txw); dws->tx += dws->n_bytes; } - - return 1; } -static int dw_reader(struct dw_spi *dws) +static void dw_reader(struct dw_spi *dws) { u32 max = rx_max(dws); u16 rxw; @@ -240,8 +224,6 @@ static int dw_reader(struct dw_spi *dws) } dws->rx += dws->n_bytes; } - - return dws->rx == dws->rx_end; } static void *next_transfer(struct dw_spi *dws) @@ -340,35 +322,28 @@ EXPORT_SYMBOL_GPL(dw_spi_xfer_done); static irqreturn_t interrupt_transfer(struct dw_spi *dws) { - u16 irq_status, irq_mask = 0x3f; - u32 int_level = dws->fifo_len / 2; - u32 left; + u16 irq_status = dw_readw(dws, isr); - irq_status = dw_readw(dws, isr) & irq_mask; /* Error handling */ if (irq_status & (SPI_INT_TXOI | SPI_INT_RXOI | SPI_INT_RXUI)) { dw_readw(dws, txoicr); dw_readw(dws, rxoicr); dw_readw(dws, rxuicr); - int_error_stop(dws, "interrupt_transfer: fifo overrun"); + int_error_stop(dws, "interrupt_transfer: fifo overrun/underrun"); return IRQ_HANDLED; } + dw_reader(dws); + if (dws->rx_end == dws->rx) { + spi_mask_intr(dws, SPI_INT_TXEI); + dw_spi_xfer_done(dws); + return IRQ_HANDLED; + } if (irq_status & SPI_INT_TXEI) { spi_mask_intr(dws, SPI_INT_TXEI); - - left = (dws->tx_end - dws->tx) / dws->n_bytes; - left = (left > int_level) ? 
int_level : left; - - while (left--) - dw_writer(dws); - dw_reader(dws); - - /* Re-enable the IRQ if there is still data left to tx */ - if (dws->tx_end > dws->tx) - spi_umask_intr(dws, SPI_INT_TXEI); - else - dw_spi_xfer_done(dws); + dw_writer(dws); + /* Enable TX irq always, it will be disabled when RX finished */ + spi_umask_intr(dws, SPI_INT_TXEI); } return IRQ_HANDLED; @@ -377,15 +352,13 @@ static irqreturn_t interrupt_transfer(struct dw_spi *dws) static irqreturn_t dw_spi_irq(int irq, void *dev_id) { struct dw_spi *dws = dev_id; - u16 irq_status, irq_mask = 0x3f; + u16 irq_status = dw_readw(dws, isr) & 0x3f; - irq_status = dw_readw(dws, isr) & irq_mask; if (!irq_status) return IRQ_NONE; if (!dws->cur_msg) { spi_mask_intr(dws, SPI_INT_TXEI); - /* Never fail */ return IRQ_HANDLED; } @@ -492,12 +465,8 @@ static void pump_transfers(unsigned long data) switch (bits) { case 8: - dws->n_bytes = 1; - dws->dma_width = 1; - break; case 16: - dws->n_bytes = 2; - dws->dma_width = 2; + dws->n_bytes = dws->dma_width = bits >> 3; break; default: printk(KERN_ERR "MRST SPI0: unsupported bits:" @@ -541,7 +510,7 @@ static void pump_transfers(unsigned long data) txint_level = dws->fifo_len / 2; txint_level = (templen > txint_level) ? txint_level : templen; - imask |= SPI_INT_TXEI; + imask |= SPI_INT_TXEI | SPI_INT_TXOI | SPI_INT_RXUI | SPI_INT_RXOI; dws->transfer_handler = interrupt_transfer; } -- cgit v0.10.2 From 84adccfb8cd2a6b8237da6752668ba25cd90c20b Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 24 Mar 2011 11:32:15 +0530 Subject: dmaengine/dw_dmac fix: dwc_scan_descriptors must compare first desc address also with llp dwc_scan_descriptors scans all descriptors from active_list in case transfer is not completed. It compares first_desc->lli.llp, and then all childrens of its tx_list. But it doesn't compare its own address, i.e. first_desc->txd.phys, as this is what we have initially programmed into the controller register. So this causes dma to stop and finish a transfer, which was never started. And thus fail. Signed-off-by: Viresh Kumar Signed-off-by: Vinod Koul diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index 9c25c7d..b15c32c 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -304,6 +304,11 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) dev_vdbg(chan2dev(&dwc->chan), "scan_descriptors: llp=0x%x\n", llp); list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) { + /* check first descriptors addr */ + if (desc->txd.phys == llp) + return; + + /* check first descriptors llp */ if (desc->lli.llp == llp) /* This one is currently in progress */ return; -- cgit v0.10.2 From e2ec771a99a5cf231c9dea4da26238bf073e1e9c Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sun, 27 Mar 2011 01:26:52 +0800 Subject: dma: use BUG_ON correctly in iop-adma.c, v4 This patch makes BUG_ON() usage correct in drivers/dma/iop-adma.c. 
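The unlikely() wrapper is redundant because BUG_ON() already annotates its condition as unlikely in the generic definition. A minimal sketch, assuming the asm-generic fallback of this era (architectures may provide their own variant):

/*
 * Simplified form of the include/asm-generic/bug.h definition: the condition
 * is already wrapped in unlikely(), so BUG_ON(unlikely(expr)) double-wraps it
 * for no benefit and only obscures the assertion.
 */
#define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while (0)

/* The cleanup therefore turns, for example:
 *     BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT));
 * into the equivalent but clearer:
 *     BUG_ON(len > IOP_ADMA_MAX_BYTE_COUNT);
 */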
Cc: Dan Williams Cc: Vinod Koul Signed-off-by: Coly Li Signed-off-by: Vinod Koul diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c index c6b01f5..e03f811 100644 --- a/drivers/dma/iop-adma.c +++ b/drivers/dma/iop-adma.c @@ -619,7 +619,7 @@ iop_adma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dma_dest, if (unlikely(!len)) return NULL; - BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT)); + BUG_ON(len > IOP_ADMA_MAX_BYTE_COUNT); dev_dbg(iop_chan->device->common.dev, "%s len: %u\n", __func__, len); @@ -652,7 +652,7 @@ iop_adma_prep_dma_memset(struct dma_chan *chan, dma_addr_t dma_dest, if (unlikely(!len)) return NULL; - BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT)); + BUG_ON(len > IOP_ADMA_MAX_BYTE_COUNT); dev_dbg(iop_chan->device->common.dev, "%s len: %u\n", __func__, len); @@ -686,7 +686,7 @@ iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest, if (unlikely(!len)) return NULL; - BUG_ON(unlikely(len > IOP_ADMA_XOR_MAX_BYTE_COUNT)); + BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT); dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u flags: %lx\n", -- cgit v0.10.2 From 7912d30007d0c958bcf11cd5ce19f77856cf041b Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sun, 27 Mar 2011 01:26:53 +0800 Subject: dma: use BUG_ON correctly in mv_xor.c, v4 This patch makes BUG_ON() usage correct in drivers/dma/mv_xor.c Cc: Vinod Koul Cc: Dan Williams Signed-off-by: Coly Li Signed-off-by: Vinod Koul diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index a25f5f6..954e334 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -671,7 +671,7 @@ mv_xor_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, if (unlikely(len < MV_XOR_MIN_BYTE_COUNT)) return NULL; - BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT)); + BUG_ON(len > MV_XOR_MAX_BYTE_COUNT); spin_lock_bh(&mv_chan->lock); slot_cnt = mv_chan_memcpy_slot_count(len); @@ -710,7 +710,7 @@ mv_xor_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value, if (unlikely(len < MV_XOR_MIN_BYTE_COUNT)) return NULL; - BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT)); + BUG_ON(len > MV_XOR_MAX_BYTE_COUNT); spin_lock_bh(&mv_chan->lock); slot_cnt = mv_chan_memset_slot_count(len); @@ -744,7 +744,7 @@ mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, if (unlikely(len < MV_XOR_MIN_BYTE_COUNT)) return NULL; - BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT)); + BUG_ON(len > MV_XOR_MAX_BYTE_COUNT); dev_dbg(mv_chan->device->common.dev, "%s src_cnt: %d len: dest %x %u flags: %ld\n", -- cgit v0.10.2 From 427cdf19b97e509e21e5d347e18d8b0b34723dfc Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sun, 27 Mar 2011 01:26:54 +0800 Subject: dma: use BUG_ON correctly in ppc4xx/adam.c, v4 This patch makes BUG_ON() usage correct in drivers/dma/ppc4xx/adam.c Cc: Vinod Koul Cc: Dan Williams Cc: Grant Likely Cc: Anatolij Gustschin Cc: Sean MacLennan Cc: Joe Perches Signed-off-by: Coly Li Signed-off-by: Vinod Koul diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c index cef5845..a2b62e9 100644 --- a/drivers/dma/ppc4xx/adma.c +++ b/drivers/dma/ppc4xx/adma.c @@ -2313,7 +2313,7 @@ static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_memcpy( if (unlikely(!len)) return NULL; - BUG_ON(unlikely(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT)); + BUG_ON(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT); spin_lock_bh(&ppc440spe_chan->lock); @@ -2354,7 +2354,7 @@ static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_memset( if (unlikely(!len)) return NULL; - BUG_ON(unlikely(len > 
PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT)); + BUG_ON(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT); spin_lock_bh(&ppc440spe_chan->lock); @@ -2397,7 +2397,7 @@ static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_xor( dma_dest, dma_src, src_cnt)); if (unlikely(!len)) return NULL; - BUG_ON(unlikely(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT)); + BUG_ON(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT); dev_dbg(ppc440spe_chan->device->common.dev, "ppc440spe adma%d: %s src_cnt: %d len: %u int_en: %d\n", @@ -2887,7 +2887,7 @@ static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_pq( ADMA_LL_DBG(prep_dma_pq_dbg(ppc440spe_chan->device->id, dst, src, src_cnt)); BUG_ON(!len); - BUG_ON(unlikely(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT)); + BUG_ON(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT); BUG_ON(!src_cnt); if (src_cnt == 1 && dst[1] == src[0]) { -- cgit v0.10.2 From 1cb7b1e0de6a1f8f071f4a146e3d10f3a662f707 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Thu, 31 Mar 2011 13:36:38 +0200 Subject: ACPI EC: remove dead code static void acpi_ec_gpe_query(void *ec_cxt); -> The function is right above this declaration -> not needed. poll_force is also not used, cleaned up in ec.c and its users: compal-laptop and msi-laptop. Signed-off-by: Thomas Renninger Signed-off-by: Len Brown diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index fa848c4..b3f1d6f 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -69,7 +69,6 @@ enum ec_command { #define ACPI_EC_DELAY 500 /* Wait 500ms max. during EC ops */ #define ACPI_EC_UDELAY_GLK 1000 /* Wait 1ms max. to get global lock */ -#define ACPI_EC_CDELAY 10 /* Wait 10us before polling EC */ #define ACPI_EC_MSI_UDELAY 550 /* Wait 550us for MSI EC */ #define ACPI_EC_STORM_THRESHOLD 8 /* number of false interrupts @@ -433,8 +432,7 @@ EXPORT_SYMBOL(ec_write); int ec_transaction(u8 command, const u8 * wdata, unsigned wdata_len, - u8 * rdata, unsigned rdata_len, - int force_poll) + u8 * rdata, unsigned rdata_len) { struct transaction t = {.command = command, .wdata = wdata, .rdata = rdata, @@ -592,8 +590,6 @@ static void acpi_ec_gpe_query(void *ec_cxt) mutex_unlock(&ec->lock); } -static void acpi_ec_gpe_query(void *ec_cxt); - static int ec_check_sci(struct acpi_ec *ec, u8 state) { if (state & ACPI_EC_FLAG_SCI) { diff --git a/drivers/platform/x86/compal-laptop.c b/drivers/platform/x86/compal-laptop.c index 034572b..f4f43e6 100644 --- a/drivers/platform/x86/compal-laptop.c +++ b/drivers/platform/x86/compal-laptop.c @@ -200,7 +200,7 @@ static bool extra_features; * watching the output of address 0x4F (do an ec_transaction writing 0x33 * into 0x4F and read a few bytes from the output, like so: * u8 writeData = 0x33; - * ec_transaction(0x4F, &writeData, 1, buffer, 32, 0); + * ec_transaction(0x4F, &writeData, 1, buffer, 32); * That address is labled "fan1 table information" in the service manual. * It should be clear which value in 'buffer' changes). This seems to be * related to fan speed. It isn't a proper 'realtime' fan speed value @@ -286,7 +286,7 @@ static int get_backlight_level(void) static void set_backlight_state(bool on) { u8 data = on ? 
BACKLIGHT_STATE_ON_DATA : BACKLIGHT_STATE_OFF_DATA; - ec_transaction(BACKLIGHT_STATE_ADDR, &data, 1, NULL, 0, 0); + ec_transaction(BACKLIGHT_STATE_ADDR, &data, 1, NULL, 0); } @@ -294,24 +294,24 @@ static void set_backlight_state(bool on) static void pwm_enable_control(void) { unsigned char writeData = PWM_ENABLE_DATA; - ec_transaction(PWM_ENABLE_ADDR, &writeData, 1, NULL, 0, 0); + ec_transaction(PWM_ENABLE_ADDR, &writeData, 1, NULL, 0); } static void pwm_disable_control(void) { unsigned char writeData = PWM_DISABLE_DATA; - ec_transaction(PWM_DISABLE_ADDR, &writeData, 1, NULL, 0, 0); + ec_transaction(PWM_DISABLE_ADDR, &writeData, 1, NULL, 0); } static void set_pwm(int pwm) { - ec_transaction(PWM_ADDRESS, &pwm_lookup_table[pwm], 1, NULL, 0, 0); + ec_transaction(PWM_ADDRESS, &pwm_lookup_table[pwm], 1, NULL, 0); } static int get_fan_rpm(void) { u8 value, data = FAN_DATA; - ec_transaction(FAN_ADDRESS, &data, 1, &value, 1, 0); + ec_transaction(FAN_ADDRESS, &data, 1, &value, 1); return 100 * (int)value; } diff --git a/drivers/platform/x86/msi-laptop.c b/drivers/platform/x86/msi-laptop.c index 7e9bb6d..918a65d 100644 --- a/drivers/platform/x86/msi-laptop.c +++ b/drivers/platform/x86/msi-laptop.c @@ -120,7 +120,7 @@ static int set_lcd_level(int level) buf[1] = (u8) (level*31); return ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, buf, sizeof(buf), - NULL, 0, 1); + NULL, 0); } static int get_lcd_level(void) @@ -129,7 +129,7 @@ static int get_lcd_level(void) int result; result = ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, &wdata, 1, - &rdata, 1, 1); + &rdata, 1); if (result < 0) return result; @@ -142,7 +142,7 @@ static int get_auto_brightness(void) int result; result = ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, &wdata, 1, - &rdata, 1, 1); + &rdata, 1); if (result < 0) return result; @@ -157,7 +157,7 @@ static int set_auto_brightness(int enable) wdata[0] = 4; result = ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, wdata, 1, - &rdata, 1, 1); + &rdata, 1); if (result < 0) return result; @@ -165,7 +165,7 @@ static int set_auto_brightness(int enable) wdata[1] = (rdata & 0xF7) | (enable ? 8 : 0); return ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, wdata, 2, - NULL, 0, 1); + NULL, 0); } static ssize_t set_device_state(const char *buf, size_t count, u8 mask) @@ -202,7 +202,7 @@ static int get_wireless_state(int *wlan, int *bluetooth) u8 wdata = 0, rdata; int result; - result = ec_transaction(MSI_EC_COMMAND_WIRELESS, &wdata, 1, &rdata, 1, 1); + result = ec_transaction(MSI_EC_COMMAND_WIRELESS, &wdata, 1, &rdata, 1); if (result < 0) return -1; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index a2e910e..1deb2a7 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -150,8 +150,7 @@ extern int ec_read(u8 addr, u8 *val); extern int ec_write(u8 addr, u8 val); extern int ec_transaction(u8 command, const u8 *wdata, unsigned wdata_len, - u8 *rdata, unsigned rdata_len, - int force_poll); + u8 *rdata, unsigned rdata_len); #if defined(CONFIG_ACPI_WMI) || defined(CONFIG_ACPI_WMI_MODULE) -- cgit v0.10.2 From e2142df7ec7184ed4a77ada686bc1eb41075490f Mon Sep 17 00:00:00 2001 From: Kristen Carlson Accardi Date: Thu, 31 Mar 2011 11:02:43 -0700 Subject: intel_mid_dma: fix runtime pm issues Use the correct api in probe to enable runtime pm for this driver. Additionally, do not just call legacy suspend for runtime_suspend, as this duplicates some work the pci core does for you. 
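For reference, a rough sketch of the probe/remove pattern the driver moves to, assuming the PCI core has already marked the device active and enabled runtime PM before probe runs (which is the work the old pm_runtime_set_active()/pm_runtime_enable() calls duplicated); my_probe()/my_remove() are placeholder names, not functions from the driver:

static int my_probe(struct pci_dev *pdev)
{
	/* ... usual device setup ... */

	/* pm_runtime_put_noidle() drops a usage-count reference without
	 * running the idle callback; pm_runtime_allow() then permits
	 * runtime PM for the device. */
	pm_runtime_put_noidle(&pdev->dev);
	pm_runtime_allow(&pdev->dev);
	return 0;
}

static void my_remove(struct pci_dev *pdev)
{
	/* Mirror image of probe: pm_runtime_get_noresume() re-takes the
	 * reference without resuming, pm_runtime_forbid() blocks runtime
	 * PM again before the device is torn down. */
	pm_runtime_get_noresume(&pdev->dev);
	pm_runtime_forbid(&pdev->dev);
	/* ... teardown ... */
}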
Signed-off-by: Kristen Carlson Accardi Signed-off-by: Vinod Koul diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c index 798f46a..f153adf 100644 --- a/drivers/dma/intel_mid_dma.c +++ b/drivers/dma/intel_mid_dma.c @@ -1292,8 +1292,7 @@ static int __devinit intel_mid_dma_probe(struct pci_dev *pdev, if (err) goto err_dma; - pm_runtime_set_active(&pdev->dev); - pm_runtime_enable(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); pm_runtime_allow(&pdev->dev); return 0; @@ -1322,6 +1321,9 @@ err_enable_device: static void __devexit intel_mid_dma_remove(struct pci_dev *pdev) { struct middma_device *device = pci_get_drvdata(pdev); + + pm_runtime_get_noresume(&pdev->dev); + pm_runtime_forbid(&pdev->dev); middma_shutdown(pdev); pci_dev_put(pdev); kfree(device); @@ -1385,13 +1387,20 @@ int dma_resume(struct pci_dev *pci) static int dma_runtime_suspend(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - return dma_suspend(pci_dev, PMSG_SUSPEND); + struct middma_device *device = pci_get_drvdata(pci_dev); + + device->state = SUSPENDED; + return 0; } static int dma_runtime_resume(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - return dma_resume(pci_dev); + struct middma_device *device = pci_get_drvdata(pci_dev); + + device->state = RUNNING; + iowrite32(REG_BIT0, device->dma_base + DMA_CFG); + return 0; } static int dma_runtime_idle(struct device *dev) -- cgit v0.10.2 From c6ff669bac5c409f4cb74366248f51b73f7d6feb Mon Sep 17 00:00:00 2001 From: David Teigland Date: Mon, 28 Mar 2011 14:17:26 -0500 Subject: dlm: delayed reply message warning Add an option (disabled by default) to print a warning message when a lock has been waiting a configurable amount of time for a reply message from another node. This is mainly for debugging. 
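The threshold is a new cluster configfs attribute (waitwarn_us, 0 and therefore off by default), and the test it drives is simple. A condensed sketch of the per-lkb check added in dlm_scan_waiters() below, using the field names from the diff; this is not a separate helper in the patch:

s64 us = ktime_to_us(ktime_sub(ktime_get(), lkb->lkb_wait_time));

if (dlm_config.ci_waitwarn_us && us >= dlm_config.ci_waitwarn_us)
	log_error(ls, "waitwarn %x %lld %d us check connection to node %d",
		  lkb->lkb_id, (long long)us,
		  dlm_config.ci_waitwarn_us, lkb->lkb_wait_nodeid);

(One caveat when reading the hunk: kmalloc() takes the size first and the flags second, so the kmalloc(GFP_KERNEL, num_nodes * sizeof(int)) call for the per-node "warned" array has its arguments reversed.)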
Signed-off-by: David Teigland diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 0d329ff..9b026ea 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -100,6 +100,7 @@ struct dlm_cluster { unsigned int cl_log_debug; unsigned int cl_protocol; unsigned int cl_timewarn_cs; + unsigned int cl_waitwarn_us; }; enum { @@ -114,6 +115,7 @@ enum { CLUSTER_ATTR_LOG_DEBUG, CLUSTER_ATTR_PROTOCOL, CLUSTER_ATTR_TIMEWARN_CS, + CLUSTER_ATTR_WAITWARN_US, }; struct cluster_attribute { @@ -166,6 +168,7 @@ CLUSTER_ATTR(scan_secs, 1); CLUSTER_ATTR(log_debug, 0); CLUSTER_ATTR(protocol, 0); CLUSTER_ATTR(timewarn_cs, 1); +CLUSTER_ATTR(waitwarn_us, 0); static struct configfs_attribute *cluster_attrs[] = { [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, @@ -179,6 +182,7 @@ static struct configfs_attribute *cluster_attrs[] = { [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr, [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr, [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr, + [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr, NULL, }; @@ -439,6 +443,7 @@ static struct config_group *make_cluster(struct config_group *g, cl->cl_log_debug = dlm_config.ci_log_debug; cl->cl_protocol = dlm_config.ci_protocol; cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs; + cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us; space_list = &sps->ss_group; comm_list = &cms->cs_group; @@ -986,6 +991,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num) #define DEFAULT_LOG_DEBUG 0 #define DEFAULT_PROTOCOL 0 #define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */ +#define DEFAULT_WAITWARN_US 0 struct dlm_config_info dlm_config = { .ci_tcp_port = DEFAULT_TCP_PORT, @@ -998,6 +1004,7 @@ struct dlm_config_info dlm_config = { .ci_scan_secs = DEFAULT_SCAN_SECS, .ci_log_debug = DEFAULT_LOG_DEBUG, .ci_protocol = DEFAULT_PROTOCOL, - .ci_timewarn_cs = DEFAULT_TIMEWARN_CS + .ci_timewarn_cs = DEFAULT_TIMEWARN_CS, + .ci_waitwarn_us = DEFAULT_WAITWARN_US }; diff --git a/fs/dlm/config.h b/fs/dlm/config.h index 4f1d6fc..dd0ce24 100644 --- a/fs/dlm/config.h +++ b/fs/dlm/config.h @@ -28,6 +28,7 @@ struct dlm_config_info { int ci_log_debug; int ci_protocol; int ci_timewarn_cs; + int ci_waitwarn_us; }; extern struct dlm_config_info dlm_config; diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index b942049..6a92478 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -245,6 +245,7 @@ struct dlm_lkb { int8_t lkb_wait_type; /* type of reply waiting for */ int8_t lkb_wait_count; + int lkb_wait_nodeid; /* for debugging */ struct list_head lkb_idtbl_list; /* lockspace lkbtbl */ struct list_head lkb_statequeue; /* rsb g/c/w list */ @@ -254,6 +255,7 @@ struct dlm_lkb { struct list_head lkb_ownqueue; /* list of locks for a process */ struct list_head lkb_time_list; ktime_t lkb_timestamp; + ktime_t lkb_wait_time; unsigned long lkb_timeout_cs; struct dlm_callback lkb_callbacks[DLM_CALLBACKS_SIZE]; diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 04b8c44..e3c8641 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -799,10 +799,84 @@ static int msg_reply_type(int mstype) return -1; } +static int nodeid_warned(int nodeid, int num_nodes, int *warned) +{ + int i; + + for (i = 0; i < num_nodes; i++) { + if (!warned[i]) { + warned[i] = nodeid; + return 0; + } + if (warned[i] == nodeid) + return 1; + } + return 0; +} + +void dlm_scan_waiters(struct dlm_ls *ls) +{ + struct dlm_lkb *lkb; + ktime_t zero = ktime_set(0, 0); + s64 us; + s64 debug_maxus = 0; + u32 debug_scanned = 0; + u32 debug_expired = 0; + int 
num_nodes = 0; + int *warned = NULL; + + if (!dlm_config.ci_waitwarn_us) + return; + + mutex_lock(&ls->ls_waiters_mutex); + + list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) { + if (ktime_equal(lkb->lkb_wait_time, zero)) + continue; + + debug_scanned++; + + us = ktime_to_us(ktime_sub(ktime_get(), lkb->lkb_wait_time)); + + if (us < dlm_config.ci_waitwarn_us) + continue; + + lkb->lkb_wait_time = zero; + + debug_expired++; + if (us > debug_maxus) + debug_maxus = us; + + if (!num_nodes) { + num_nodes = ls->ls_num_nodes; + warned = kmalloc(GFP_KERNEL, num_nodes * sizeof(int)); + if (warned) + memset(warned, 0, num_nodes * sizeof(int)); + } + if (!warned) + continue; + if (nodeid_warned(lkb->lkb_wait_nodeid, num_nodes, warned)) + continue; + + log_error(ls, "waitwarn %x %lld %d us check connection to " + "node %d", lkb->lkb_id, (long long)us, + dlm_config.ci_waitwarn_us, lkb->lkb_wait_nodeid); + } + mutex_unlock(&ls->ls_waiters_mutex); + + if (warned) + kfree(warned); + + if (debug_expired) + log_debug(ls, "scan_waiters %u warn %u over %d us max %lld us", + debug_scanned, debug_expired, + dlm_config.ci_waitwarn_us, (long long)debug_maxus); +} + /* add/remove lkb from global waiters list of lkb's waiting for a reply from a remote node */ -static int add_to_waiters(struct dlm_lkb *lkb, int mstype) +static int add_to_waiters(struct dlm_lkb *lkb, int mstype, int to_nodeid) { struct dlm_ls *ls = lkb->lkb_resource->res_ls; int error = 0; @@ -842,6 +916,8 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype) lkb->lkb_wait_count++; lkb->lkb_wait_type = mstype; + lkb->lkb_wait_time = ktime_get(); + lkb->lkb_wait_nodeid = to_nodeid; /* for debugging */ hold_lkb(lkb); list_add(&lkb->lkb_wait_reply, &ls->ls_waiters); out: @@ -1157,6 +1233,16 @@ void dlm_adjust_timeouts(struct dlm_ls *ls) list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) lkb->lkb_timestamp = ktime_add_us(lkb->lkb_timestamp, adj_us); mutex_unlock(&ls->ls_timeout_mutex); + + if (!dlm_config.ci_waitwarn_us) + return; + + mutex_lock(&ls->ls_waiters_mutex); + list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) { + if (ktime_to_us(lkb->lkb_wait_time)) + lkb->lkb_wait_time = ktime_get(); + } + mutex_unlock(&ls->ls_waiters_mutex); } /* lkb is master or local copy */ @@ -2844,12 +2930,12 @@ static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype) struct dlm_mhandle *mh; int to_nodeid, error; - error = add_to_waiters(lkb, mstype); + to_nodeid = r->res_nodeid; + + error = add_to_waiters(lkb, mstype, to_nodeid); if (error) return error; - to_nodeid = r->res_nodeid; - error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh); if (error) goto fail; @@ -2951,12 +3037,12 @@ static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb) struct dlm_mhandle *mh; int to_nodeid, error; - error = add_to_waiters(lkb, DLM_MSG_LOOKUP); + to_nodeid = dlm_dir_nodeid(r); + + error = add_to_waiters(lkb, DLM_MSG_LOOKUP, to_nodeid); if (error) return error; - to_nodeid = dlm_dir_nodeid(r); - error = create_message(r, NULL, to_nodeid, DLM_MSG_LOOKUP, &ms, &mh); if (error) goto fail; diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h index 88e93c8..265017a 100644 --- a/fs/dlm/lock.h +++ b/fs/dlm/lock.h @@ -24,6 +24,7 @@ int dlm_put_lkb(struct dlm_lkb *lkb); void dlm_scan_rsbs(struct dlm_ls *ls); int dlm_lock_recovery_try(struct dlm_ls *ls); void dlm_unlock_recovery(struct dlm_ls *ls); +void dlm_scan_waiters(struct dlm_ls *ls); void dlm_scan_timeout(struct dlm_ls *ls); void dlm_adjust_timeouts(struct dlm_ls *ls); diff --git 
a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index f994a7d..14cbf40 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -243,7 +243,6 @@ static struct dlm_ls *find_ls_to_scan(void) static int dlm_scand(void *data) { struct dlm_ls *ls; - int timeout_jiffies = dlm_config.ci_scan_secs * HZ; while (!kthread_should_stop()) { ls = find_ls_to_scan(); @@ -252,13 +251,14 @@ static int dlm_scand(void *data) ls->ls_scan_time = jiffies; dlm_scan_rsbs(ls); dlm_scan_timeout(ls); + dlm_scan_waiters(ls); dlm_unlock_recovery(ls); } else { ls->ls_scan_time += HZ; } - } else { - schedule_timeout_interruptible(timeout_jiffies); + continue; } + schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ); } return 0; } -- cgit v0.10.2 From 6bde95ce33e1c2ac9b5cb3d814722105131090ec Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 1 Apr 2011 17:09:41 -0400 Subject: SELinux: update git tree in MAINTAINERS update the git tree in MAINTAINERS Signed-off-by: Eric Paris diff --git a/MAINTAINERS b/MAINTAINERS index 560ecce..3297b8f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5542,10 +5542,11 @@ M: James Morris M: Eric Paris L: selinux@tycho.nsa.gov (subscribers-only, general discussion) W: http://selinuxproject.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/security-testing-2.6.git +T: git git://git.infradead.org/users/eparis/selinux.git S: Supported F: include/linux/selinux* F: security/selinux/ +F: scripts/selinux/ APPARMOR SECURITY MODULE M: John Johansen -- cgit v0.10.2 From f50a3ec961f90e38c0311411179d5dfee1412192 Mon Sep 17 00:00:00 2001 From: Kohei Kaigai Date: Fri, 1 Apr 2011 15:39:26 +0100 Subject: selinux: add type_transition with name extension support for selinuxfs The attached patch allows /selinux/create takes optional 4th argument to support TYPE_TRANSITION with name extension for userspace object managers. If 4th argument is not supplied, it shall perform as existing kernel. In fact, the regression test of SE-PostgreSQL works well on the patched kernel. 
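To make the extension concrete, a minimal user-space sketch of driving the node directly, assuming the legacy /selinux mount point; the helper name is hypothetical, and a real object manager would normally go through a libselinux wrapper rather than open-coding this. The write matches the "%s %s %hu %s" format parsed by sel_write_create(), so none of the fields, including the object name, may contain whitespace:

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

/* Hypothetical helper, for illustration only. */
static int compute_create_name(const char *scon, const char *tcon,
			       unsigned short tclass, const char *objname,
			       char *newcon, size_t len)
{
	char buf[512];
	int fd, n;

	fd = open("/selinux/create", O_RDWR);
	if (fd < 0)
		return -1;

	/* The 4th field is the new, optional object name. */
	n = snprintf(buf, sizeof(buf), "%s %s %hu %s",
		     scon, tcon, tclass, objname);
	if (n < 0 || n >= (int)sizeof(buf) || write(fd, buf, n) != n) {
		close(fd);
		return -1;
	}

	/* selinuxfs transaction files return the result on a read of the
	 * same descriptor. */
	n = read(fd, newcon, len - 1);
	close(fd);
	if (n < 0)
		return -1;
	newcon[n] = '\0';
	return 0;
}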
Thanks, Signed-off-by: KaiGai Kohei [manually verify fuzz was not an issue, and it wasn't: eparis] Signed-off-by: Eric Paris diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index bfc5218..2cf6708 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -112,8 +112,8 @@ void security_compute_av_user(u32 ssid, u32 tsid, int security_transition_sid(u32 ssid, u32 tsid, u16 tclass, const struct qstr *qstr, u32 *out_sid); -int security_transition_sid_user(u32 ssid, u32 tsid, - u16 tclass, u32 *out_sid); +int security_transition_sid_user(u32 ssid, u32 tsid, u16 tclass, + const char *objname, u32 *out_sid); int security_member_sid(u32 ssid, u32 tsid, u16 tclass, u32 *out_sid); diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index ea39cb7..973f5a4 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -753,11 +753,13 @@ out: static ssize_t sel_write_create(struct file *file, char *buf, size_t size) { char *scon = NULL, *tcon = NULL; + char *namebuf = NULL, *objname = NULL; u32 ssid, tsid, newsid; u16 tclass; ssize_t length; char *newcon = NULL; u32 len; + int nargs; length = task_has_security(current, SECURITY__COMPUTE_CREATE); if (length) @@ -773,9 +775,17 @@ static ssize_t sel_write_create(struct file *file, char *buf, size_t size) if (!tcon) goto out; + length = -ENOMEM; + namebuf = kzalloc(size + 1, GFP_KERNEL); + if (!namebuf) + goto out; + length = -EINVAL; - if (sscanf(buf, "%s %s %hu", scon, tcon, &tclass) != 3) + nargs = sscanf(buf, "%s %s %hu %s", scon, tcon, &tclass, namebuf); + if (nargs < 3 || nargs > 4) goto out; + if (nargs == 4) + objname = namebuf; length = security_context_to_sid(scon, strlen(scon) + 1, &ssid); if (length) @@ -785,7 +795,8 @@ static ssize_t sel_write_create(struct file *file, char *buf, size_t size) if (length) goto out; - length = security_transition_sid_user(ssid, tsid, tclass, &newsid); + length = security_transition_sid_user(ssid, tsid, tclass, + objname, &newsid); if (length) goto out; @@ -804,6 +815,7 @@ static ssize_t sel_write_create(struct file *file, char *buf, size_t size) length = len; out: kfree(newcon); + kfree(namebuf); kfree(tcon); kfree(scon); return length; diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 03f7a47..39d7321 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1360,14 +1360,14 @@ out: static void filename_compute_type(struct policydb *p, struct context *newcontext, u32 scon, u32 tcon, u16 tclass, - const struct qstr *qstr) + const char *objname) { struct filename_trans *ft; for (ft = p->filename_trans; ft; ft = ft->next) { if (ft->stype == scon && ft->ttype == tcon && ft->tclass == tclass && - !strcmp(ft->name, qstr->name)) { + !strcmp(ft->name, objname)) { newcontext->type = ft->otype; return; } @@ -1378,7 +1378,7 @@ static int security_compute_sid(u32 ssid, u32 tsid, u16 orig_tclass, u32 specified, - const struct qstr *qstr, + const char *objname, u32 *out_sid, bool kern) { @@ -1479,9 +1479,9 @@ static int security_compute_sid(u32 ssid, } /* if we have a qstr this is a file trans check so check those rules */ - if (qstr) + if (objname) filename_compute_type(&policydb, &newcontext, scontext->type, - tcontext->type, tclass, qstr); + tcontext->type, tclass, objname); /* Check for class-specific changes. 
*/ if (specified & AVTAB_TRANSITION) { @@ -1539,13 +1539,14 @@ int security_transition_sid(u32 ssid, u32 tsid, u16 tclass, const struct qstr *qstr, u32 *out_sid) { return security_compute_sid(ssid, tsid, tclass, AVTAB_TRANSITION, - qstr, out_sid, true); + qstr ? qstr->name : NULL, out_sid, true); } -int security_transition_sid_user(u32 ssid, u32 tsid, u16 tclass, u32 *out_sid) +int security_transition_sid_user(u32 ssid, u32 tsid, u16 tclass, + const char *objname, u32 *out_sid) { return security_compute_sid(ssid, tsid, tclass, AVTAB_TRANSITION, - NULL, out_sid, false); + objname, out_sid, false); } /** -- cgit v0.10.2 From 6ea0c34dac89611126455537552cffe6c7e832ad Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Mon, 4 Apr 2011 01:41:32 +0200 Subject: percpu: Unify input section names The two percpu helper macros have the section names duplicated. So create a new define to merge the two. This also allows arches who need to link things more directly themselves to avoid duplicating the input sections in their linker script. Signed-off-by: Mike Frysinger Signed-off-by: Tejun Heo diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 32c45e5..bf90fbc 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -688,6 +688,28 @@ } /** + * PERCPU_INPUT - the percpu input sections + * @cacheline: cacheline size + * + * The core percpu section names and core symbols which do not rely + * directly upon load addresses. + * + * @cacheline is used to align subsections to avoid false cacheline + * sharing between subsections for different purposes. + */ +#define PERCPU_INPUT(cacheline) \ + VMLINUX_SYMBOL(__per_cpu_start) = .; \ + *(.data..percpu..first) \ + . = ALIGN(PAGE_SIZE); \ + *(.data..percpu..page_aligned) \ + . = ALIGN(cacheline); \ + *(.data..percpu..readmostly) \ + . = ALIGN(cacheline); \ + *(.data..percpu) \ + *(.data..percpu..shared_aligned) \ + VMLINUX_SYMBOL(__per_cpu_end) = .; + +/** * PERCPU_VADDR - define output section for percpu area * @cacheline: cacheline size * @vaddr: explicit base address (optional) @@ -715,16 +737,7 @@ VMLINUX_SYMBOL(__per_cpu_load) = .; \ .data..percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \ - LOAD_OFFSET) { \ - VMLINUX_SYMBOL(__per_cpu_start) = .; \ - *(.data..percpu..first) \ - . = ALIGN(PAGE_SIZE); \ - *(.data..percpu..page_aligned) \ - . = ALIGN(cacheline); \ - *(.data..percpu..readmostly) \ - . = ALIGN(cacheline); \ - *(.data..percpu) \ - *(.data..percpu..shared_aligned) \ - VMLINUX_SYMBOL(__per_cpu_end) = .; \ + PERCPU_INPUT(cacheline) \ } phdr \ . = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data..percpu); @@ -745,16 +758,7 @@ . = ALIGN(align); \ .data..percpu : AT(ADDR(.data..percpu) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__per_cpu_load) = .; \ - VMLINUX_SYMBOL(__per_cpu_start) = .; \ - *(.data..percpu..first) \ - . = ALIGN(PAGE_SIZE); \ - *(.data..percpu..page_aligned) \ - . = ALIGN(cacheline); \ - *(.data..percpu..readmostly) \ - . = ALIGN(cacheline); \ - *(.data..percpu) \ - *(.data..percpu..shared_aligned) \ - VMLINUX_SYMBOL(__per_cpu_end) = .; \ + PERCPU_INPUT(cacheline) \ } -- cgit v0.10.2 From 17f60a7da150fdd0cfb9756f86a262daa72c835f Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 1 Apr 2011 17:07:50 -0400 Subject: capabilites: allow the application of capability limits to usermode helpers There is no way to limit the capabilities of usermodehelpers. 
This problem reared its head recently when someone complained that any user with cap_net_admin was able to load arbitrary kernel modules, even though the user didn't have cap_sys_module. The reason is because the actual load is done by a usermode helper and those always have the full cap set. This patch addes new sysctls which allow us to bound the permissions of usermode helpers. /proc/sys/kernel/usermodehelper/bset /proc/sys/kernel/usermodehelper/inheritable You must have CAP_SYS_MODULE and CAP_SETPCAP to change these (changes are &= ONLY). When the kernel launches a usermodehelper it will do so with these as the bset and pI. -v2: make globals static create spinlock to protect globals -v3: require both CAP_SETPCAP and CAP_SYS_MODULE -v4: fix the typo s/CAP_SET_PCAP/CAP_SETPCAP/ because I didn't commit Signed-off-by: Eric Paris No-objection-from: Serge E. Hallyn Acked-by: David Howells Acked-by: Serge E. Hallyn Acked-by: Andrew G. Morgan Signed-off-by: James Morris diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 6efd7a7..79bb98d 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -24,6 +24,7 @@ #include #include #include +#include #define KMOD_PATH_LEN 256 @@ -109,6 +110,8 @@ call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait) NULL, NULL, NULL); } +extern struct ctl_table usermodehelper_table[]; + extern void usermodehelper_init(void); extern int usermodehelper_disable(void); diff --git a/kernel/kmod.c b/kernel/kmod.c index 9cd0591..06fdea2 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -43,6 +44,13 @@ extern int max_threads; static struct workqueue_struct *khelper_wq; +#define CAP_BSET (void *)1 +#define CAP_PI (void *)2 + +static kernel_cap_t usermodehelper_bset = CAP_FULL_SET; +static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET; +static DEFINE_SPINLOCK(umh_sysctl_lock); + #ifdef CONFIG_MODULES /* @@ -132,6 +140,7 @@ EXPORT_SYMBOL(__request_module); static int ____call_usermodehelper(void *data) { struct subprocess_info *sub_info = data; + struct cred *new; int retval; spin_lock_irq(¤t->sighand->siglock); @@ -153,6 +162,19 @@ static int ____call_usermodehelper(void *data) goto fail; } + retval = -ENOMEM; + new = prepare_kernel_cred(current); + if (!new) + goto fail; + + spin_lock(&umh_sysctl_lock); + new->cap_bset = cap_intersect(usermodehelper_bset, new->cap_bset); + new->cap_inheritable = cap_intersect(usermodehelper_inheritable, + new->cap_inheritable); + spin_unlock(&umh_sysctl_lock); + + commit_creds(new); + retval = kernel_execve(sub_info->path, (const char *const *)sub_info->argv, (const char *const *)sub_info->envp); @@ -418,6 +440,84 @@ unlock: } EXPORT_SYMBOL(call_usermodehelper_exec); +static int proc_cap_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table t; + unsigned long cap_array[_KERNEL_CAPABILITY_U32S]; + kernel_cap_t new_cap; + int err, i; + + if (write && (!capable(CAP_SETPCAP) || + !capable(CAP_SYS_MODULE))) + return -EPERM; + + /* + * convert from the global kernel_cap_t to the ulong array to print to + * userspace if this is a read. 
+ */ + spin_lock(&umh_sysctl_lock); + for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++) { + if (table->data == CAP_BSET) + cap_array[i] = usermodehelper_bset.cap[i]; + else if (table->data == CAP_PI) + cap_array[i] = usermodehelper_inheritable.cap[i]; + else + BUG(); + } + spin_unlock(&umh_sysctl_lock); + + t = *table; + t.data = &cap_array; + + /* + * actually read or write and array of ulongs from userspace. Remember + * these are least significant 32 bits first + */ + err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos); + if (err < 0) + return err; + + /* + * convert from the sysctl array of ulongs to the kernel_cap_t + * internal representation + */ + for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++) + new_cap.cap[i] = cap_array[i]; + + /* + * Drop everything not in the new_cap (but don't add things) + */ + spin_lock(&umh_sysctl_lock); + if (write) { + if (table->data == CAP_BSET) + usermodehelper_bset = cap_intersect(usermodehelper_bset, new_cap); + if (table->data == CAP_PI) + usermodehelper_inheritable = cap_intersect(usermodehelper_inheritable, new_cap); + } + spin_unlock(&umh_sysctl_lock); + + return 0; +} + +struct ctl_table usermodehelper_table[] = { + { + .procname = "bset", + .data = CAP_BSET, + .maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long), + .mode = 0600, + .proc_handler = proc_cap_handler, + }, + { + .procname = "inheritable", + .data = CAP_PI, + .maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long), + .mode = 0600, + .proc_handler = proc_cap_handler, + }, + { } +}; + void __init usermodehelper_init(void) { khelper_wq = create_singlethread_workqueue("khelper"); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c0bb324..965134b 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include @@ -616,6 +617,11 @@ static struct ctl_table kern_table[] = { .child = random_table, }, { + .procname = "usermodehelper", + .mode = 0555, + .child = usermodehelper_table, + }, + { .procname = "overflowuid", .data = &overflowuid, .maxlen = sizeof(int), -- cgit v0.10.2 From 4bf2ea77dba76a22f49db3c10773896aaeeb8f66 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 1 Apr 2011 17:08:28 -0400 Subject: capabilities: do not special case exec of init When the global init task is exec'd we have special case logic to make sure the pE is not reduced. There is no reason for this. If init wants to drop it's pE is should be allowed to do so. Remove this special logic. Signed-off-by: Eric Paris Acked-by: Serge Hallyn Acked-by: David Howells Acked-by: Andrew G. Morgan Signed-off-by: James Morris diff --git a/security/commoncap.c b/security/commoncap.c index f20e984..a93b3b7 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -529,15 +529,10 @@ skip: new->suid = new->fsuid = new->euid; new->sgid = new->fsgid = new->egid; - /* For init, we want to retain the capabilities set in the initial - * task. Thus we skip the usual capability rules - */ - if (!is_global_init(current)) { - if (effective) - new->cap_effective = new->cap_permitted; - else - cap_clear(new->cap_effective); - } + if (effective) + new->cap_effective = new->cap_permitted; + else + cap_clear(new->cap_effective); bprm->cap_effective = effective; /* -- cgit v0.10.2 From ffa8e59df047d57e812a04f7d6baf6a25c652c0c Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 1 Apr 2011 17:08:34 -0400 Subject: capabilities: do not drop CAP_SETPCAP from the initial task In olden' days of yore CAP_SETPCAP had special meaning for the init task. 
We actually have code to make sure that CAP_SETPCAP wasn't in pE of things using the init_cred. But CAP_SETPCAP isn't so special any more and we don't have a reason to special case dropping it for init or kthreads.... Signed-off-by: Eric Paris Acked-by: Andrew G. Morgan Signed-off-by: James Morris diff --git a/include/linux/capability.h b/include/linux/capability.h index 16ee8b4..11d5628 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -412,7 +412,6 @@ extern const kernel_cap_t __cap_init_eff_set; # define CAP_EMPTY_SET ((kernel_cap_t){{ 0, 0 }}) # define CAP_FULL_SET ((kernel_cap_t){{ ~0, ~0 }}) -# define CAP_INIT_EFF_SET ((kernel_cap_t){{ ~CAP_TO_MASK(CAP_SETPCAP), ~0 }}) # define CAP_FS_SET ((kernel_cap_t){{ CAP_FS_MASK_B0 \ | CAP_TO_MASK(CAP_LINUX_IMMUTABLE), \ CAP_FS_MASK_B1 } }) @@ -423,10 +422,10 @@ extern const kernel_cap_t __cap_init_eff_set; #endif /* _KERNEL_CAPABILITY_U32S != 2 */ #define CAP_INIT_INH_SET CAP_EMPTY_SET +#define CAP_INIT_EFF_SET CAP_FULL_SET # define cap_clear(c) do { (c) = __cap_empty_set; } while (0) # define cap_set_full(c) do { (c) = __cap_full_set; } while (0) -# define cap_set_init_eff(c) do { (c) = __cap_init_eff_set; } while (0) #define cap_raise(c, flag) ((c).cap[CAP_TO_INDEX(flag)] |= CAP_TO_MASK(flag)) #define cap_lower(c, flag) ((c).cap[CAP_TO_INDEX(flag)] &= ~CAP_TO_MASK(flag)) @@ -547,6 +546,9 @@ extern bool capable(int cap); extern bool ns_capable(struct user_namespace *ns, int cap); extern bool task_ns_capable(struct task_struct *t, int cap); +extern const kernel_cap_t __cap_empty_set; +extern const kernel_cap_t __cap_full_set; + /** * nsown_capable - Check superior capability to one's own user_ns * @cap: The capability in question diff --git a/kernel/capability.c b/kernel/capability.c index bf0c734..2a374d51 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -23,11 +23,9 @@ const kernel_cap_t __cap_empty_set = CAP_EMPTY_SET; const kernel_cap_t __cap_full_set = CAP_FULL_SET; -const kernel_cap_t __cap_init_eff_set = CAP_INIT_EFF_SET; EXPORT_SYMBOL(__cap_empty_set); EXPORT_SYMBOL(__cap_full_set); -EXPORT_SYMBOL(__cap_init_eff_set); int file_caps_enabled = 1; -- cgit v0.10.2 From 5163b583a036b103c3cec7171d6731c125773ed6 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 1 Apr 2011 17:08:39 -0400 Subject: capabilities: delete unused cap_set_full unused code. Clean it up. Signed-off-by: Eric Paris Acked-by: David Howells Acked-by: Andrew G. 
Morgan Signed-off-by: James Morris diff --git a/include/linux/capability.h b/include/linux/capability.h index 11d5628..8d0da30 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -425,7 +425,6 @@ extern const kernel_cap_t __cap_init_eff_set; #define CAP_INIT_EFF_SET CAP_FULL_SET # define cap_clear(c) do { (c) = __cap_empty_set; } while (0) -# define cap_set_full(c) do { (c) = __cap_full_set; } while (0) #define cap_raise(c, flag) ((c).cap[CAP_TO_INDEX(flag)] |= CAP_TO_MASK(flag)) #define cap_lower(c, flag) ((c).cap[CAP_TO_INDEX(flag)] &= ~CAP_TO_MASK(flag)) @@ -547,7 +546,6 @@ extern bool ns_capable(struct user_namespace *ns, int cap); extern bool task_ns_capable(struct task_struct *t, int cap); extern const kernel_cap_t __cap_empty_set; -extern const kernel_cap_t __cap_full_set; /** * nsown_capable - Check superior capability to one's own user_ns diff --git a/kernel/capability.c b/kernel/capability.c index 2a374d51..14ea4210 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -22,10 +22,8 @@ */ const kernel_cap_t __cap_empty_set = CAP_EMPTY_SET; -const kernel_cap_t __cap_full_set = CAP_FULL_SET; EXPORT_SYMBOL(__cap_empty_set); -EXPORT_SYMBOL(__cap_full_set); int file_caps_enabled = 1; -- cgit v0.10.2 From a3232d2fa2e3cbab3e76d91cdae5890fee8a4034 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 1 Apr 2011 17:08:45 -0400 Subject: capabilities: delete all CAP_INIT macros The CAP_INIT macros of INH, BSET, and EFF made sense at one point in time, but now days they aren't helping. Just open code the logic in the init_cred. Signed-off-by: Eric Paris Acked-by: David Howells Signed-off-by: James Morris diff --git a/include/linux/capability.h b/include/linux/capability.h index 8d0da30..04fed72 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -421,9 +421,6 @@ extern const kernel_cap_t __cap_init_eff_set; #endif /* _KERNEL_CAPABILITY_U32S != 2 */ -#define CAP_INIT_INH_SET CAP_EMPTY_SET -#define CAP_INIT_EFF_SET CAP_FULL_SET - # define cap_clear(c) do { (c) = __cap_empty_set; } while (0) #define cap_raise(c, flag) ((c).cap[CAP_TO_INDEX(flag)] |= CAP_TO_MASK(flag)) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index caa151f..1f27720 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -83,13 +83,6 @@ extern struct group_info init_groups; #define INIT_IDS #endif -/* - * Because of the reduced scope of CAP_SETPCAP when filesystem - * capabilities are in effect, it is safe to allow CAP_SETPCAP to - * be available in the default configuration. 
- */ -# define CAP_INIT_BSET CAP_FULL_SET - #ifdef CONFIG_RCU_BOOST #define INIT_TASK_RCU_BOOST() \ .rcu_boost_mutex = NULL, diff --git a/kernel/cred.c b/kernel/cred.c index 5557b55..b982f08 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -49,10 +49,10 @@ struct cred init_cred = { .magic = CRED_MAGIC, #endif .securebits = SECUREBITS_DEFAULT, - .cap_inheritable = CAP_INIT_INH_SET, + .cap_inheritable = CAP_EMPTY_SET, .cap_permitted = CAP_FULL_SET, - .cap_effective = CAP_INIT_EFF_SET, - .cap_bset = CAP_INIT_BSET, + .cap_effective = CAP_FULL_SET, + .cap_bset = CAP_FULL_SET, .user = INIT_USER, .group_info = &init_groups, #ifdef CONFIG_KEYS -- cgit v0.10.2 From 177525d26e31806d71653f74bbec13574b97892c Mon Sep 17 00:00:00 2001 From: Andreas Mohr Date: Sun, 3 Apr 2011 20:58:28 +0200 Subject: eradicate bashisms in scripts/patch-kernel Silence a remaining annoying (or worse, irritating - "is my entire patched tree broken now!?") bashism-related message that occurs when /bin/sh is configured to instead deploy dash, a POSIX-compliant shell, as is the pretty much standard case on e.g. Debian. Current kernel version is 2.6.38 ( Flesh-Eating Bats with Fangs) ===> linux-2.6.38.patch-kernel_test/scripts/patch-kernel: line 253: [: =: unary operator expected <=== cannot find patch file: patch-2.6.39 Signed-off-by: Andreas Mohr Signed-off-by: Michal Marek diff --git a/scripts/patch-kernel b/scripts/patch-kernel index 46a59ca..20fb25c 100755 --- a/scripts/patch-kernel +++ b/scripts/patch-kernel @@ -250,7 +250,7 @@ while : # incrementing SUBLEVEL (s in v.p.s) do CURRENTFULLVERSION="$VERSION.$PATCHLEVEL.$SUBLEVEL" EXTRAVER= - if [ $STOPFULLVERSION = $CURRENTFULLVERSION ]; then + if [ x$STOPFULLVERSION = x$CURRENTFULLVERSION ]; then echo "Stopping at $CURRENTFULLVERSION base as requested." break fi -- cgit v0.10.2 From 2a7ce0edd661b3144c7b916ecf1eba0967b6d4a5 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Mon, 4 Apr 2011 15:19:59 -0500 Subject: dlm: remove shared message stub for recovery kmalloc a stub message struct during recovery instead of sharing the struct in the lockspace. This leaves the lockspace stub_ms only for faking downconvert replies, where it is never modified and sharing is not a problem. Also improve the debug messages in the same recovery function. 
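In sketch form, the new per-call pattern, with the DLM_IFL_STUB_MS marker letting the receive paths tell a locally faked reply from a real message; the field values are taken from the unlock case in the hunk, and note that kmalloc() expects the size first and the flags second:

struct dlm_message *ms_stub;

ms_stub = kmalloc(sizeof(struct dlm_message), GFP_KERNEL);
if (!ms_stub)
	return;

memset(ms_stub, 0, sizeof(struct dlm_message));
ms_stub->m_flags = DLM_IFL_STUB_MS;		/* marks a faked reply */
ms_stub->m_type = DLM_MSG_UNLOCK_REPLY;
ms_stub->m_result = stub_unlock_result;
ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
_receive_unlock_reply(lkb, ms_stub);
/* ... reuse ms_stub for the remaining waiters ... */
kfree(ms_stub);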
Signed-off-by: David Teigland diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 6a92478..0262451 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -209,6 +209,7 @@ struct dlm_args { #define DLM_IFL_WATCH_TIMEWARN 0x00400000 #define DLM_IFL_TIMEOUT_CANCEL 0x00800000 #define DLM_IFL_DEADLOCK_CANCEL 0x01000000 +#define DLM_IFL_STUB_MS 0x02000000 /* magic number for m_flags */ #define DLM_IFL_USER 0x00000001 #define DLM_IFL_ORPHAN 0x00000002 diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index e3c8641..8122779 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -1037,10 +1037,10 @@ static int remove_from_waiters_ms(struct dlm_lkb *lkb, struct dlm_message *ms) struct dlm_ls *ls = lkb->lkb_resource->res_ls; int error; - if (ms != &ls->ls_stub_ms) + if (ms->m_flags != DLM_IFL_STUB_MS) mutex_lock(&ls->ls_waiters_mutex); error = _remove_from_waiters(lkb, ms->m_type, ms); - if (ms != &ls->ls_stub_ms) + if (ms->m_flags != DLM_IFL_STUB_MS) mutex_unlock(&ls->ls_waiters_mutex); return error; } @@ -1462,14 +1462,8 @@ static void grant_lock_pending(struct dlm_rsb *r, struct dlm_lkb *lkb) ALTPR/ALTCW: our rqmode may have been changed to PR or CW to become compatible with other granted locks */ -static void munge_demoted(struct dlm_lkb *lkb, struct dlm_message *ms) +static void munge_demoted(struct dlm_lkb *lkb) { - if (ms->m_type != DLM_MSG_CONVERT_REPLY) { - log_print("munge_demoted %x invalid reply type %d", - lkb->lkb_id, ms->m_type); - return; - } - if (lkb->lkb_rqmode == DLM_LOCK_IV || lkb->lkb_grmode == DLM_LOCK_IV) { log_print("munge_demoted %x invalid modes gr %d rq %d", lkb->lkb_id, lkb->lkb_grmode, lkb->lkb_rqmode); @@ -2966,9 +2960,9 @@ static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) /* down conversions go without a reply from the master */ if (!error && down_conversion(lkb)) { remove_from_waiters(lkb, DLM_MSG_CONVERT_REPLY); + r->res_ls->ls_stub_ms.m_flags = DLM_IFL_STUB_MS; r->res_ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY; r->res_ls->ls_stub_ms.m_result = 0; - r->res_ls->ls_stub_ms.m_flags = lkb->lkb_flags; __receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms); } @@ -3156,6 +3150,9 @@ static void receive_flags(struct dlm_lkb *lkb, struct dlm_message *ms) static void receive_flags_reply(struct dlm_lkb *lkb, struct dlm_message *ms) { + if (ms->m_flags == DLM_IFL_STUB_MS) + return; + lkb->lkb_sbflags = ms->m_sbflags; lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) | (ms->m_flags & 0x0000FFFF); @@ -3698,7 +3695,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, /* convert was queued on remote master */ receive_flags_reply(lkb, ms); if (is_demoted(lkb)) - munge_demoted(lkb, ms); + munge_demoted(lkb); del_lkb(r, lkb); add_lkb(r, lkb, DLM_LKSTS_CONVERT); add_timeout(lkb); @@ -3708,7 +3705,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, /* convert was granted on remote master */ receive_flags_reply(lkb, ms); if (is_demoted(lkb)) - munge_demoted(lkb, ms); + munge_demoted(lkb); grant_lock_pc(r, lkb, ms); queue_cast(r, lkb, 0); break; @@ -4082,15 +4079,17 @@ void dlm_receive_buffer(union dlm_packet *p, int nodeid) dlm_put_lockspace(ls); } -static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb) +static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb, + struct dlm_message *ms_stub) { if (middle_conversion(lkb)) { hold_lkb(lkb); - ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY; - ls->ls_stub_ms.m_result = -EINPROGRESS; - ls->ls_stub_ms.m_flags = lkb->lkb_flags; - 
ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid; - _receive_convert_reply(lkb, &ls->ls_stub_ms); + memset(ms_stub, 0, sizeof(struct dlm_message)); + ms_stub->m_flags = DLM_IFL_STUB_MS; + ms_stub->m_type = DLM_MSG_CONVERT_REPLY; + ms_stub->m_result = -EINPROGRESS; + ms_stub->m_header.h_nodeid = lkb->lkb_nodeid; + _receive_convert_reply(lkb, ms_stub); /* Same special case as in receive_rcom_lock_args() */ lkb->lkb_grmode = DLM_LOCK_IV; @@ -4131,13 +4130,27 @@ static int waiter_needs_recovery(struct dlm_ls *ls, struct dlm_lkb *lkb) void dlm_recover_waiters_pre(struct dlm_ls *ls) { struct dlm_lkb *lkb, *safe; + struct dlm_message *ms_stub; int wait_type, stub_unlock_result, stub_cancel_result; + ms_stub = kmalloc(GFP_KERNEL, sizeof(struct dlm_message)); + if (!ms_stub) { + log_error(ls, "dlm_recover_waiters_pre no mem"); + return; + } + mutex_lock(&ls->ls_waiters_mutex); list_for_each_entry_safe(lkb, safe, &ls->ls_waiters, lkb_wait_reply) { - log_debug(ls, "pre recover waiter lkid %x type %d flags %x", - lkb->lkb_id, lkb->lkb_wait_type, lkb->lkb_flags); + + /* exclude debug messages about unlocks because there can be so + many and they aren't very interesting */ + + if (lkb->lkb_wait_type != DLM_MSG_UNLOCK) { + log_debug(ls, "recover_waiter %x nodeid %d " + "msg %d to %d", lkb->lkb_id, lkb->lkb_nodeid, + lkb->lkb_wait_type, lkb->lkb_wait_nodeid); + } /* all outstanding lookups, regardless of destination will be resent after recovery is done */ @@ -4183,26 +4196,28 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) break; case DLM_MSG_CONVERT: - recover_convert_waiter(ls, lkb); + recover_convert_waiter(ls, lkb, ms_stub); break; case DLM_MSG_UNLOCK: hold_lkb(lkb); - ls->ls_stub_ms.m_type = DLM_MSG_UNLOCK_REPLY; - ls->ls_stub_ms.m_result = stub_unlock_result; - ls->ls_stub_ms.m_flags = lkb->lkb_flags; - ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid; - _receive_unlock_reply(lkb, &ls->ls_stub_ms); + memset(ms_stub, 0, sizeof(struct dlm_message)); + ms_stub->m_flags = DLM_IFL_STUB_MS; + ms_stub->m_type = DLM_MSG_UNLOCK_REPLY; + ms_stub->m_result = stub_unlock_result; + ms_stub->m_header.h_nodeid = lkb->lkb_nodeid; + _receive_unlock_reply(lkb, ms_stub); dlm_put_lkb(lkb); break; case DLM_MSG_CANCEL: hold_lkb(lkb); - ls->ls_stub_ms.m_type = DLM_MSG_CANCEL_REPLY; - ls->ls_stub_ms.m_result = stub_cancel_result; - ls->ls_stub_ms.m_flags = lkb->lkb_flags; - ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid; - _receive_cancel_reply(lkb, &ls->ls_stub_ms); + memset(ms_stub, 0, sizeof(struct dlm_message)); + ms_stub->m_flags = DLM_IFL_STUB_MS; + ms_stub->m_type = DLM_MSG_CANCEL_REPLY; + ms_stub->m_result = stub_cancel_result; + ms_stub->m_header.h_nodeid = lkb->lkb_nodeid; + _receive_cancel_reply(lkb, ms_stub); dlm_put_lkb(lkb); break; @@ -4213,6 +4228,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) schedule(); } mutex_unlock(&ls->ls_waiters_mutex); + kfree(ms_stub); } static struct dlm_lkb *find_resend_waiter(struct dlm_ls *ls) @@ -4277,8 +4293,8 @@ int dlm_recover_waiters_post(struct dlm_ls *ls) ou = is_overlap_unlock(lkb); err = 0; - log_debug(ls, "recover_waiters_post %x type %d flags %x %s", - lkb->lkb_id, mstype, lkb->lkb_flags, r->res_name); + log_debug(ls, "recover_waiter %x nodeid %d msg %d r_nodeid %d", + lkb->lkb_id, lkb->lkb_nodeid, mstype, r->res_nodeid); /* At this point we assume that we won't get a reply to any previous op or overlap op on this lock. 
First, do a big -- cgit v0.10.2 From 364de77831213be20f7f33c39ca1c194593b5c11 Mon Sep 17 00:00:00 2001 From: Liu Yuan Date: Sat, 2 Apr 2011 14:20:47 +0800 Subject: drivers, pch_dma: Fix uninitialized var before use In the function pdc_desc_get(), var 'i' is not initialized before use. This patch fixes it. Signed-off-by: Liu Yuan Signed-off-by: Vinod Koul diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c index 8d8fef1..6eebc62 100644 --- a/drivers/dma/pch_dma.c +++ b/drivers/dma/pch_dma.c @@ -403,7 +403,7 @@ static struct pch_dma_desc *pdc_desc_get(struct pch_dma_chan *pd_chan) { struct pch_dma_desc *desc, *_d; struct pch_dma_desc *ret = NULL; - int i; + int i = 0; spin_lock(&pd_chan->lock); list_for_each_entry_safe(desc, _d, &pd_chan->free_list, desc_node) { -- cgit v0.10.2 From eba71de2cb7c02c5ae4f2ad3656343da71bc4661 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Fri, 25 Mar 2011 10:13:43 -0400 Subject: selinux: Fix regression for Xorg Commit 6f5317e730505d5cbc851c435a2dfe3d5a21d343 introduced a bug in the handling of userspace object classes that is causing breakage for Xorg when XSELinux is enabled. Fix the bug by changing map_class() to return SECCLASS_NULL when the class cannot be mapped to a kernel object class. Reported-by: "Justin P. Mattock" Signed-off-by: Stephen Smalley Signed-off-by: James Morris diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 39d7321..f3f5dca 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -213,7 +213,7 @@ static u16 map_class(u16 pol_value) return i; } - return pol_value; + return SECCLASS_NULL; } static void map_decision(u16 tclass, struct av_decision *avd, -- cgit v0.10.2 From 1214eac73f798bccabc6adb55e7b2d787527c13c Mon Sep 17 00:00:00 2001 From: Harry Ciao Date: Thu, 7 Apr 2011 14:12:57 +0800 Subject: Initialize policydb.process_class eariler. Initialize policydb.process_class once all symtabs read from policy image, so that it could be used to setup the role_trans.tclass field when a lower version policy.X is loaded. Signed-off-by: Harry Ciao Signed-off-by: Eric Paris diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index a493eae..82373eb 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -2275,6 +2275,11 @@ int policydb_read(struct policydb *p, void *fp) p->symtab[i].nprim = nprim; } + rc = -EINVAL; + p->process_class = string_to_security_class(p, "process"); + if (!p->process_class) + goto bad; + rc = avtab_read(&p->te_avtab, fp, p); if (rc) goto bad; @@ -2359,11 +2364,6 @@ int policydb_read(struct policydb *p, void *fp) goto bad; rc = -EINVAL; - p->process_class = string_to_security_class(p, "process"); - if (!p->process_class) - goto bad; - - rc = -EINVAL; p->process_trans_perms = string_to_av_perm(p, p->process_class, "transition"); p->process_trans_perms |= string_to_av_perm(p, p->process_class, "dyntransition"); if (!p->process_trans_perms) -- cgit v0.10.2 From 0601f793921157603831d00a9541d92e8f5763f6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 18 May 2009 17:47:56 -0400 Subject: SUNRPC: requeue tcp socket less frequently Don't requeue the socket in some cases where we know it's unnecessary. Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index b7d435c..7a3e4bf 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -965,7 +965,6 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) goto err_again; /* record not complete */ } len = svsk->sk_reclen; - set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); return len; error: @@ -1115,6 +1114,10 @@ out: /* Reset TCP read info */ svsk->sk_reclen = 0; svsk->sk_tcplen = 0; + /* If we have more data, signal svc_xprt_enqueue() to try again */ + if (svc_recv_available(svsk) > sizeof(rpc_fraghdr)) + set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); + svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt); if (serv->sv_stats) -- cgit v0.10.2 From 5ee78d483c5812228e971e145b912e0a7e35e571 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 18 May 2009 17:47:56 -0400 Subject: SUNRPC: svc_tcp_recvfrom cleanup Minor cleanup in preparation for later patches. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 7a3e4bf..733c2f6 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -893,6 +893,7 @@ failed: static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) { struct svc_serv *serv = svsk->sk_xprt.xpt_server; + unsigned int want; int len; if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) @@ -915,9 +916,9 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { - int want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; struct kvec iov; + want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen; iov.iov_len = want; if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0) @@ -1040,8 +1041,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) struct svc_serv *serv = svsk->sk_xprt.xpt_server; int len; struct kvec *vec; - int pnum, vlen; struct rpc_rqst *req = NULL; + unsigned int vlen; + int pnum; dprintk("svc: tcp_recv %p data %d conn %d close %d\n", svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags), @@ -1072,7 +1074,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) } pnum = 1; - while (vlen < len) { + while (vlen < svsk->sk_reclen - 8) { vec[pnum].iov_base = (req) ? 
page_address(req->rq_private_buf.pages[pnum - 1]) : page_address(rqstp->rq_pages[pnum]); @@ -1083,29 +1085,23 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) rqstp->rq_respages = &rqstp->rq_pages[pnum]; /* Now receive data */ - len = svc_recvfrom(rqstp, vec, pnum, len); + len = svc_recvfrom(rqstp, vec, pnum, svsk->sk_reclen - 8); if (len < 0) goto err_again; - /* - * Account for the 8 bytes we read earlier - */ - len += 8; - if (req) { - xprt_complete_rqst(req->rq_task, len); - len = 0; + xprt_complete_rqst(req->rq_task, svsk->sk_reclen); + rqstp->rq_arg.len = 0; goto out; } - dprintk("svc: TCP complete record (%d bytes)\n", len); - rqstp->rq_arg.len = len; + dprintk("svc: TCP complete record (%d bytes)\n", svsk->sk_reclen); + rqstp->rq_arg.len = svsk->sk_reclen; rqstp->rq_arg.page_base = 0; - if (len <= rqstp->rq_arg.head[0].iov_len) { - rqstp->rq_arg.head[0].iov_len = len; + if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) { + rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len; rqstp->rq_arg.page_len = 0; - } else { - rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; - } + } else + rqstp->rq_arg.page_len = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len; rqstp->rq_xprt_ctxt = NULL; rqstp->rq_prot = IPPROTO_TCP; @@ -1123,7 +1119,7 @@ out: if (serv->sv_stats) serv->sv_stats->nettcpcnt++; - return len; + return rqstp->rq_arg.len; err_again: if (len == -EAGAIN) { -- cgit v0.10.2 From 48e6555c7b3bf0d92f8167d8b8b8ecf4a3fdab84 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 14 Feb 2011 14:52:03 -0500 Subject: svcrpc: note network-order types in svc_process_calldir Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 733c2f6..1955e1a 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -982,9 +982,9 @@ static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp, struct rpc_rqst **reqpp, struct kvec *vec) { struct rpc_rqst *req = NULL; - u32 *p; - u32 xid; - u32 calldir; + __be32 *p; + __be32 xid; + __be32 calldir; int len; len = svc_recvfrom(rqstp, vec, 1, 8); -- cgit v0.10.2 From cc6c2127f2316c2b2ad1e8919b45cde5e03f65aa Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 14 Feb 2011 15:03:35 -0500 Subject: svcrpc: close connection if client sends short packet If the client sents a record too short to contain even the beginning of the rpc header, then just close the connection. The current code drops the record data and continues. I don't see the point. It's a hopeless situation and simpler just to cut off the connection completely. Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 1955e1a..62ff7c5c 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -955,6 +955,9 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) } } + if (svsk->sk_reclen < 8) + goto err_delete; /* client is nuts. */ + /* Check whether enough data is available */ len = svc_recv_available(svsk); if (len < 0) @@ -1058,20 +1061,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) vec[0] = rqstp->rq_arg.head[0]; vlen = PAGE_SIZE; - /* - * We have enough data for the whole tcp record. Let's try and read the - * first 8 bytes to get the xid and the call direction. We can use this - * to figure out if this is a call or a reply to a callback. If - * sk_reclen is < 8 (xid and calldir), then this is a malformed packet. - * In that case, don't bother with the calldir and just read the data. - * It will be rejected in svc_process. 
- */ - if (len >= 8) { - len = svc_process_calldir(svsk, rqstp, &req, vec); - if (len < 0) - goto err_again; - vlen -= 8; - } + len = svc_process_calldir(svsk, rqstp, &req, vec); + if (len < 0) + goto err_again; + vlen -= 8; pnum = 1; while (vlen < svsk->sk_reclen - 8) { -- cgit v0.10.2 From 586c52cc61b5b84c70102208b78269ef5924bf49 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 18 May 2009 17:47:56 -0400 Subject: svcrpc: copy cb reply instead of pages It's much simpler just to copy the cb reply data than to play tricks with pages. Callback replies will typically be very small (at least until we implement cb_getattr, in which case files with very long ACLs could pose a problem), so there's no loss in efficiency. Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 62ff7c5c..40b502b 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -981,57 +981,58 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) return -EAGAIN; } -static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp, - struct rpc_rqst **reqpp, struct kvec *vec) +static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) { + struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt; struct rpc_rqst *req = NULL; - __be32 *p; + struct kvec *src, *dst; + __be32 *p = (__be32 *)rqstp->rq_arg.head[0].iov_base; __be32 xid; __be32 calldir; - int len; - - len = svc_recvfrom(rqstp, vec, 1, 8); - if (len < 0) - goto error; - p = (u32 *)rqstp->rq_arg.head[0].iov_base; xid = *p++; calldir = *p; - if (calldir == 0) { - /* REQUEST is the most common case */ - vec[0] = rqstp->rq_arg.head[0]; - } else { - /* REPLY */ - struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt; - - if (bc_xprt) - req = xprt_lookup_rqst(bc_xprt, xid); - - if (!req) { - printk(KERN_NOTICE - "%s: Got unrecognized reply: " - "calldir 0x%x xpt_bc_xprt %p xid %08x\n", - __func__, ntohl(calldir), - bc_xprt, xid); - vec[0] = rqstp->rq_arg.head[0]; - goto out; - } + if (bc_xprt) + req = xprt_lookup_rqst(bc_xprt, xid); - memcpy(&req->rq_private_buf, &req->rq_rcv_buf, - sizeof(struct xdr_buf)); - /* copy the xid and call direction */ - memcpy(req->rq_private_buf.head[0].iov_base, - rqstp->rq_arg.head[0].iov_base, 8); - vec[0] = req->rq_private_buf.head[0]; + if (!req) { + printk(KERN_NOTICE + "%s: Got unrecognized reply: " + "calldir 0x%x xpt_bc_xprt %p xid %08x\n", + __func__, ntohl(calldir), + bc_xprt, xid); + return -EAGAIN; } - out: - vec[0].iov_base += 8; - vec[0].iov_len -= 8; - len = svsk->sk_reclen - 8; - error: - *reqpp = req; - return len; + + memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf)); + /* + * XXX!: cheating for now! Only copying HEAD. + * But we know this is good enough for now (in fact, for any + * callback reply in the forseeable future). + */ + dst = &req->rq_private_buf.head[0]; + src = &rqstp->rq_arg.head[0]; + if (dst->iov_len < src->iov_len) + return -EAGAIN; /* whatever; just giving up. 
*/ + memcpy(dst->iov_base, src->iov_base, src->iov_len); + xprt_complete_rqst(req->rq_task, svsk->sk_reclen); + rqstp->rq_arg.len = 0; + return 0; +} + +static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len) +{ + int i = 0; + int t = 0; + + while (t < len) { + vec[i].iov_base = page_address(pages[i]); + vec[i].iov_len = PAGE_SIZE; + i++; + t += PAGE_SIZE; + } + return i; } /* @@ -1044,8 +1045,8 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) struct svc_serv *serv = svsk->sk_xprt.xpt_server; int len; struct kvec *vec; - struct rpc_rqst *req = NULL; - unsigned int vlen; + __be32 *p; + __be32 calldir; int pnum; dprintk("svc: tcp_recv %p data %d conn %d close %d\n", @@ -1058,35 +1059,17 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) goto error; vec = rqstp->rq_vec; - vec[0] = rqstp->rq_arg.head[0]; - vlen = PAGE_SIZE; - len = svc_process_calldir(svsk, rqstp, &req, vec); - if (len < 0) - goto err_again; - vlen -= 8; - - pnum = 1; - while (vlen < svsk->sk_reclen - 8) { - vec[pnum].iov_base = (req) ? - page_address(req->rq_private_buf.pages[pnum - 1]) : - page_address(rqstp->rq_pages[pnum]); - vec[pnum].iov_len = PAGE_SIZE; - pnum++; - vlen += PAGE_SIZE; - } + pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0], + svsk->sk_reclen); + rqstp->rq_respages = &rqstp->rq_pages[pnum]; /* Now receive data */ - len = svc_recvfrom(rqstp, vec, pnum, svsk->sk_reclen - 8); + len = svc_recvfrom(rqstp, vec, pnum, svsk->sk_reclen); if (len < 0) goto err_again; - if (req) { - xprt_complete_rqst(req->rq_task, svsk->sk_reclen); - rqstp->rq_arg.len = 0; - goto out; - } dprintk("svc: TCP complete record (%d bytes)\n", svsk->sk_reclen); rqstp->rq_arg.len = svsk->sk_reclen; rqstp->rq_arg.page_base = 0; @@ -1099,7 +1082,14 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) rqstp->rq_xprt_ctxt = NULL; rqstp->rq_prot = IPPROTO_TCP; -out: + p = (__be32 *)rqstp->rq_arg.head[0].iov_base; + calldir = p[1]; + if (calldir) { + len = receive_cb_reply(svsk, rqstp); + if (len < 0) + goto err_again; + } + /* Reset TCP read info */ svsk->sk_reclen = 0; svsk->sk_tcplen = 0; -- cgit v0.10.2 From 31d68ef65c7d49def19c1bae4e01b87d66cf5a56 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 24 Feb 2011 11:25:33 -0800 Subject: SUNRPC: Don't wait for full record to receive tcp data Ensure that we immediately read and buffer data from the incoming TCP stream so that we grow the receive window quickly, and don't deadlock on large READ or WRITE requests. Also do some minor exit cleanup. Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 04dba23..85c50b4 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -28,6 +28,7 @@ struct svc_sock { /* private TCP part */ u32 sk_reclen; /* length of record */ u32 sk_tcplen; /* current read length */ + struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */ }; /* diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 40b502b..a4fafcb 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -387,6 +387,33 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, return len; } +static int svc_partial_recvfrom(struct svc_rqst *rqstp, + struct kvec *iov, int nr, + int buflen, unsigned int base) +{ + size_t save_iovlen; + void __user *save_iovbase; + unsigned int i; + int ret; + + if (base == 0) + return svc_recvfrom(rqstp, iov, nr, buflen); + + for (i = 0; i < nr; i++) { + if (iov[i].iov_len > base) + break; + base -= iov[i].iov_len; + } + save_iovlen = iov[i].iov_len; + save_iovbase = iov[i].iov_base; + iov[i].iov_len -= base; + iov[i].iov_base += base; + ret = svc_recvfrom(rqstp, &iov[i], nr - i, buflen); + iov[i].iov_len = save_iovlen; + iov[i].iov_base = save_iovbase; + return ret; +} + /* * Set socket snd and rcv buffer lengths */ @@ -884,6 +911,56 @@ failed: return NULL; } +static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst *rqstp) +{ + unsigned int i, len, npages; + + if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + return 0; + len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + for (i = 0; i < npages; i++) { + if (rqstp->rq_pages[i] != NULL) + put_page(rqstp->rq_pages[i]); + BUG_ON(svsk->sk_pages[i] == NULL); + rqstp->rq_pages[i] = svsk->sk_pages[i]; + svsk->sk_pages[i] = NULL; + } + rqstp->rq_arg.head[0].iov_base = page_address(rqstp->rq_pages[0]); + return len; +} + +static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp) +{ + unsigned int i, len, npages; + + if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + return; + len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + for (i = 0; i < npages; i++) { + svsk->sk_pages[i] = rqstp->rq_pages[i]; + rqstp->rq_pages[i] = NULL; + } +} + +static void svc_tcp_clear_pages(struct svc_sock *svsk) +{ + unsigned int i, len, npages; + + if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + goto out; + len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + for (i = 0; i < npages; i++) { + BUG_ON(svsk->sk_pages[i] == NULL); + put_page(svsk->sk_pages[i]); + svsk->sk_pages[i] = NULL; + } +out: + svsk->sk_tcplen = 0; +} + /* * Receive data. * If we haven't gotten the record length yet, get the next four bytes. @@ -928,7 +1005,7 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) if (len < want) { dprintk("svc: short recvfrom while reading record " "length (%d of %d)\n", len, want); - goto err_again; /* record header not complete */ + return -EAGAIN; } svsk->sk_reclen = ntohl(svsk->sk_reclen); @@ -958,26 +1035,14 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) if (svsk->sk_reclen < 8) goto err_delete; /* client is nuts. 
*/ - /* Check whether enough data is available */ - len = svc_recv_available(svsk); - if (len < 0) - goto error; - - if (len < svsk->sk_reclen) { - dprintk("svc: incomplete TCP record (%d of %d)\n", - len, svsk->sk_reclen); - goto err_again; /* record not complete */ - } len = svsk->sk_reclen; return len; - error: - if (len == -EAGAIN) - dprintk("RPC: TCP recv_record got EAGAIN\n"); +error: + dprintk("RPC: TCP recv_record got %d\n", len); return len; - err_delete: +err_delete: set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); - err_again: return -EAGAIN; } @@ -1035,6 +1100,7 @@ static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len) return i; } + /* * Receive data from a TCP socket. */ @@ -1045,6 +1111,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) struct svc_serv *serv = svsk->sk_xprt.xpt_server; int len; struct kvec *vec; + unsigned int want, base; __be32 *p; __be32 calldir; int pnum; @@ -1058,6 +1125,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) if (len < 0) goto error; + base = svc_tcp_restore_pages(svsk, rqstp); + want = svsk->sk_reclen - base; + vec = rqstp->rq_vec; pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0], @@ -1066,11 +1136,18 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) rqstp->rq_respages = &rqstp->rq_pages[pnum]; /* Now receive data */ - len = svc_recvfrom(rqstp, vec, pnum, svsk->sk_reclen); - if (len < 0) - goto err_again; + len = svc_partial_recvfrom(rqstp, vec, pnum, want, base); + if (len >= 0) + svsk->sk_tcplen += len; + if (len != want) { + if (len < 0 && len != -EAGAIN) + goto err_other; + svc_tcp_save_pages(svsk, rqstp); + dprintk("svc: incomplete TCP record (%d of %d)\n", + svsk->sk_tcplen, svsk->sk_reclen); + goto err_noclose; + } - dprintk("svc: TCP complete record (%d bytes)\n", svsk->sk_reclen); rqstp->rq_arg.len = svsk->sk_reclen; rqstp->rq_arg.page_base = 0; if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) { @@ -1087,7 +1164,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) if (calldir) { len = receive_cb_reply(svsk, rqstp); if (len < 0) - goto err_again; + goto error; } /* Reset TCP read info */ @@ -1102,20 +1179,20 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) if (serv->sv_stats) serv->sv_stats->nettcpcnt++; + dprintk("svc: TCP complete record (%d bytes)\n", rqstp->rq_arg.len); return rqstp->rq_arg.len; -err_again: - if (len == -EAGAIN) { - dprintk("RPC: TCP recvfrom got EAGAIN\n"); - return len; - } error: - if (len != -EAGAIN) { - printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", - svsk->sk_xprt.xpt_server->sv_name, -len); - set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); - } + if (len != -EAGAIN) + goto err_other; + dprintk("RPC: TCP recvfrom got EAGAIN\n"); return -EAGAIN; +err_other: + printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", + svsk->sk_xprt.xpt_server->sv_name, -len); + set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); +err_noclose: + return -EAGAIN; /* record not complete */ } /* @@ -1286,6 +1363,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) svsk->sk_reclen = 0; svsk->sk_tcplen = 0; + memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages)); tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; @@ -1544,8 +1622,10 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt) svc_sock_detach(xprt); - if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) + if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) { + svc_tcp_clear_pages(svsk); kernel_sock_shutdown(svsk->sk_sock, SHUT_RDWR); + } } /* -- cgit v0.10.2 From 9660439861aa8dbd5e2b8087f33e20760c2c9afc Mon Sep 17 
00:00:00 2001 From: Olga Kornievskaia Date: Tue, 21 Oct 2008 14:13:47 -0400 Subject: svcrpc: take advantage of tcp autotuning Allow the NFSv4 server to make use of TCP autotuning behaviour, which was previously disabled by setting the sk_userlocks variable. Set the receive buffers to be big enough to receive the whole RPC request, and set this for the listening socket, not the accept socket. Remove the code that readjusts the receive/send buffer sizes for the accepted socket. Previously this code was used to influence the TCP window management behaviour, which is no longer needed when autotuning is enabled. This can improve IO bandwidth on networks with high bandwidth-delay products, where a large tcp window is required. It also simplifies performance tuning, since getting adequate tcp buffers previously required increasing the number of nfsd threads. Signed-off-by: Olga Kornievskaia Cc: Jim Rees Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index a4fafcb..213dea8 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -436,7 +436,6 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, lock_sock(sock->sk); sock->sk->sk_sndbuf = snd * 2; sock->sk->sk_rcvbuf = rcv * 2; - sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK; sock->sk->sk_write_space(sock->sk); release_sock(sock->sk); #endif @@ -973,23 +972,6 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) unsigned int want; int len; - if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) - /* sndbuf needs to have room for one request - * per thread, otherwise we can stall even when the - * network isn't a bottleneck. - * - * We count all threads rather than threads in a - * particular pool, which provides an upper bound - * on the number of threads which will access the socket. - * - * rcvbuf just needs to be able to hold a few requests. - * Normally they will be removed from the queue - * as soon a a complete request arrives. - */ - svc_sock_setbufsize(svsk->sk_sock, - (serv->sv_nrthreads+3) * serv->sv_max_mesg, - 3 * serv->sv_max_mesg); - clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { @@ -1367,15 +1349,6 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; - /* initialise setting must have enough space to - * receive and respond to one request. - * svc_tcp_recvfrom will re-adjust if necessary - */ - svc_sock_setbufsize(svsk->sk_sock, - 3 * svsk->sk_xprt.xpt_server->sv_max_mesg, - 3 * svsk->sk_xprt.xpt_server->sv_max_mesg); - - set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); if (sk->sk_state != TCP_ESTABLISHED) set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); @@ -1439,8 +1412,14 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, /* Initialize the socket */ if (sock->type == SOCK_DGRAM) svc_udp_init(svsk, serv); - else + else { + /* initialise setting must have enough space to + * receive and respond to one request. 
+ */ + svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg, + 4 * serv->sv_max_mesg); svc_tcp_init(svsk, serv); + } dprintk("svc: svc_setup_socket created %p (inet %p)\n", svsk, svsk->sk_sk); -- cgit v0.10.2 From 466de9183570fe9fd21ef167951488fc9d513fcb Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 19 Mar 2011 04:26:10 +0000 Subject: kconfig: Avoid buffer underrun in choice input commit 40aee729b350672c2550640622416a855e27938f ('kconfig: fix default value for choice input') fixed some cases where kconfig would select the wrong option from a choice with a single valid option and thus enter an infinite loop. However, this broke the test for user input of the form 'N?', because when kconfig selects the single valid option the input is zero-length and the test will read the byte before the input buffer. If this happens to contain '?' (as it will in a mips build on Debian unstable today) then kconfig again enters an infinite loop. Signed-off-by: Ben Hutchings Cc: stable@kernel.org [2.6.17+] Signed-off-by: Michal Marek diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c index 659326c..006ad81 100644 --- a/scripts/kconfig/conf.c +++ b/scripts/kconfig/conf.c @@ -332,7 +332,7 @@ static int conf_choice(struct menu *menu) } if (!child) continue; - if (line[strlen(line) - 1] == '?') { + if (line[0] && line[strlen(line) - 1] == '?') { print_help(child); continue; } -- cgit v0.10.2 From 8985ef0b8af895c3b85a8c1b7108e0169fcbd20b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 9 Apr 2011 10:03:10 -0400 Subject: svcrpc: complete svsk processing on cb receive failure Currently when there's some failure to receive a callback (because we couldn't find a matching xid, for example), we exit svc_recv with sk_tcplen still set but without any pages saved with the socket. This will cause a crash later in svc_tcp_restore_pages. Instead, make sure we reset that tcp information whether the callback received failed or succeeded. Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 213dea8..af04f77 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1143,11 +1143,8 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) p = (__be32 *)rqstp->rq_arg.head[0].iov_base; calldir = p[1]; - if (calldir) { + if (calldir) len = receive_cb_reply(svsk, rqstp); - if (len < 0) - goto error; - } /* Reset TCP read info */ svsk->sk_reclen = 0; @@ -1156,6 +1153,8 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) if (svc_recv_available(svsk) > sizeof(rpc_fraghdr)) set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); + if (len < 0) + goto error; svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt); if (serv->sv_stats) -- cgit v0.10.2 From aea93397db4b39c9d15443a0e7cc9a380ba990c6 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 10 Apr 2011 10:35:12 -0400 Subject: nfsd: distinguish functions of NFSD_MAY_* flags Most of the NFSD_MAY_* flags actually request permissions, but over the years we've accreted a few that modify the behavior of the permission or open code in other ways. Distinguish the two cases a little more. In particular, allow the shortcut at the start of nfsd_permission to ignore the non-permission-requesting bits. Signed-off-by: J. 
Bruce Fields diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 2e1cebd..a76ef7e 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -2027,7 +2027,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, struct inode *inode = dentry->d_inode; int err; - if (acc == NFSD_MAY_NOP) + if ((acc & NFSD_MAY_MASK) == NFSD_MAY_NOP) return 0; #if 0 dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n", diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 9a370a5..1036913 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -17,6 +17,9 @@ #define NFSD_MAY_SATTR 8 #define NFSD_MAY_TRUNC 16 #define NFSD_MAY_LOCK 32 +#define NFSD_MAY_MASK 63 + +/* extra hints to permission and open routines: */ #define NFSD_MAY_OWNER_OVERRIDE 64 #define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/ #define NFSD_MAY_BYPASS_GSS_ON_ROOT 256 -- cgit v0.10.2 From 204f4ce75434c3453907813f8a819d4cf2a5728f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 8 Apr 2011 16:32:54 -0400 Subject: nfsd4: allow fh_verify caller to skip pseudoflavor checks Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 55c8e63..90c6aa6 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -344,7 +344,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) * which clients virtually always use auth_sys for, * even while using RPCSEC_GSS for NFS. */ - if (access & NFSD_MAY_LOCK) + if (access & NFSD_MAY_LOCK || access & NFSD_MAY_BYPASS_GSS) goto skip_pseudoflavor_check; /* * Clients may expect to be able to use auth_sys during mount, diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 1036913..4d2509f 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -24,6 +24,7 @@ #define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/ #define NFSD_MAY_BYPASS_GSS_ON_ROOT 256 #define NFSD_MAY_NOT_BREAK_LEASE 512 +#define NFSD_MAY_BYPASS_GSS 1024 #define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) #define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) -- cgit v0.10.2 From 22b03214962ec2a9748abc9987fc2e66dec4626d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 9 Apr 2011 11:23:24 -0400 Subject: nfsd4: introduce OPDESC helper Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 5fcb139..126b8f7 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1031,6 +1031,11 @@ static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args) return nfs_ok; } +static inline struct nfsd4_operation *OPDESC(struct nfsd4_op *op) +{ + return &nfsd4_ops[op->opnum]; +} + /* * COMPOUND call. */ @@ -1108,7 +1113,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, goto encode_op; } - opdesc = &nfsd4_ops[op->opnum]; + opdesc = OPDESC(op); if (!cstate->current_fh.fh_dentry) { if (!(opdesc->op_flags & ALLOWED_WITHOUT_FH)) { -- cgit v0.10.2 From 29a78a3ed7fc9c4ee49962751eb321b038c190a2 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 9 Apr 2011 11:28:53 -0400 Subject: nfsd4: make fh_verify responsibility of nfsd_lookup_dentry caller The secinfo caller actually won't want this. Signed-off-by: J. 
Bruce Fields diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 126b8f7..8059ada 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -762,6 +762,9 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 err; fh_init(&resfh, NFS4_FHSIZE); + err = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, NFSD_MAY_EXEC); + if (err) + return err; err = nfsd_lookup_dentry(rqstp, &cstate->current_fh, secinfo->si_name, secinfo->si_namelen, &exp, &dentry); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index a76ef7e..e533139 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -181,16 +181,10 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, struct svc_export *exp; struct dentry *dparent; struct dentry *dentry; - __be32 err; int host_err; dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name); - /* Obtain dentry and export. */ - err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); - if (err) - return err; - dparent = fhp->fh_dentry; exp = fhp->fh_export; exp_get(exp); @@ -254,6 +248,9 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, struct dentry *dentry; __be32 err; + err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); + if (err) + return err; err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry); if (err) return err; -- cgit v0.10.2 From 0d60b281dc7fd2ae7ec6463f916138fd2d182bee Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Mon, 11 Apr 2011 07:22:58 +0000 Subject: video: s3c-fb: make runtime pm functions static This patch makes runtime pm functions static. Signed-off-by: Jingoo Han Signed-off-by: Paul Mundt diff --git a/drivers/video/s3c-fb.c b/drivers/video/s3c-fb.c index 3b6cdca..2d075f0 100644 --- a/drivers/video/s3c-fb.c +++ b/drivers/video/s3c-fb.c @@ -1549,7 +1549,7 @@ static int s3c_fb_resume(struct device *dev) return 0; } -int s3c_fb_runtime_suspend(struct device *dev) +static int s3c_fb_runtime_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct s3c_fb *sfb = platform_get_drvdata(pdev); @@ -1569,7 +1569,7 @@ int s3c_fb_runtime_suspend(struct device *dev) return 0; } -int s3c_fb_runtime_resume(struct device *dev) +static int s3c_fb_runtime_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct s3c_fb *sfb = platform_get_drvdata(pdev); -- cgit v0.10.2 From b07f3bbee12163a6b48991138a37b87a1126462a Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Mon, 11 Apr 2011 07:25:37 +0000 Subject: video: s3c-fb: add spinlock to interrupt routine The spinlock is added to interrupt routine to ensure that the driver is protected against multiple accesses. Signed-off-by: Jingoo Han Signed-off-by: Paul Mundt diff --git a/drivers/video/s3c-fb.c b/drivers/video/s3c-fb.c index 2d075f0..3fa7911 100644 --- a/drivers/video/s3c-fb.c +++ b/drivers/video/s3c-fb.c @@ -182,6 +182,7 @@ struct s3c_fb_vsync { /** * struct s3c_fb - overall hardware state of the hardware + * @slock: The spinlock protection for this data sturcture. * @dev: The device that we bound to, for printing, etc. * @regs_res: The resource we claimed for the IO registers. * @bus_clk: The clk (hclk) feeding our interface and possibly pixclk. @@ -195,6 +196,7 @@ struct s3c_fb_vsync { * @vsync_info: VSYNC-related information (count, queues...) 
*/ struct s3c_fb { + spinlock_t slock; struct device *dev; struct resource *regs_res; struct clk *bus_clk; @@ -947,6 +949,8 @@ static irqreturn_t s3c_fb_irq(int irq, void *dev_id) void __iomem *regs = sfb->regs; u32 irq_sts_reg; + spin_lock(&sfb->slock); + irq_sts_reg = readl(regs + VIDINTCON1); if (irq_sts_reg & VIDINTCON1_INT_FRAME) { @@ -963,6 +967,7 @@ static irqreturn_t s3c_fb_irq(int irq, void *dev_id) */ s3c_fb_disable_irq(sfb); + spin_unlock(&sfb->slock); return IRQ_HANDLED; } @@ -1339,6 +1344,8 @@ static int __devinit s3c_fb_probe(struct platform_device *pdev) sfb->pdata = pd; sfb->variant = fbdrv->variant; + spin_lock_init(&sfb->slock); + sfb->bus_clk = clk_get(dev, "lcd"); if (IS_ERR(sfb->bus_clk)) { dev_err(dev, "failed to get bus clock\n"); -- cgit v0.10.2 From 92a47674f57b4a84a43ce93b0dfdb596c0543749 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Mon, 11 Apr 2011 23:34:37 -0700 Subject: Input: gpio_keys - add support for EV_ABS With this patch you can setup a group of GPIOs representing a specific position on an EV_ABS axis. Signed-off-by: Alexander Stein Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c index eb30063..73e58a9 100644 --- a/drivers/input/keyboard/gpio_keys.c +++ b/drivers/input/keyboard/gpio_keys.c @@ -324,7 +324,12 @@ static void gpio_keys_report_event(struct gpio_button_data *bdata) unsigned int type = button->type ?: EV_KEY; int state = (gpio_get_value_cansleep(button->gpio) ? 1 : 0) ^ button->active_low; - input_event(input, type, button->code, !!state); + if (type == EV_ABS) { + if (state) + input_event(input, type, button->code, button->value); + } else { + input_event(input, type, button->code, !!state); + } input_sync(input); } @@ -363,7 +368,7 @@ static int __devinit gpio_keys_setup_key(struct platform_device *pdev, struct gpio_button_data *bdata, struct gpio_keys_button *button) { - char *desc = button->desc ? button->desc : "gpio_keys"; + const char *desc = button->desc ? button->desc : "gpio_keys"; struct device *dev = &pdev->dev; unsigned long irqflags; int irq, error; diff --git a/include/linux/gpio_keys.h b/include/linux/gpio_keys.h index dd1a56f..3204edf 100644 --- a/include/linux/gpio_keys.h +++ b/include/linux/gpio_keys.h @@ -3,14 +3,15 @@ struct gpio_keys_button { /* Configuration parameters */ - int code; /* input event code (KEY_*, SW_*) */ + unsigned int code; /* input event code (KEY_*, SW_*) */ int gpio; int active_low; - char *desc; - int type; /* input event type (EV_KEY, EV_SW) */ + const char *desc; + unsigned int type; /* input event type (EV_KEY, EV_SW, EV_ABS) */ int wakeup; /* configure the button as a wake-up source */ int debounce_interval; /* debounce ticks interval in msecs */ bool can_disable; + int value; /* axis value for EV_ABS */ }; struct gpio_keys_platform_data { -- cgit v0.10.2 From 467112777c462a592c27338eeea5d1a320e82b5f Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Mon, 11 Apr 2011 23:34:48 -0700 Subject: Input: gpio-keys - add support for setting device name This patch allows to set a device name which helps distinguishing several gpio-keys devices. 
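As a minimal sketch of how a board file might use the new 'name' field (illustrative only: the board name, GPIO number and key code below are invented, and only the 'name' member is introduced by this change; the remaining members are the pre-existing gpio_keys platform data fields):

static struct gpio_keys_button front_panel_buttons[] = {
	{
		.code       = KEY_POWER,            /* input event code */
		.gpio       = 10,                   /* hypothetical GPIO line */
		.active_low = 1,
		.desc       = "front power button",
		.type       = EV_KEY,
	},
};

static struct gpio_keys_platform_data front_panel_pdata = {
	.buttons  = front_panel_buttons,
	.nbuttons = ARRAY_SIZE(front_panel_buttons),
	.name     = "front-panel-keys",         /* reported as input->name */
};

With two such platform devices registered, the distinct 'name' strings let userspace tell the resulting input devices apart instead of both falling back to the platform device name.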
Signed-off-by: Alexander Stein Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c index 73e58a9..6e6145b 100644 --- a/drivers/input/keyboard/gpio_keys.c +++ b/drivers/input/keyboard/gpio_keys.c @@ -473,7 +473,7 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev) platform_set_drvdata(pdev, ddata); input_set_drvdata(input, ddata); - input->name = pdev->name; + input->name = pdata->name ? : pdev->name; input->phys = "gpio-keys/input0"; input->dev.parent = &pdev->dev; input->open = gpio_keys_open; diff --git a/include/linux/gpio_keys.h b/include/linux/gpio_keys.h index 3204edf..b5ca4b2 100644 --- a/include/linux/gpio_keys.h +++ b/include/linux/gpio_keys.h @@ -22,6 +22,7 @@ struct gpio_keys_platform_data { unsigned int rep:1; /* enable input subsystem auto repeat */ int (*enable)(struct device *dev); void (*disable)(struct device *dev); + const char *name; /* input device name */ }; #endif -- cgit v0.10.2 From 4203306506ebe4eaaa84a2cbd7c1eb2fc0128faa Mon Sep 17 00:00:00 2001 From: Zhang Jiejing Date: Mon, 11 Apr 2011 23:48:23 -0700 Subject: Input: add driver for Maxim max11801 touchscreen controller Add MAXI max11801 resistive touchscreen controller driver. This driver uses Auto Mode and Aperture Mode. Support for other max1180x devices can be added to this driver as well, as they use almost the same register addresses and codes. You can find data sheet here: http://www.maxim-ic.com/datasheet/index.mvp/id/5943 Signed-off-by: Zhang Jiejing Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index 112ec55..6b2d441 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -248,6 +248,18 @@ config TOUCHSCREEN_LPC32XX To compile this driver as a module, choose M here: the module will be called lpc32xx_ts. +config TOUCHSCREEN_MAX11801 + tristate "MAX11801 based touchscreens" + depends on I2C + help + Say Y here if you have a MAX11801 based touchscreen + controller. + + If unsure, say N. + + To compile this driver as a module, choose M here: the + module will be called max11801_ts. + config TOUCHSCREEN_MCS5000 tristate "MELFAS MCS-5000 touchscreen" depends on I2C diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile index ca94098..282d6f7 100644 --- a/drivers/input/touchscreen/Makefile +++ b/drivers/input/touchscreen/Makefile @@ -27,6 +27,7 @@ obj-$(CONFIG_TOUCHSCREEN_FUJITSU) += fujitsu_ts.o obj-$(CONFIG_TOUCHSCREEN_INEXIO) += inexio.o obj-$(CONFIG_TOUCHSCREEN_INTEL_MID) += intel-mid-touch.o obj-$(CONFIG_TOUCHSCREEN_LPC32XX) += lpc32xx_ts.o +obj-$(CONFIG_TOUCHSCREEN_MAX11801) += max11801_ts.o obj-$(CONFIG_TOUCHSCREEN_MC13783) += mc13783_ts.o obj-$(CONFIG_TOUCHSCREEN_MCS5000) += mcs5000_ts.o obj-$(CONFIG_TOUCHSCREEN_MIGOR) += migor_ts.o diff --git a/drivers/input/touchscreen/max11801_ts.c b/drivers/input/touchscreen/max11801_ts.c new file mode 100644 index 0000000..4f2713d --- /dev/null +++ b/drivers/input/touchscreen/max11801_ts.c @@ -0,0 +1,272 @@ +/* + * Driver for MAXI MAX11801 - A Resistive touch screen controller with + * i2c interface + * + * Copyright (C) 2011 Freescale Semiconductor, Inc. + * Author: Zhang Jiejing + * + * Based on mcs5000_ts.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. 
+ */ + +/* + * This driver aims to support the series of MAXI touch chips max11801 + * through max11803. The main difference between these 4 chips can be + * found in the table below: + * ----------------------------------------------------- + * | CHIP | AUTO MODE SUPPORT(FIFO) | INTERFACE | + * |----------------------------------------------------| + * | max11800 | YES | SPI | + * | max11801 | YES | I2C | + * | max11802 | NO | SPI | + * | max11803 | NO | I2C | + * ------------------------------------------------------ + * + * Currently, this driver only supports max11801. + * + * Data Sheet: + * http://www.maxim-ic.com/datasheet/index.mvp/id/5943 + */ + +#include +#include +#include +#include +#include +#include +#include + +/* Register Address define */ +#define GENERNAL_STATUS_REG 0x00 +#define GENERNAL_CONF_REG 0x01 +#define MESURE_RES_CONF_REG 0x02 +#define MESURE_AVER_CONF_REG 0x03 +#define ADC_SAMPLE_TIME_CONF_REG 0x04 +#define PANEL_SETUPTIME_CONF_REG 0x05 +#define DELAY_CONVERSION_CONF_REG 0x06 +#define TOUCH_DETECT_PULLUP_CONF_REG 0x07 +#define AUTO_MODE_TIME_CONF_REG 0x08 /* only for max11800/max11801 */ +#define APERTURE_CONF_REG 0x09 /* only for max11800/max11801 */ +#define AUX_MESURE_CONF_REG 0x0a +#define OP_MODE_CONF_REG 0x0b + +/* FIFO is found only in max11800 and max11801 */ +#define FIFO_RD_CMD (0x50 << 1) +#define MAX11801_FIFO_INT (1 << 2) +#define MAX11801_FIFO_OVERFLOW (1 << 3) + +#define XY_BUFSIZE 4 +#define XY_BUF_OFFSET 4 + +#define MAX11801_MAX_X 0xfff +#define MAX11801_MAX_Y 0xfff + +#define MEASURE_TAG_OFFSET 2 +#define MEASURE_TAG_MASK (3 << MEASURE_TAG_OFFSET) +#define EVENT_TAG_OFFSET 0 +#define EVENT_TAG_MASK (3 << EVENT_TAG_OFFSET) +#define MEASURE_X_TAG (0 << MEASURE_TAG_OFFSET) +#define MEASURE_Y_TAG (1 << MEASURE_TAG_OFFSET) + +/* These are the state of touch event state machine */ +enum { + EVENT_INIT, + EVENT_MIDDLE, + EVENT_RELEASE, + EVENT_FIFO_END +}; + +struct max11801_data { + struct i2c_client *client; + struct input_dev *input_dev; +}; + +static u8 read_register(struct i2c_client *client, int addr) +{ + /* XXX: The chip ignores LSB of register address */ + return i2c_smbus_read_byte_data(client, addr << 1); +} + +static int max11801_write_reg(struct i2c_client *client, int addr, int data) +{ + /* XXX: The chip ignores LSB of register address */ + return i2c_smbus_write_byte_data(client, addr << 1, data); +} + +static irqreturn_t max11801_ts_interrupt(int irq, void *dev_id) +{ + struct max11801_data *data = dev_id; + struct i2c_client *client = data->client; + int status, i, ret; + u8 buf[XY_BUFSIZE]; + int x = -1; + int y = -1; + + status = read_register(data->client, GENERNAL_STATUS_REG); + + if (status & (MAX11801_FIFO_INT | MAX11801_FIFO_OVERFLOW)) { + status = read_register(data->client, GENERNAL_STATUS_REG); + + ret = i2c_smbus_read_i2c_block_data(client, FIFO_RD_CMD, + XY_BUFSIZE, buf); + + /* + * We should get 4 bytes buffer that contains X,Y + * and event tag + */ + if (ret < XY_BUFSIZE) + goto out; + + for (i = 0; i < XY_BUFSIZE; i += XY_BUFSIZE / 2) { + if ((buf[i + 1] & MEASURE_TAG_MASK) == MEASURE_X_TAG) + x = (buf[i] << XY_BUF_OFFSET) + + (buf[i + 1] >> XY_BUF_OFFSET); + else if ((buf[i + 1] & MEASURE_TAG_MASK) == MEASURE_Y_TAG) + y = (buf[i] << XY_BUF_OFFSET) + + (buf[i + 1] >> XY_BUF_OFFSET); + } + + if ((buf[1] & EVENT_TAG_MASK) != (buf[3] & EVENT_TAG_MASK)) + goto out; + + switch (buf[1] & EVENT_TAG_MASK) { + case EVENT_INIT: + /* fall through */ + case EVENT_MIDDLE: + input_report_abs(data->input_dev, ABS_X, x); + 
input_report_abs(data->input_dev, ABS_Y, y); + input_event(data->input_dev, EV_KEY, BTN_TOUCH, 1); + input_sync(data->input_dev); + break; + + case EVENT_RELEASE: + input_event(data->input_dev, EV_KEY, BTN_TOUCH, 0); + input_sync(data->input_dev); + break; + + case EVENT_FIFO_END: + break; + } + } +out: + return IRQ_HANDLED; +} + +static void __devinit max11801_ts_phy_init(struct max11801_data *data) +{ + struct i2c_client *client = data->client; + + /* Average X,Y, take 16 samples, average eight media sample */ + max11801_write_reg(client, MESURE_AVER_CONF_REG, 0xff); + /* X,Y panel setup time set to 20us */ + max11801_write_reg(client, PANEL_SETUPTIME_CONF_REG, 0x11); + /* Rough pullup time (2uS), Fine pullup time (10us) */ + max11801_write_reg(client, TOUCH_DETECT_PULLUP_CONF_REG, 0x10); + /* Auto mode init period = 5ms , scan period = 5ms*/ + max11801_write_reg(client, AUTO_MODE_TIME_CONF_REG, 0xaa); + /* Aperture X,Y set to +- 4LSB */ + max11801_write_reg(client, APERTURE_CONF_REG, 0x33); + /* Enable Power, enable Automode, enable Aperture, enable Average X,Y */ + max11801_write_reg(client, OP_MODE_CONF_REG, 0x36); +} + +static int __devinit max11801_ts_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct max11801_data *data; + struct input_dev *input_dev; + int error; + + data = kzalloc(sizeof(struct max11801_data), GFP_KERNEL); + input_dev = input_allocate_device(); + if (!data || !input_dev) { + dev_err(&client->dev, "Failed to allocate memory\n"); + error = -ENOMEM; + goto err_free_mem; + } + + data->client = client; + data->input_dev = input_dev; + + input_dev->name = "max11801_ts"; + input_dev->id.bustype = BUS_I2C; + input_dev->dev.parent = &client->dev; + + __set_bit(EV_ABS, input_dev->evbit); + __set_bit(EV_KEY, input_dev->evbit); + __set_bit(BTN_TOUCH, input_dev->keybit); + input_set_abs_params(input_dev, ABS_X, 0, MAX11801_MAX_X, 0, 0); + input_set_abs_params(input_dev, ABS_Y, 0, MAX11801_MAX_Y, 0, 0); + input_set_drvdata(input_dev, data); + + max11801_ts_phy_init(data); + + error = request_threaded_irq(client->irq, NULL, max11801_ts_interrupt, + IRQF_TRIGGER_LOW | IRQF_ONESHOT, + "max11801_ts", data); + if (error) { + dev_err(&client->dev, "Failed to register interrupt\n"); + goto err_free_mem; + } + + error = input_register_device(data->input_dev); + if (error) + goto err_free_irq; + + i2c_set_clientdata(client, data); + return 0; + +err_free_irq: + free_irq(client->irq, data); +err_free_mem: + input_free_device(input_dev); + kfree(data); + return error; +} + +static __devexit int max11801_ts_remove(struct i2c_client *client) +{ + struct max11801_data *data = i2c_get_clientdata(client); + + free_irq(client->irq, data); + input_unregister_device(data->input_dev); + kfree(data); + + return 0; +} + +static const struct i2c_device_id max11801_ts_id[] = { + {"max11801", 0}, + { } +}; +MODULE_DEVICE_TABLE(i2c, max11801_ts_id); + +static struct i2c_driver max11801_ts_driver = { + .driver = { + .name = "max11801_ts", + .owner = THIS_MODULE, + }, + .id_table = max11801_ts_id, + .probe = max11801_ts_probe, + .remove = __devexit_p(max11801_ts_remove), +}; + +static int __init max11801_ts_init(void) +{ + return i2c_add_driver(&max11801_ts_driver); +} + +static void __exit max11801_ts_exit(void) +{ + i2c_del_driver(&max11801_ts_driver); +} + +module_init(max11801_ts_init); +module_exit(max11801_ts_exit); + +MODULE_AUTHOR("Zhang Jiejing "); +MODULE_DESCRIPTION("Touchscreen driver for MAXI MAX11801 controller"); +MODULE_LICENSE("GPL"); -- cgit v0.10.2 From 
910d80513056589d3b12b3aad8598d19e0a0a5bd Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Tue, 12 Apr 2011 23:14:38 -0700 Subject: Input: atmel_mxt_ts - support 12bit resolution Atmel touchscreen chip can support 12bit resolution and this patch modifies to get maximum x and y size from platform data. Signed-off-by: Joonyoung Shim Acked-by: Iiro Valkonen Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index 4012436..a97905a 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -196,9 +196,12 @@ #define MXT_PRESS (1 << 6) #define MXT_DETECT (1 << 7) +/* Touch orient bits */ +#define MXT_XY_SWITCH (1 << 0) +#define MXT_X_INVERT (1 << 1) +#define MXT_Y_INVERT (1 << 2) + /* Touchscreen absolute values */ -#define MXT_MAX_XC 0x3ff -#define MXT_MAX_YC 0x3ff #define MXT_MAX_AREA 0xff #define MXT_MAX_FINGER 10 @@ -246,6 +249,8 @@ struct mxt_data { struct mxt_info info; struct mxt_finger finger[MXT_MAX_FINGER]; unsigned int irq; + unsigned int max_x; + unsigned int max_y; }; static bool mxt_object_readable(unsigned int type) @@ -549,8 +554,13 @@ static void mxt_input_touchevent(struct mxt_data *data, if (!(status & (MXT_PRESS | MXT_MOVE))) return; - x = (message->message[1] << 2) | ((message->message[3] & ~0x3f) >> 6); - y = (message->message[2] << 2) | ((message->message[3] & ~0xf3) >> 2); + x = (message->message[1] << 4) | ((message->message[3] >> 4) & 0xf); + y = (message->message[2] << 4) | ((message->message[3] & 0xf)); + if (data->max_x < 1024) + x = x >> 2; + if (data->max_y < 1024) + y = y >> 2; + area = message->message[4]; dev_dbg(dev, "[%d] %s x: %d, y: %d, area: %d\n", id, @@ -845,6 +855,20 @@ static int mxt_initialize(struct mxt_data *data) return 0; } +static void mxt_calc_resolution(struct mxt_data *data) +{ + unsigned int max_x = data->pdata->x_size - 1; + unsigned int max_y = data->pdata->y_size - 1; + + if (data->pdata->orient & MXT_XY_SWITCH) { + data->max_x = max_y; + data->max_y = max_x; + } else { + data->max_x = max_x; + data->max_y = max_y; + } +} + static ssize_t mxt_object_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1052,31 +1076,32 @@ static int __devinit mxt_probe(struct i2c_client *client, input_dev->open = mxt_input_open; input_dev->close = mxt_input_close; + data->client = client; + data->input_dev = input_dev; + data->pdata = pdata; + data->irq = client->irq; + + mxt_calc_resolution(data); + __set_bit(EV_ABS, input_dev->evbit); __set_bit(EV_KEY, input_dev->evbit); __set_bit(BTN_TOUCH, input_dev->keybit); /* For single touch */ input_set_abs_params(input_dev, ABS_X, - 0, MXT_MAX_XC, 0, 0); + 0, data->max_x, 0, 0); input_set_abs_params(input_dev, ABS_Y, - 0, MXT_MAX_YC, 0, 0); + 0, data->max_y, 0, 0); /* For multi touch */ input_set_abs_params(input_dev, ABS_MT_TOUCH_MAJOR, 0, MXT_MAX_AREA, 0, 0); input_set_abs_params(input_dev, ABS_MT_POSITION_X, - 0, MXT_MAX_XC, 0, 0); + 0, data->max_x, 0, 0); input_set_abs_params(input_dev, ABS_MT_POSITION_Y, - 0, MXT_MAX_YC, 0, 0); + 0, data->max_y, 0, 0); input_set_drvdata(input_dev, data); - - data->client = client; - data->input_dev = input_dev; - data->pdata = pdata; - data->irq = client->irq; - i2c_set_clientdata(client, data); error = mxt_initialize(data); -- cgit v0.10.2 From 08960a070add74cda8c968b8ace5418a5acf17c3 Mon Sep 17 00:00:00 2001 From: Iiro Valkonen Date: Tue, 12 Apr 2011 23:16:40 -0700 Subject: Input: atmel_mxt_ts - make CHG line high after enabling 
interrupts Make the CHG line (interrupt line) go high after the interrupts have been enabled to make sure we don't miss the falling edge. Signed-off-by: Iiro Valkonen Acked-by: Joonyoung Shim Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index a97905a..2accf1d 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -814,10 +814,6 @@ static int mxt_initialize(struct mxt_data *data) if (error) return error; - error = mxt_make_highchg(data); - if (error) - return error; - mxt_handle_pdata(data); /* Backup to memory */ @@ -1005,6 +1001,10 @@ static ssize_t mxt_update_fw_store(struct device *dev, enable_irq(data->irq); + error = mxt_make_highchg(data); + if (error) + return error; + return count; } @@ -1115,6 +1115,10 @@ static int __devinit mxt_probe(struct i2c_client *client, goto err_free_object; } + error = mxt_make_highchg(data); + if (error) + goto err_free_irq; + error = input_register_device(input_dev); if (error) goto err_free_irq; -- cgit v0.10.2 From 8b86c1c28f569301aa1a113a060f9ed803300903 Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Tue, 12 Apr 2011 23:18:59 -0700 Subject: Input: atmel_mxt_ts - convert to MT protocol B Atmel touchscreen chips can use MT protocol B because they can assign unique id to ABS_MT_TRACKING_ID from finger id provided by hardware. Signed-off-by: Joonyoung Shim Reviewed-by: Henrik Rydberg Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index 2accf1d..1e61387 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include @@ -504,19 +504,21 @@ static void mxt_input_report(struct mxt_data *data, int single_id) if (!finger[id].status) continue; - input_report_abs(input_dev, ABS_MT_TOUCH_MAJOR, - finger[id].status != MXT_RELEASE ? - finger[id].area : 0); - input_report_abs(input_dev, ABS_MT_POSITION_X, - finger[id].x); - input_report_abs(input_dev, ABS_MT_POSITION_Y, - finger[id].y); - input_mt_sync(input_dev); + input_mt_slot(input_dev, id); + input_mt_report_slot_state(input_dev, MT_TOOL_FINGER, + finger[id].status != MXT_RELEASE); - if (finger[id].status == MXT_RELEASE) - finger[id].status = 0; - else + if (finger[id].status != MXT_RELEASE) { finger_num++; + input_report_abs(input_dev, ABS_MT_TOUCH_MAJOR, + finger[id].area); + input_report_abs(input_dev, ABS_MT_POSITION_X, + finger[id].x); + input_report_abs(input_dev, ABS_MT_POSITION_Y, + finger[id].y); + } else { + finger[id].status = 0; + } } input_report_key(input_dev, BTN_TOUCH, finger_num > 0); @@ -1094,6 +1096,7 @@ static int __devinit mxt_probe(struct i2c_client *client, 0, data->max_y, 0, 0); /* For multi touch */ + input_mt_init_slots(input_dev, MXT_MAX_FINGER); input_set_abs_params(input_dev, ABS_MT_TOUCH_MAJOR, 0, MXT_MAX_AREA, 0, 0); input_set_abs_params(input_dev, ABS_MT_POSITION_X, -- cgit v0.10.2 From e10b376e98332edcc2530aaed384a7e248477052 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 21 Mar 2011 16:50:18 +0200 Subject: UBI: make the control character device non-seekable This patch makes the UBI control device (/dev/ubi_ctrl) non-seekable. 
The seek operation does is not applicable to this file, so it is cleaner to explicitly return error (which the added 'no_llseek()') does than trying to change the position (which the removed 'default_llseek()' does). This is an API break, but the only known user of this interface is mtd-utils which does not need the seeking functionality. And any app which relies on this is broken, but I'm not aware of such apps. Signed-off-by: Artem Bityutskiy diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c index af9fb0f..9a17032 100644 --- a/drivers/mtd/ubi/cdev.c +++ b/drivers/mtd/ubi/cdev.c @@ -1100,5 +1100,5 @@ const struct file_operations ubi_ctrl_cdev_operations = { .owner = THIS_MODULE, .unlocked_ioctl = ctrl_cdev_ioctl, .compat_ioctl = ctrl_cdev_compat_ioctl, - .llseek = noop_llseek, + .llseek = no_llseek, }; -- cgit v0.10.2 From 6748482f4153fc0e095aa3dc831d5edac5656a80 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 15 Mar 2011 16:25:38 +0200 Subject: UBI: re-name set volume properties ioctl Rename the ioctl which sets volume properties from 'UBI_IOCSETPROP' to 'UBI_IOCSETVOLPROP' to reflect the fact that this ioctl is about volume properties, not device properties. This is also consistent with the other volume ioctl name - 'UBI_IOCVOLUP'. The main motivation for the re-name, however, is that we are going to introduce the per-UBI device "set properties" ioctl, so we need good and logical naming. At the same time, re-name the "set volume properties request" data structure from 'struct ubi_set_prop_req' to 'struct ubi_set_vol_prop_req'. And re-name 'UBI_PROP_DIRECT_WRITE' to 'UBI_VOL_PROP_DIRECT_WRITE'. Signed-off-by: Artem Bityutskiy diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c index 9a17032..4119cb8 100644 --- a/drivers/mtd/ubi/cdev.c +++ b/drivers/mtd/ubi/cdev.c @@ -561,18 +561,18 @@ static long vol_cdev_ioctl(struct file *file, unsigned int cmd, } /* Set volume property command */ - case UBI_IOCSETPROP: + case UBI_IOCSETVOLPROP: { - struct ubi_set_prop_req req; + struct ubi_set_vol_prop_req req; err = copy_from_user(&req, argp, - sizeof(struct ubi_set_prop_req)); + sizeof(struct ubi_set_vol_prop_req)); if (err) { err = -EFAULT; break; } switch (req.property) { - case UBI_PROP_DIRECT_WRITE: + case UBI_VOL_PROP_DIRECT_WRITE: mutex_lock(&ubi->device_mutex); desc->vol->direct_writes = !!req.value; mutex_unlock(&ubi->device_mutex); diff --git a/include/mtd/ubi-user.h b/include/mtd/ubi-user.h index c0d47ad..8d8484b 100644 --- a/include/mtd/ubi-user.h +++ b/include/mtd/ubi-user.h @@ -131,7 +131,7 @@ * ~~~~~~~~~~~~~~~~~~~~~~~~~ * * To set an UBI volume property the %UBI_IOCSETPROP ioctl command should be - * used. A pointer to a &struct ubi_set_prop_req object is expected to be + * used. A pointer to a &struct ubi_set_vol_prop_req object is expected to be * passed. The object describes which property should be set, and to which value * it should be set. 
*/ @@ -186,7 +186,8 @@ /* Check if LEB is mapped command */ #define UBI_IOCEBISMAP _IOR(UBI_VOL_IOC_MAGIC, 5, __s32) /* Set an UBI volume property */ -#define UBI_IOCSETPROP _IOW(UBI_VOL_IOC_MAGIC, 6, struct ubi_set_prop_req) +#define UBI_IOCSETVOLPROP _IOW(UBI_VOL_IOC_MAGIC, 6, \ + struct ubi_set_vol_prop_req) /* Maximum MTD device name length supported by UBI */ #define MAX_UBI_MTD_NAME_LEN 127 @@ -225,11 +226,11 @@ enum { /* * UBI set property ioctl constants * - * @UBI_PROP_DIRECT_WRITE: allow / disallow user to directly write and - * erase individual eraseblocks on dynamic volumes + * @UBI_VOL_PROP_DIRECT_WRITE: allow / disallow user to directly write and + * erase individual eraseblocks on dynamic volumes */ enum { - UBI_PROP_DIRECT_WRITE = 1, + UBI_VOL_PROP_DIRECT_WRITE = 1, }; /** @@ -397,13 +398,13 @@ struct ubi_map_req { /** - * struct ubi_set_prop_req - a data structure used to set an ubi volume - * property. - * @property: property to set (%UBI_PROP_DIRECT_WRITE) + * struct ubi_set_vol_prop_req - a data structure used to set an ubi volume + * property. + * @property: property to set (%UBI_VOL_PROP_DIRECT_WRITE) * @padding: reserved for future, not used, has to be zeroed * @value: value to set */ -struct ubi_set_prop_req { +struct ubi_set_vol_prop_req { __u8 property; __u8 padding[7]; __u64 value; -- cgit v0.10.2 From e8e088de305d7cc00b2c8b2a857ceb62d5fa68d3 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 15 Mar 2011 16:37:57 +0200 Subject: UBI: cleanup comments around volume properties Cleanup and improve commentaries around the "set volume properties" ioctl, make a simple indentation fix as well. Signed-off-by: Artem Bityutskiy diff --git a/include/mtd/ubi-user.h b/include/mtd/ubi-user.h index 8d8484b..e70bd34 100644 --- a/include/mtd/ubi-user.h +++ b/include/mtd/ubi-user.h @@ -224,13 +224,14 @@ enum { }; /* - * UBI set property ioctl constants + * UBI set volume property ioctl constants. * - * @UBI_VOL_PROP_DIRECT_WRITE: allow / disallow user to directly write and - * erase individual eraseblocks on dynamic volumes + * @UBI_VOL_PROP_DIRECT_WRITE: allow (any non-zero value) or disallow (value 0) + * user to directly write and erase individual + * eraseblocks on dynamic volumes */ enum { - UBI_VOL_PROP_DIRECT_WRITE = 1, + UBI_VOL_PROP_DIRECT_WRITE = 1, }; /** @@ -398,7 +399,7 @@ struct ubi_map_req { /** - * struct ubi_set_vol_prop_req - a data structure used to set an ubi volume + * struct ubi_set_vol_prop_req - a data structure used to set an UBI volume * property. * @property: property to set (%UBI_VOL_PROP_DIRECT_WRITE) * @padding: reserved for future, not used, has to be zeroed -- cgit v0.10.2 From 3627924acf70a9a26587712e4888ee7144489678 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 28 Mar 2011 10:04:09 +0300 Subject: UBI: use __packed instead of __attribute__((packed)) There was an attempt to standartize various "__attribute__" and other macros in order to have potentially portable and more consistent code, see commit 82ddcb040570411fc2d421d96b3e69711c670328. Note, that commit refers Rober Love's blog post, but the URL is broken, the valid URL is: http://blog.rlove.org/2005/10/with-little-help-from-your-compiler.html Moreover, nowadays checkpatch.pl warns about using __attribute__((packed)): "WARNING: __packed is preferred over __attribute__((packed))" It is not a big deal for UBI to use __packed, so let's do it. 
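As a minimal sketch of what the substitution looks like at a declaration site (the structure below is invented for illustration; in the kernel headers __packed simply expands to __attribute__((packed)), so the on-flash layout is unchanged):

/* before */
struct example_flash_hdr {
	__be32 magic;
	__u8   version;
	__be32 hdr_crc;
} __attribute__ ((packed));

/* after */
struct example_flash_hdr {
	__be32 magic;
	__u8   version;
	__be32 hdr_crc;
} __packed;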
Signed-off-by: Artem Bityutskiy diff --git a/drivers/mtd/ubi/ubi-media.h b/drivers/mtd/ubi/ubi-media.h index 503ea9b..6fb8ec2 100644 --- a/drivers/mtd/ubi/ubi-media.h +++ b/drivers/mtd/ubi/ubi-media.h @@ -164,7 +164,7 @@ struct ubi_ec_hdr { __be32 image_seq; __u8 padding2[32]; __be32 hdr_crc; -} __attribute__ ((packed)); +} __packed; /** * struct ubi_vid_hdr - on-flash UBI volume identifier header. @@ -292,7 +292,7 @@ struct ubi_vid_hdr { __be64 sqnum; __u8 padding3[12]; __be32 hdr_crc; -} __attribute__ ((packed)); +} __packed; /* Internal UBI volumes count */ #define UBI_INT_VOL_COUNT 1 @@ -373,6 +373,6 @@ struct ubi_vtbl_record { __u8 flags; __u8 padding[23]; __be32 crc; -} __attribute__ ((packed)); +} __packed; #endif /* !__UBI_MEDIA_H__ */ diff --git a/include/mtd/ubi-user.h b/include/mtd/ubi-user.h index e70bd34..a390342 100644 --- a/include/mtd/ubi-user.h +++ b/include/mtd/ubi-user.h @@ -310,7 +310,7 @@ struct ubi_mkvol_req { __s16 name_len; __s8 padding2[4]; char name[UBI_MAX_VOLUME_NAME + 1]; -} __attribute__ ((packed)); +} __packed; /** * struct ubi_rsvol_req - a data structure used in volume re-size requests. @@ -326,7 +326,7 @@ struct ubi_mkvol_req { struct ubi_rsvol_req { __s64 bytes; __s32 vol_id; -} __attribute__ ((packed)); +} __packed; /** * struct ubi_rnvol_req - volumes re-name request. @@ -368,7 +368,7 @@ struct ubi_rnvol_req { __s8 padding2[2]; char name[UBI_MAX_VOLUME_NAME + 1]; } ents[UBI_MAX_RNVOL]; -} __attribute__ ((packed)); +} __packed; /** * struct ubi_leb_change_req - a data structure used in atomic LEB change @@ -383,7 +383,7 @@ struct ubi_leb_change_req { __s32 bytes; __s8 dtype; __s8 padding[7]; -} __attribute__ ((packed)); +} __packed; /** * struct ubi_map_req - a data structure used in map LEB requests. @@ -395,7 +395,7 @@ struct ubi_map_req { __s32 lnum; __s8 dtype; __s8 padding[3]; -} __attribute__ ((packed)); +} __packed; /** @@ -409,6 +409,6 @@ struct ubi_set_vol_prop_req { __u8 property; __u8 padding[7]; __u64 value; -} __attribute__ ((packed)); +} __packed; #endif /* __UBI_USER_H__ */ -- cgit v0.10.2 From feddbb34ebd75e9b6bf573b852079e327a88c07a Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 28 Mar 2011 10:12:25 +0300 Subject: UBI: fix minor stylistic issues Fix checkpatch.pl errors and warnings: * space before tab * line over 80 characters * include linux/ioctl.h instead of asm/ioctl.h Signed-off-by: Artem Bityutskiy diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c index 4119cb8..191f3bb 100644 --- a/drivers/mtd/ubi/cdev.c +++ b/drivers/mtd/ubi/cdev.c @@ -115,7 +115,7 @@ static int vol_cdev_open(struct inode *inode, struct file *file) mode = UBI_READONLY; dbg_gen("open device %d, volume %d, mode %d", - ubi_num, vol_id, mode); + ubi_num, vol_id, mode); desc = ubi_open_volume(ubi_num, vol_id, mode); if (IS_ERR(desc)) @@ -158,7 +158,7 @@ static loff_t vol_cdev_llseek(struct file *file, loff_t offset, int origin) loff_t new_offset; if (vol->updating) { - /* Update is in progress, seeking is prohibited */ + /* Update is in progress, seeking is prohibited */ dbg_err("updating"); return -EBUSY; } diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c index d4d07e5..0cd5bea 100644 --- a/drivers/mtd/ubi/debug.c +++ b/drivers/mtd/ubi/debug.c @@ -75,15 +75,15 @@ void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr) { printk(KERN_DEBUG "Volume identifier header dump:\n"); printk(KERN_DEBUG "\tmagic %08x\n", be32_to_cpu(vid_hdr->magic)); - printk(KERN_DEBUG "\tversion %d\n", (int)vid_hdr->version); - printk(KERN_DEBUG 
"\tvol_type %d\n", (int)vid_hdr->vol_type); - printk(KERN_DEBUG "\tcopy_flag %d\n", (int)vid_hdr->copy_flag); - printk(KERN_DEBUG "\tcompat %d\n", (int)vid_hdr->compat); - printk(KERN_DEBUG "\tvol_id %d\n", be32_to_cpu(vid_hdr->vol_id)); - printk(KERN_DEBUG "\tlnum %d\n", be32_to_cpu(vid_hdr->lnum)); - printk(KERN_DEBUG "\tdata_size %d\n", be32_to_cpu(vid_hdr->data_size)); - printk(KERN_DEBUG "\tused_ebs %d\n", be32_to_cpu(vid_hdr->used_ebs)); - printk(KERN_DEBUG "\tdata_pad %d\n", be32_to_cpu(vid_hdr->data_pad)); + printk(KERN_DEBUG "\tversion %d\n", (int)vid_hdr->version); + printk(KERN_DEBUG "\tvol_type %d\n", (int)vid_hdr->vol_type); + printk(KERN_DEBUG "\tcopy_flag %d\n", (int)vid_hdr->copy_flag); + printk(KERN_DEBUG "\tcompat %d\n", (int)vid_hdr->compat); + printk(KERN_DEBUG "\tvol_id %d\n", be32_to_cpu(vid_hdr->vol_id)); + printk(KERN_DEBUG "\tlnum %d\n", be32_to_cpu(vid_hdr->lnum)); + printk(KERN_DEBUG "\tdata_size %d\n", be32_to_cpu(vid_hdr->data_size)); + printk(KERN_DEBUG "\tused_ebs %d\n", be32_to_cpu(vid_hdr->used_ebs)); + printk(KERN_DEBUG "\tdata_pad %d\n", be32_to_cpu(vid_hdr->data_pad)); printk(KERN_DEBUG "\tsqnum %llu\n", (unsigned long long)be64_to_cpu(vid_hdr->sqnum)); printk(KERN_DEBUG "\thdr_crc %08x\n", be32_to_cpu(vid_hdr->hdr_crc)); diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index e347cc4..d58ceb1 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -189,8 +189,8 @@ retry: } if (retries++ < UBI_IO_RETRIES) { - dbg_io("error %d%s while reading %d bytes from PEB %d:%d," - " read only %zd bytes, retry", + dbg_io("error %d%s while reading %d bytes from PEB " + "%d:%d, read only %zd bytes, retry", err, errstr, len, pnum, offset, read); yield(); goto retry; diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c index d2d12ab..2135a53 100644 --- a/drivers/mtd/ubi/scan.c +++ b/drivers/mtd/ubi/scan.c @@ -1103,7 +1103,7 @@ static int check_what_we_have(struct ubi_device *ubi, struct ubi_scan_info *si) * otherwise, only print a warning. */ if (si->corr_peb_count >= max_corr) { - ubi_err("too many corrupted PEBs, refusing this device"); + ubi_err("too many corrupted PEBs, refusing"); return -EINVAL; } } diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index f1be8b7..c6c2229 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -341,8 +341,8 @@ struct ubi_wl_entry; * protected from the wear-leveling worker) * @pq_head: protection queue head * @wl_lock: protects the @used, @free, @pq, @pq_head, @lookuptbl, @move_from, - * @move_to, @move_to_put @erase_pending, @wl_scheduled, @works, - * @erroneous, and @erroneous_peb_count fields + * @move_to, @move_to_put @erase_pending, @wl_scheduled, @works, + * @erroneous, and @erroneous_peb_count fields * @move_mutex: serializes eraseblock moves * @work_sem: synchronizes the WL worker with use tasks * @wl_scheduled: non-zero if the wear-leveling was scheduled diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index b4cf57d..ff2c495 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1570,7 +1570,8 @@ void ubi_wl_close(struct ubi_device *ubi) * @ec: the erase counter to check * * This function returns zero if the erase counter of physical eraseblock @pnum - * is equivalent to @ec, and a negative error code if not or if an error occurred. + * is equivalent to @ec, and a negative error code if not or if an error + * occurred. 
*/ static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec) { diff --git a/include/linux/mtd/ubi.h b/include/linux/mtd/ubi.h index 84854ed..15da0e9 100644 --- a/include/linux/mtd/ubi.h +++ b/include/linux/mtd/ubi.h @@ -21,7 +21,7 @@ #ifndef __LINUX_UBI_H__ #define __LINUX_UBI_H__ -#include +#include #include #include @@ -87,7 +87,7 @@ enum { * physical eraseblock size and on how much bytes UBI headers consume. But * because of the volume alignment (@alignment), the usable size of logical * eraseblocks if a volume may be less. The following equation is true: - * @usable_leb_size = LEB size - (LEB size mod @alignment), + * @usable_leb_size = LEB size - (LEB size mod @alignment), * where LEB size is the logical eraseblock size defined by the UBI device. * * The alignment is multiple to the minimal flash input/output unit size or %1 diff --git a/include/mtd/ubi-user.h b/include/mtd/ubi-user.h index a390342..3c41097 100644 --- a/include/mtd/ubi-user.h +++ b/include/mtd/ubi-user.h @@ -406,9 +406,9 @@ struct ubi_map_req { * @value: value to set */ struct ubi_set_vol_prop_req { - __u8 property; - __u8 padding[7]; - __u64 value; + __u8 property; + __u8 padding[7]; + __u64 value; } __packed; #endif /* __UBI_USER_H__ */ -- cgit v0.10.2 From 1426414431a8d37a6e631e0b5e2ad6186b81876a Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 14 Apr 2011 11:36:31 +0300 Subject: UBI: fix typo in a message When a PEB passes the torture test, UBI prints "do not mark it a bad", but should print "do not mark it as bad". This patch corrects the typo. Signed-off-by: Artem Bityutskiy diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index d58ceb1..8c1b1c7 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -465,7 +465,7 @@ static int torture_peb(struct ubi_device *ubi, int pnum) } err = patt_count; - ubi_msg("PEB %d passed torture test, do not mark it a bad", pnum); + ubi_msg("PEB %d passed torture test, do not mark it as bad", pnum); out: mutex_unlock(&ubi->buf_mutex); -- cgit v0.10.2 From 4d05a28db56225bbab5e1321d818f318e92a4657 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 14 Apr 2011 18:25:47 -0400 Subject: xen: add blkback support Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk Conflicts: drivers/xen/Makefile diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index a59638b..fb1af62 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -37,6 +37,14 @@ config XEN_BACKEND Support for backend device drivers that provide I/O services to other virtual machines. +config XEN_BLKDEV_BACKEND + tristate "Block-device backend driver" + depends on XEN_BACKEND && BLOCK + help + The block-device backend driver allows the kernel to export its + block devices to other guests via a high-performance shared-memory + interface. 
+ config XENFS tristate "Xen filesystem" default y diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index f420f1f..29c0a41 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o +obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/ obj-$(CONFIG_XENFS) += xenfs/ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o diff --git a/drivers/xen/blkback/Makefile b/drivers/xen/blkback/Makefile new file mode 100644 index 0000000..8bab63d --- /dev/null +++ b/drivers/xen/blkback/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_XEN_BLKDEV_BACKEND) := blkbk.o + +blkbk-y := blkback.o xenbus.o interface.o vbd.o diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c new file mode 100644 index 0000000..5b8d50e --- /dev/null +++ b/drivers/xen/blkback/blkback.c @@ -0,0 +1,656 @@ +/****************************************************************************** + * arch/xen/drivers/blkif/backend/main.c + * + * Back-end of the driver for virtual block devices. This portion of the + * driver exports a 'unified' block-device interface that can be accessed + * by any operating system that implements a compatible front end. A + * reference front-end implementation can be found in: + * arch/xen/drivers/blkif/frontend + * + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand + * Copyright (c) 2005, Christopher Clark + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include "common.h" + +/* + * These are rather arbitrary. They are fairly large because adjacent requests + * pulled from a communication ring are quite likely to end up being part of + * the same scatter/gather request at the disc. + * + * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW ** + * + * This will increase the chances of being able to write whole tracks. + * 64 should be enough to keep us competitive with Linux. 
+ */ +static int blkif_reqs = 64; +module_param_named(reqs, blkif_reqs, int, 0); +MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate"); + +/* Run-time switchable: /sys/module/blkback/parameters/ */ +static unsigned int log_stats = 0; +static unsigned int debug_lvl = 0; +module_param(log_stats, int, 0644); +module_param(debug_lvl, int, 0644); + +/* + * Each outstanding request that we've passed to the lower device layers has a + * 'pending_req' allocated to it. Each buffer_head that completes decrements + * the pendcnt towards zero. When it hits zero, the specified domain has a + * response queued for it, with the saved 'id' passed back. + */ +typedef struct { + blkif_t *blkif; + u64 id; + int nr_pages; + atomic_t pendcnt; + unsigned short operation; + int status; + struct list_head free_list; +} pending_req_t; + +static pending_req_t *pending_reqs; +static struct list_head pending_free; +static DEFINE_SPINLOCK(pending_free_lock); +static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq); + +#define BLKBACK_INVALID_HANDLE (~0) + +static struct page **pending_pages; +static grant_handle_t *pending_grant_handles; + +static inline int vaddr_pagenr(pending_req_t *req, int seg) +{ + return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; +} + +static inline unsigned long vaddr(pending_req_t *req, int seg) +{ + unsigned long pfn = page_to_pfn(pending_pages[vaddr_pagenr(req, seg)]); + return (unsigned long)pfn_to_kaddr(pfn); +} + +#define pending_handle(_req, _seg) \ + (pending_grant_handles[vaddr_pagenr(_req, _seg)]) + + +static int do_block_io_op(blkif_t *blkif); +static void dispatch_rw_block_io(blkif_t *blkif, + blkif_request_t *req, + pending_req_t *pending_req); +static void make_response(blkif_t *blkif, u64 id, + unsigned short op, int st); + +/****************************************************************** + * misc small helpers + */ +static pending_req_t* alloc_req(void) +{ + pending_req_t *req = NULL; + unsigned long flags; + + spin_lock_irqsave(&pending_free_lock, flags); + if (!list_empty(&pending_free)) { + req = list_entry(pending_free.next, pending_req_t, free_list); + list_del(&req->free_list); + } + spin_unlock_irqrestore(&pending_free_lock, flags); + return req; +} + +static void free_req(pending_req_t *req) +{ + unsigned long flags; + int was_empty; + + spin_lock_irqsave(&pending_free_lock, flags); + was_empty = list_empty(&pending_free); + list_add(&req->free_list, &pending_free); + spin_unlock_irqrestore(&pending_free_lock, flags); + if (was_empty) + wake_up(&pending_free_wq); +} + +static void unplug_queue(blkif_t *blkif) +{ + if (blkif->plug == NULL) + return; + if (blkif->plug->unplug_fn) + blkif->plug->unplug_fn(blkif->plug); + blk_put_queue(blkif->plug); + blkif->plug = NULL; +} + +static void plug_queue(blkif_t *blkif, struct block_device *bdev) +{ + request_queue_t *q = bdev_get_queue(bdev); + + if (q == blkif->plug) + return; + unplug_queue(blkif); + blk_get_queue(q); + blkif->plug = q; +} + +static void fast_flush_area(pending_req_t *req) +{ + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + unsigned int i, invcount = 0; + grant_handle_t handle; + int ret; + + for (i = 0; i < req->nr_pages; i++) { + handle = pending_handle(req, i); + if (handle == BLKBACK_INVALID_HANDLE) + continue; + gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i), + GNTMAP_host_map, handle); + pending_handle(req, i) = BLKBACK_INVALID_HANDLE; + invcount++; + } + + ret = HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, unmap, invcount); + BUG_ON(ret); 
+} + +/****************************************************************** + * SCHEDULER FUNCTIONS + */ + +static void print_stats(blkif_t *blkif) +{ + printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | br %4d\n", + current->comm, blkif->st_oo_req, + blkif->st_rd_req, blkif->st_wr_req, blkif->st_br_req); + blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); + blkif->st_rd_req = 0; + blkif->st_wr_req = 0; + blkif->st_oo_req = 0; +} + +int blkif_schedule(void *arg) +{ + blkif_t *blkif = arg; + + blkif_get(blkif); + + if (debug_lvl) + printk(KERN_DEBUG "%s: started\n", current->comm); + + while (!kthread_should_stop()) { + if (try_to_freeze()) + continue; + + wait_event_interruptible( + blkif->wq, + blkif->waiting_reqs || kthread_should_stop()); + wait_event_interruptible( + pending_free_wq, + !list_empty(&pending_free) || kthread_should_stop()); + + blkif->waiting_reqs = 0; + smp_mb(); /* clear flag *before* checking for work */ + + if (do_block_io_op(blkif)) + blkif->waiting_reqs = 1; + unplug_queue(blkif); + + if (log_stats && time_after(jiffies, blkif->st_print)) + print_stats(blkif); + } + + if (log_stats) + print_stats(blkif); + if (debug_lvl) + printk(KERN_DEBUG "%s: exiting\n", current->comm); + + blkif->xenblkd = NULL; + blkif_put(blkif); + + return 0; +} + +/****************************************************************** + * COMPLETION CALLBACK -- Called as bh->b_end_io() + */ + +static void __end_block_io_op(pending_req_t *pending_req, int error) +{ + /* An error fails the entire request. */ + if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) && + (error == -EOPNOTSUPP)) { + DPRINTK("blkback: write barrier op failed, not supported\n"); + blkback_barrier(XBT_NIL, pending_req->blkif->be, 0); + pending_req->status = BLKIF_RSP_EOPNOTSUPP; + } else if (error) { + DPRINTK("Buffer not up-to-date at end of operation, " + "error=%d\n", error); + pending_req->status = BLKIF_RSP_ERROR; + } + + if (atomic_dec_and_test(&pending_req->pendcnt)) { + fast_flush_area(pending_req); + make_response(pending_req->blkif, pending_req->id, + pending_req->operation, pending_req->status); + blkif_put(pending_req->blkif); + free_req(pending_req); + } +} + +static int end_block_io_op(struct bio *bio, unsigned int done, int error) +{ + if (bio->bi_size != 0) + return 1; + __end_block_io_op(bio->bi_private, error); + bio_put(bio); + return error; +} + + +/****************************************************************************** + * NOTIFICATION FROM GUEST OS. + */ + +static void blkif_notify_work(blkif_t *blkif) +{ + blkif->waiting_reqs = 1; + wake_up(&blkif->wq); +} + +irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs) +{ + blkif_notify_work(dev_id); + return IRQ_HANDLED; +} + + + +/****************************************************************** + * DOWNWARD CALLS -- These interface with the block-device layer proper. + */ + +static int do_block_io_op(blkif_t *blkif) +{ + blkif_back_rings_t *blk_rings = &blkif->blk_rings; + blkif_request_t req; + pending_req_t *pending_req; + RING_IDX rc, rp; + int more_to_do = 0; + + rc = blk_rings->common.req_cons; + rp = blk_rings->common.sring->req_prod; + rmb(); /* Ensure we see queued requests up to 'rp'. 
*/ + + while (rc != rp) { + + if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) + break; + + pending_req = alloc_req(); + if (NULL == pending_req) { + blkif->st_oo_req++; + more_to_do = 1; + break; + } + + if (kthread_should_stop()) { + more_to_do = 1; + break; + } + + switch (blkif->blk_protocol) { + case BLKIF_PROTOCOL_NATIVE: + memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req)); + break; + case BLKIF_PROTOCOL_X86_32: + blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc)); + break; + case BLKIF_PROTOCOL_X86_64: + blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc)); + break; + default: + BUG(); + } + blk_rings->common.req_cons = ++rc; /* before make_response() */ + + /* Apply all sanity checks to /private copy/ of request. */ + barrier(); + + switch (req.operation) { + case BLKIF_OP_READ: + blkif->st_rd_req++; + dispatch_rw_block_io(blkif, &req, pending_req); + break; + case BLKIF_OP_WRITE_BARRIER: + blkif->st_br_req++; + /* fall through */ + case BLKIF_OP_WRITE: + blkif->st_wr_req++; + dispatch_rw_block_io(blkif, &req, pending_req); + break; + default: + /* A good sign something is wrong: sleep for a while to + * avoid excessive CPU consumption by a bad guest. */ + msleep(1); + DPRINTK("error: unknown block io operation [%d]\n", + req.operation); + make_response(blkif, req.id, req.operation, + BLKIF_RSP_ERROR); + free_req(pending_req); + break; + } + + /* Yield point for this unbounded loop. */ + cond_resched(); + } + + return more_to_do; +} + +static void dispatch_rw_block_io(blkif_t *blkif, + blkif_request_t *req, + pending_req_t *pending_req) +{ + extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); + struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct phys_req preq; + struct { + unsigned long buf; unsigned int nsec; + } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + unsigned int nseg; + struct bio *bio = NULL; + int ret, i; + int operation; + + switch (req->operation) { + case BLKIF_OP_READ: + operation = READ; + break; + case BLKIF_OP_WRITE: + operation = WRITE; + break; + case BLKIF_OP_WRITE_BARRIER: + operation = WRITE_BARRIER; + break; + default: + operation = 0; /* make gcc happy */ + BUG(); + } + + /* Check that number of segments is sane. 
*/ + nseg = req->nr_segments; + if (unlikely(nseg == 0 && operation != WRITE_BARRIER) || + unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { + DPRINTK("Bad number of segments in request (%d)\n", nseg); + goto fail_response; + } + + preq.dev = req->handle; + preq.sector_number = req->sector_number; + preq.nr_sects = 0; + + pending_req->blkif = blkif; + pending_req->id = req->id; + pending_req->operation = req->operation; + pending_req->status = BLKIF_RSP_OKAY; + pending_req->nr_pages = nseg; + + for (i = 0; i < nseg; i++) { + uint32_t flags; + + seg[i].nsec = req->seg[i].last_sect - + req->seg[i].first_sect + 1; + + if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) || + (req->seg[i].last_sect < req->seg[i].first_sect)) + goto fail_response; + preq.nr_sects += seg[i].nsec; + + flags = GNTMAP_host_map; + if (operation != READ) + flags |= GNTMAP_readonly; + gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, + req->seg[i].gref, blkif->domid); + } + + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); + BUG_ON(ret); + + for (i = 0; i < nseg; i++) { + if (unlikely(map[i].status != 0)) { + DPRINTK("invalid buffer -- could not remap it\n"); + map[i].handle = BLKBACK_INVALID_HANDLE; + ret |= 1; + } + + pending_handle(pending_req, i) = map[i].handle; + + if (ret) + continue; + + set_phys_to_machine(__pa(vaddr( + pending_req, i)) >> PAGE_SHIFT, + FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT)); + seg[i].buf = map[i].dev_bus_addr | + (req->seg[i].first_sect << 9); + } + + if (ret) + goto fail_flush; + + if (vbd_translate(&preq, blkif, operation) != 0) { + DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", + operation == READ ? "read" : "write", + preq.sector_number, + preq.sector_number + preq.nr_sects, preq.dev); + goto fail_flush; + } + + plug_queue(blkif, preq.bdev); + atomic_set(&pending_req->pendcnt, 1); + blkif_get(blkif); + + for (i = 0; i < nseg; i++) { + if (((int)preq.sector_number|(int)seg[i].nsec) & + ((bdev_hardsect_size(preq.bdev) >> 9) - 1)) { + DPRINTK("Misaligned I/O request from domain %d", + blkif->domid); + goto fail_put_bio; + } + + while ((bio == NULL) || + (bio_add_page(bio, + virt_to_page(vaddr(pending_req, i)), + seg[i].nsec << 9, + seg[i].buf & ~PAGE_MASK) == 0)) { + if (bio) { + atomic_inc(&pending_req->pendcnt); + submit_bio(operation, bio); + } + + bio = bio_alloc(GFP_KERNEL, nseg-i); + if (unlikely(bio == NULL)) + goto fail_put_bio; + + bio->bi_bdev = preq.bdev; + bio->bi_private = pending_req; + bio->bi_end_io = end_block_io_op; + bio->bi_sector = preq.sector_number; + } + + preq.sector_number += seg[i].nsec; + } + + if (!bio) { + BUG_ON(operation != WRITE_BARRIER); + bio = bio_alloc(GFP_KERNEL, 0); + if (unlikely(bio == NULL)) + goto fail_put_bio; + + bio->bi_bdev = preq.bdev; + bio->bi_private = pending_req; + bio->bi_end_io = end_block_io_op; + bio->bi_sector = -1; + } + + submit_bio(operation, bio); + + if (operation == READ) + blkif->st_rd_sect += preq.nr_sects; + else if (operation == WRITE || operation == WRITE_BARRIER) + blkif->st_wr_sect += preq.nr_sects; + + return; + + fail_flush: + fast_flush_area(pending_req); + fail_response: + make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); + free_req(pending_req); + msleep(1); /* back off a bit */ + return; + + fail_put_bio: + __end_block_io_op(pending_req, -EINVAL); + if (bio) + bio_put(bio); + unplug_queue(blkif); + msleep(1); /* back off a bit */ + return; +} + + + +/****************************************************************** + * MISCELLANEOUS SETUP / TEARDOWN / 
DEBUGGING + */ + + +static void make_response(blkif_t *blkif, u64 id, + unsigned short op, int st) +{ + blkif_response_t resp; + unsigned long flags; + blkif_back_rings_t *blk_rings = &blkif->blk_rings; + int more_to_do = 0; + int notify; + + resp.id = id; + resp.operation = op; + resp.status = st; + + spin_lock_irqsave(&blkif->blk_ring_lock, flags); + /* Place on the response ring for the relevant domain. */ + switch (blkif->blk_protocol) { + case BLKIF_PROTOCOL_NATIVE: + memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt), + &resp, sizeof(resp)); + break; + case BLKIF_PROTOCOL_X86_32: + memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt), + &resp, sizeof(resp)); + break; + case BLKIF_PROTOCOL_X86_64: + memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt), + &resp, sizeof(resp)); + break; + default: + BUG(); + } + blk_rings->common.rsp_prod_pvt++; + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify); + if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) { + /* + * Tail check for pending requests. Allows frontend to avoid + * notifications if requests are already in flight (lower + * overheads and promotes batching). + */ + RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do); + + } else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) { + more_to_do = 1; + } + + spin_unlock_irqrestore(&blkif->blk_ring_lock, flags); + + if (more_to_do) + blkif_notify_work(blkif); + if (notify) + notify_remote_via_irq(blkif->irq); +} + +static int __init blkif_init(void) +{ + int i, mmap_pages; + + if (!is_running_on_xen()) + return -ENODEV; + + mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; + + pending_reqs = kmalloc(sizeof(pending_reqs[0]) * + blkif_reqs, GFP_KERNEL); + pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) * + mmap_pages, GFP_KERNEL); + pending_pages = alloc_empty_pages_and_pagevec(mmap_pages); + + if (!pending_reqs || !pending_grant_handles || !pending_pages) + goto out_of_memory; + + for (i = 0; i < mmap_pages; i++) + pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; + + blkif_interface_init(); + + memset(pending_reqs, 0, sizeof(pending_reqs)); + INIT_LIST_HEAD(&pending_free); + + for (i = 0; i < blkif_reqs; i++) + list_add_tail(&pending_reqs[i].free_list, &pending_free); + + blkif_xenbus_init(); + + return 0; + + out_of_memory: + kfree(pending_reqs); + kfree(pending_grant_handles); + free_empty_pages_and_pagevec(pending_pages, mmap_pages); + printk("%s: out of memory\n", __FUNCTION__); + return -ENOMEM; +} + +module_init(blkif_init); + +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h new file mode 100644 index 0000000..422e935 --- /dev/null +++ b/drivers/xen/blkback/common.h @@ -0,0 +1,139 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished 
to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __BLKIF__BACKEND__COMMON_H__ +#define __BLKIF__BACKEND__COMMON_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DPRINTK(_f, _a...) \ + pr_debug("(file=%s, line=%d) " _f, \ + __FILE__ , __LINE__ , ## _a ) + +struct vbd { + blkif_vdev_t handle; /* what the domain refers to this vbd as */ + unsigned char readonly; /* Non-zero -> read-only */ + unsigned char type; /* VDISK_xxx */ + u32 pdevice; /* phys device that this vbd maps to */ + struct block_device *bdev; +}; + +struct backend_info; + +typedef struct blkif_st { + /* Unique identifier for this interface. */ + domid_t domid; + unsigned int handle; + /* Physical parameters of the comms window. */ + unsigned int irq; + /* Comms information. */ + enum blkif_protocol blk_protocol; + blkif_back_rings_t blk_rings; + struct vm_struct *blk_ring_area; + /* The VBD attached to this interface. */ + struct vbd vbd; + /* Back pointer to the backend_info. */ + struct backend_info *be; + /* Private fields. */ + spinlock_t blk_ring_lock; + atomic_t refcnt; + + wait_queue_head_t wq; + struct task_struct *xenblkd; + unsigned int waiting_reqs; + request_queue_t *plug; + + /* statistics */ + unsigned long st_print; + int st_rd_req; + int st_wr_req; + int st_oo_req; + int st_br_req; + int st_rd_sect; + int st_wr_sect; + + wait_queue_head_t waiting_to_free; + + grant_handle_t shmem_handle; + grant_ref_t shmem_ref; +} blkif_t; + +blkif_t *blkif_alloc(domid_t domid); +void blkif_disconnect(blkif_t *blkif); +void blkif_free(blkif_t *blkif); +int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn); + +#define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) +#define blkif_put(_b) \ + do { \ + if (atomic_dec_and_test(&(_b)->refcnt)) \ + wake_up(&(_b)->waiting_to_free);\ + } while (0) + +/* Create a vbd. 
*/ +int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, unsigned major, + unsigned minor, int readonly, int cdrom); +void vbd_free(struct vbd *vbd); + +unsigned long long vbd_size(struct vbd *vbd); +unsigned int vbd_info(struct vbd *vbd); +unsigned long vbd_secsize(struct vbd *vbd); + +struct phys_req { + unsigned short dev; + unsigned short nr_sects; + struct block_device *bdev; + blkif_sector_t sector_number; +}; + +int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation); + +void blkif_interface_init(void); + +void blkif_xenbus_init(void); + +irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs); +int blkif_schedule(void *arg); + +int blkback_barrier(struct xenbus_transaction xbt, + struct backend_info *be, int state); + +#endif /* __BLKIF__BACKEND__COMMON_H__ */ diff --git a/drivers/xen/blkback/interface.c b/drivers/xen/blkback/interface.c new file mode 100644 index 0000000..81821bd --- /dev/null +++ b/drivers/xen/blkback/interface.c @@ -0,0 +1,181 @@ +/****************************************************************************** + * arch/xen/drivers/blkif/backend/interface.c + * + * Block-device interface management. + * + * Copyright (c) 2004, Keir Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "common.h" +#include +#include + +static kmem_cache_t *blkif_cachep; + +blkif_t *blkif_alloc(domid_t domid) +{ + blkif_t *blkif; + + blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL); + if (!blkif) + return ERR_PTR(-ENOMEM); + + memset(blkif, 0, sizeof(*blkif)); + blkif->domid = domid; + spin_lock_init(&blkif->blk_ring_lock); + atomic_set(&blkif->refcnt, 1); + init_waitqueue_head(&blkif->wq); + blkif->st_print = jiffies; + init_waitqueue_head(&blkif->waiting_to_free); + + return blkif; +} + +static int map_frontend_page(blkif_t *blkif, unsigned long shared_page) +{ + struct gnttab_map_grant_ref op; + + gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr, + GNTMAP_host_map, shared_page, blkif->domid); + + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) + BUG(); + + if (op.status) { + DPRINTK(" Grant table operation failure !\n"); + return op.status; + } + + blkif->shmem_ref = shared_page; + blkif->shmem_handle = op.handle; + + return 0; +} + +static void unmap_frontend_page(blkif_t *blkif) +{ + struct gnttab_unmap_grant_ref op; + + gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr, + GNTMAP_host_map, blkif->shmem_handle); + + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) + BUG(); +} + +int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn) +{ + int err; + + /* Already connected through? */ + if (blkif->irq) + return 0; + + if ( (blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE)) == NULL ) + return -ENOMEM; + + err = map_frontend_page(blkif, shared_page); + if (err) { + free_vm_area(blkif->blk_ring_area); + return err; + } + + switch (blkif->blk_protocol) { + case BLKIF_PROTOCOL_NATIVE: + { + blkif_sring_t *sring; + sring = (blkif_sring_t *)blkif->blk_ring_area->addr; + BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); + break; + } + case BLKIF_PROTOCOL_X86_32: + { + blkif_x86_32_sring_t *sring_x86_32; + sring_x86_32 = (blkif_x86_32_sring_t *)blkif->blk_ring_area->addr; + BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); + break; + } + case BLKIF_PROTOCOL_X86_64: + { + blkif_x86_64_sring_t *sring_x86_64; + sring_x86_64 = (blkif_x86_64_sring_t *)blkif->blk_ring_area->addr; + BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); + break; + } + default: + BUG(); + } + + err = bind_interdomain_evtchn_to_irqhandler( + blkif->domid, evtchn, blkif_be_int, 0, "blkif-backend", blkif); + if (err < 0) + { + unmap_frontend_page(blkif); + free_vm_area(blkif->blk_ring_area); + blkif->blk_rings.common.sring = NULL; + return err; + } + blkif->irq = err; + + return 0; +} + +void blkif_disconnect(blkif_t *blkif) +{ + if (blkif->xenblkd) { + kthread_stop(blkif->xenblkd); + blkif->xenblkd = NULL; + } + + atomic_dec(&blkif->refcnt); + wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0); + atomic_inc(&blkif->refcnt); + + if (blkif->irq) { + unbind_from_irqhandler(blkif->irq, blkif); + blkif->irq = 0; + } + + if (blkif->blk_rings.common.sring) { + unmap_frontend_page(blkif); + free_vm_area(blkif->blk_ring_area); + blkif->blk_rings.common.sring = NULL; + } +} + +void blkif_free(blkif_t *blkif) +{ + if (!atomic_dec_and_test(&blkif->refcnt)) + BUG(); + kmem_cache_free(blkif_cachep, blkif); +} + +void __init blkif_interface_init(void) +{ + blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), + 0, 0, NULL, NULL); +} diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c new file mode 100644 index 0000000..1fb31d0 --- /dev/null +++ 
b/drivers/xen/blkback/vbd.c @@ -0,0 +1,118 @@ +/****************************************************************************** + * blkback/vbd.c + * + * Routines for managing virtual block devices (VBDs). + * + * Copyright (c) 2003-2005, Keir Fraser & Steve Hand + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "common.h" + +#define vbd_sz(_v) ((_v)->bdev->bd_part ? \ + (_v)->bdev->bd_part->nr_sects : (_v)->bdev->bd_disk->capacity) + +unsigned long long vbd_size(struct vbd *vbd) +{ + return vbd_sz(vbd); +} + +unsigned int vbd_info(struct vbd *vbd) +{ + return vbd->type | (vbd->readonly?VDISK_READONLY:0); +} + +unsigned long vbd_secsize(struct vbd *vbd) +{ + return bdev_hardsect_size(vbd->bdev); +} + +int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major, + unsigned minor, int readonly, int cdrom) +{ + struct vbd *vbd; + struct block_device *bdev; + + vbd = &blkif->vbd; + vbd->handle = handle; + vbd->readonly = readonly; + vbd->type = 0; + + vbd->pdevice = MKDEV(major, minor); + + bdev = open_by_devnum(vbd->pdevice, + vbd->readonly ? 
FMODE_READ : FMODE_WRITE); + + if (IS_ERR(bdev)) { + DPRINTK("vbd_creat: device %08x could not be opened.\n", + vbd->pdevice); + return -ENOENT; + } + + vbd->bdev = bdev; + + if (vbd->bdev->bd_disk == NULL) { + DPRINTK("vbd_creat: device %08x doesn't exist.\n", + vbd->pdevice); + vbd_free(vbd); + return -ENOENT; + } + + if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom) + vbd->type |= VDISK_CDROM; + if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE) + vbd->type |= VDISK_REMOVABLE; + + DPRINTK("Successful creation of handle=%04x (dom=%u)\n", + handle, blkif->domid); + return 0; +} + +void vbd_free(struct vbd *vbd) +{ + if (vbd->bdev) + blkdev_put(vbd->bdev); + vbd->bdev = NULL; +} + +int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation) +{ + struct vbd *vbd = &blkif->vbd; + int rc = -EACCES; + + if ((operation != READ) && vbd->readonly) + goto out; + + if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd))) + goto out; + + req->dev = vbd->pdevice; + req->bdev = vbd->bdev; + rc = 0; + + out: + return rc; +} diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c new file mode 100644 index 0000000..80d9aa6 --- /dev/null +++ b/drivers/xen/blkback/xenbus.c @@ -0,0 +1,541 @@ +/* Xenbus code for blkif backend + Copyright (C) 2005 Rusty Russell + Copyright (C) 2005 XenSource Ltd + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include +#include +#include "common.h" + +#undef DPRINTK +#define DPRINTK(fmt, args...) \ + pr_debug("blkback/xenbus (%s:%d) " fmt ".\n", \ + __FUNCTION__, __LINE__, ##args) + +struct backend_info +{ + struct xenbus_device *dev; + blkif_t *blkif; + struct xenbus_watch backend_watch; + unsigned major; + unsigned minor; + char *mode; +}; + +static void connect(struct backend_info *); +static int connect_ring(struct backend_info *); +static void backend_changed(struct xenbus_watch *, const char **, + unsigned int); + +static int blkback_name(blkif_t *blkif, char *buf) +{ + char *devpath, *devname; + struct xenbus_device *dev = blkif->be->dev; + + devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL); + if (IS_ERR(devpath)) + return PTR_ERR(devpath); + + if ((devname = strstr(devpath, "/dev/")) != NULL) + devname += strlen("/dev/"); + else + devname = devpath; + + snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, devname); + kfree(devpath); + + return 0; +} + +static void update_blkif_status(blkif_t *blkif) +{ + int err; + char name[TASK_COMM_LEN]; + + /* Not ready to connect? */ + if (!blkif->irq || !blkif->vbd.bdev) + return; + + /* Already connected? */ + if (blkif->be->dev->state == XenbusStateConnected) + return; + + /* Attempt to connect: exit if we fail to. 
*/ + connect(blkif->be); + if (blkif->be->dev->state != XenbusStateConnected) + return; + + err = blkback_name(blkif, name); + if (err) { + xenbus_dev_error(blkif->be->dev, err, "get blkback dev name"); + return; + } + + blkif->xenblkd = kthread_run(blkif_schedule, blkif, name); + if (IS_ERR(blkif->xenblkd)) { + err = PTR_ERR(blkif->xenblkd); + blkif->xenblkd = NULL; + xenbus_dev_error(blkif->be->dev, err, "start xenblkd"); + } +} + + +/**************************************************************** + * sysfs interface for VBD I/O requests + */ + +#define VBD_SHOW(name, format, args...) \ + static ssize_t show_##name(struct device *_dev, \ + struct device_attribute *attr, \ + char *buf) \ + { \ + struct xenbus_device *dev = to_xenbus_device(_dev); \ + struct backend_info *be = dev->dev.driver_data; \ + \ + return sprintf(buf, format, ##args); \ + } \ + static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) + +VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req); +VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req); +VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req); +VBD_SHOW(br_req, "%d\n", be->blkif->st_br_req); +VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect); +VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect); + +static struct attribute *vbdstat_attrs[] = { + &dev_attr_oo_req.attr, + &dev_attr_rd_req.attr, + &dev_attr_wr_req.attr, + &dev_attr_br_req.attr, + &dev_attr_rd_sect.attr, + &dev_attr_wr_sect.attr, + NULL +}; + +static struct attribute_group vbdstat_group = { + .name = "statistics", + .attrs = vbdstat_attrs, +}; + +VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor); +VBD_SHOW(mode, "%s\n", be->mode); + +int xenvbd_sysfs_addif(struct xenbus_device *dev) +{ + int error; + + error = device_create_file(&dev->dev, &dev_attr_physical_device); + if (error) + goto fail1; + + error = device_create_file(&dev->dev, &dev_attr_mode); + if (error) + goto fail2; + + error = sysfs_create_group(&dev->dev.kobj, &vbdstat_group); + if (error) + goto fail3; + + return 0; + +fail3: sysfs_remove_group(&dev->dev.kobj, &vbdstat_group); +fail2: device_remove_file(&dev->dev, &dev_attr_mode); +fail1: device_remove_file(&dev->dev, &dev_attr_physical_device); + return error; +} + +void xenvbd_sysfs_delif(struct xenbus_device *dev) +{ + sysfs_remove_group(&dev->dev.kobj, &vbdstat_group); + device_remove_file(&dev->dev, &dev_attr_mode); + device_remove_file(&dev->dev, &dev_attr_physical_device); +} + +static int blkback_remove(struct xenbus_device *dev) +{ + struct backend_info *be = dev->dev.driver_data; + + DPRINTK(""); + + if (be->major || be->minor) + xenvbd_sysfs_delif(dev); + + if (be->backend_watch.node) { + unregister_xenbus_watch(&be->backend_watch); + kfree(be->backend_watch.node); + be->backend_watch.node = NULL; + } + + if (be->blkif) { + blkif_disconnect(be->blkif); + vbd_free(&be->blkif->vbd); + blkif_free(be->blkif); + be->blkif = NULL; + } + + kfree(be); + dev->dev.driver_data = NULL; + return 0; +} + +int blkback_barrier(struct xenbus_transaction xbt, + struct backend_info *be, int state) +{ + struct xenbus_device *dev = be->dev; + int err; + + err = xenbus_printf(xbt, dev->nodename, "feature-barrier", + "%d", state); + if (err) + xenbus_dev_fatal(dev, err, "writing feature-barrier"); + + return err; +} + +/** + * Entry point to this code when a new device is created. Allocate the basic + * structures, and watch the store waiting for the hotplug scripts to tell us + * the device's physical major and minor numbers. Switch to InitWait. 
+ */ +static int blkback_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + int err; + struct backend_info *be = kzalloc(sizeof(struct backend_info), + GFP_KERNEL); + if (!be) { + xenbus_dev_fatal(dev, -ENOMEM, + "allocating backend structure"); + return -ENOMEM; + } + be->dev = dev; + dev->dev.driver_data = be; + + be->blkif = blkif_alloc(dev->otherend_id); + if (IS_ERR(be->blkif)) { + err = PTR_ERR(be->blkif); + be->blkif = NULL; + xenbus_dev_fatal(dev, err, "creating block interface"); + goto fail; + } + + /* setup back pointer */ + be->blkif->be = be; + + err = xenbus_watch_path2(dev, dev->nodename, "physical-device", + &be->backend_watch, backend_changed); + if (err) + goto fail; + + err = xenbus_switch_state(dev, XenbusStateInitWait); + if (err) + goto fail; + + return 0; + +fail: + DPRINTK("failed"); + blkback_remove(dev); + return err; +} + + +/** + * Callback received when the hotplug scripts have placed the physical-device + * node. Read it and the mode node, and create a vbd. If the frontend is + * ready, connect. + */ +static void backend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + int err; + unsigned major; + unsigned minor; + struct backend_info *be + = container_of(watch, struct backend_info, backend_watch); + struct xenbus_device *dev = be->dev; + int cdrom = 0; + char *device_type; + + DPRINTK(""); + + err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x", + &major, &minor); + if (XENBUS_EXIST_ERR(err)) { + /* Since this watch will fire once immediately after it is + registered, we expect this. Ignore it, and wait for the + hotplug scripts. */ + return; + } + if (err != 2) { + xenbus_dev_fatal(dev, err, "reading physical-device"); + return; + } + + if ((be->major || be->minor) && + ((be->major != major) || (be->minor != minor))) { + printk(KERN_WARNING + "blkback: changing physical device (from %x:%x to " + "%x:%x) not supported.\n", be->major, be->minor, + major, minor); + return; + } + + be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL); + if (IS_ERR(be->mode)) { + err = PTR_ERR(be->mode); + be->mode = NULL; + xenbus_dev_fatal(dev, err, "reading mode"); + return; + } + + device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL); + if (!IS_ERR(device_type)) { + cdrom = strcmp(device_type, "cdrom") == 0; + kfree(device_type); + } + + if (be->major == 0 && be->minor == 0) { + /* Front end dir is a number, which is used as the handle. */ + + char *p = strrchr(dev->otherend, '/') + 1; + long handle = simple_strtoul(p, NULL, 0); + + be->major = major; + be->minor = minor; + + err = vbd_create(be->blkif, handle, major, minor, + (NULL == strchr(be->mode, 'w')), cdrom); + if (err) { + be->major = be->minor = 0; + xenbus_dev_fatal(dev, err, "creating vbd structure"); + return; + } + + err = xenvbd_sysfs_addif(dev); + if (err) { + vbd_free(&be->blkif->vbd); + be->major = be->minor = 0; + xenbus_dev_fatal(dev, err, "creating sysfs entries"); + return; + } + + /* We're potentially connected now */ + update_blkif_status(be->blkif); + } +} + + +/** + * Callback received when the frontend's state changes. 
+ */ +static void frontend_changed(struct xenbus_device *dev, + enum xenbus_state frontend_state) +{ + struct backend_info *be = dev->dev.driver_data; + int err; + + DPRINTK("%s", xenbus_strstate(frontend_state)); + + switch (frontend_state) { + case XenbusStateInitialising: + if (dev->state == XenbusStateClosed) { + printk(KERN_INFO "%s: %s: prepare for reconnect\n", + __FUNCTION__, dev->nodename); + xenbus_switch_state(dev, XenbusStateInitWait); + } + break; + + case XenbusStateInitialised: + case XenbusStateConnected: + /* Ensure we connect even when two watches fire in + close successsion and we miss the intermediate value + of frontend_state. */ + if (dev->state == XenbusStateConnected) + break; + + err = connect_ring(be); + if (err) + break; + update_blkif_status(be->blkif); + break; + + case XenbusStateClosing: + blkif_disconnect(be->blkif); + xenbus_switch_state(dev, XenbusStateClosing); + break; + + case XenbusStateClosed: + xenbus_switch_state(dev, XenbusStateClosed); + if (xenbus_dev_is_online(dev)) + break; + /* fall through if not online */ + case XenbusStateUnknown: + device_unregister(&dev->dev); + break; + + default: + xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend", + frontend_state); + break; + } +} + + +/* ** Connection ** */ + + +/** + * Write the physical details regarding the block device to the store, and + * switch to Connected state. + */ +static void connect(struct backend_info *be) +{ + struct xenbus_transaction xbt; + int err; + struct xenbus_device *dev = be->dev; + + DPRINTK("%s", dev->otherend); + + /* Supply the information about the device the frontend needs */ +again: + err = xenbus_transaction_start(&xbt); + if (err) { + xenbus_dev_fatal(dev, err, "starting transaction"); + return; + } + + err = blkback_barrier(xbt, be, 1); + if (err) + goto abort; + + err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", + vbd_size(&be->blkif->vbd)); + if (err) { + xenbus_dev_fatal(dev, err, "writing %s/sectors", + dev->nodename); + goto abort; + } + + /* FIXME: use a typename instead */ + err = xenbus_printf(xbt, dev->nodename, "info", "%u", + vbd_info(&be->blkif->vbd)); + if (err) { + xenbus_dev_fatal(dev, err, "writing %s/info", + dev->nodename); + goto abort; + } + err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu", + vbd_secsize(&be->blkif->vbd)); + if (err) { + xenbus_dev_fatal(dev, err, "writing %s/sector-size", + dev->nodename); + goto abort; + } + + err = xenbus_transaction_end(xbt, 0); + if (err == -EAGAIN) + goto again; + if (err) + xenbus_dev_fatal(dev, err, "ending transaction"); + + err = xenbus_switch_state(dev, XenbusStateConnected); + if (err) + xenbus_dev_fatal(dev, err, "switching to Connected state", + dev->nodename); + + return; + abort: + xenbus_transaction_end(xbt, 1); +} + + +static int connect_ring(struct backend_info *be) +{ + struct xenbus_device *dev = be->dev; + unsigned long ring_ref; + unsigned int evtchn; + char protocol[64] = ""; + int err; + + DPRINTK("%s", dev->otherend); + + err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", &ring_ref, + "event-channel", "%u", &evtchn, NULL); + if (err) { + xenbus_dev_fatal(dev, err, + "reading %s/ring-ref and event-channel", + dev->otherend); + return err; + } + + be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; + err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", + "%63s", protocol, NULL); + if (err) + strcpy(protocol, "unspecified, assuming native"); + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE)) + be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; 
+ else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32)) + be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32; + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64)) + be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64; + else { + xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); + return -1; + } + printk(KERN_INFO + "blkback: ring-ref %ld, event-channel %d, protocol %d (%s)\n", + ring_ref, evtchn, be->blkif->blk_protocol, protocol); + + /* Map the shared frame, irq etc. */ + err = blkif_map(be->blkif, ring_ref, evtchn); + if (err) { + xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u", + ring_ref, evtchn); + return err; + } + + return 0; +} + + +/* ** Driver Registration ** */ + + +static const struct xenbus_device_id blkback_ids[] = { + { "vbd" }, + { "" } +}; + + +static struct xenbus_driver blkback = { + .name = "vbd", + .owner = THIS_MODULE, + .ids = blkback_ids, + .probe = blkback_probe, + .remove = blkback_remove, + .otherend_changed = frontend_changed +}; + + +void blkif_xenbus_init(void) +{ + xenbus_register_backend(&blkback); +} diff --git a/include/xen/blkif.h b/include/xen/blkif.h new file mode 100644 index 0000000..3d56b75 --- /dev/null +++ b/include/xen/blkif.h @@ -0,0 +1,123 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_BLKIF_H__ +#define __XEN_BLKIF_H__ + +#include +#include +#include + +/* Not a real protocol. Used to generate ring structs which contain + * the elements common to all protocols only. This way we get a + * compiler-checkable way to use common struct elements, so we can + * avoid using switch(protocol) in a number of places. */ +struct blkif_common_request { + char dummy; +}; +struct blkif_common_response { + char dummy; +}; + +/* i386 protocol version */ +#pragma pack(push, 4) +struct blkif_x86_32_request { + uint8_t operation; /* BLKIF_OP_??? */ + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ + uint64_t id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; +struct blkif_x86_32_response { + uint64_t id; /* copied from request */ + uint8_t operation; /* copied from request */ + int16_t status; /* BLKIF_RSP_??? 
*/ +}; +typedef struct blkif_x86_32_request blkif_x86_32_request_t; +typedef struct blkif_x86_32_response blkif_x86_32_response_t; +#pragma pack(pop) + +/* x86_64 protocol version */ +struct blkif_x86_64_request { + uint8_t operation; /* BLKIF_OP_??? */ + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ + uint64_t __attribute__((__aligned__(8))) id; + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; +struct blkif_x86_64_response { + uint64_t __attribute__((__aligned__(8))) id; + uint8_t operation; /* copied from request */ + int16_t status; /* BLKIF_RSP_??? */ +}; +typedef struct blkif_x86_64_request blkif_x86_64_request_t; +typedef struct blkif_x86_64_response blkif_x86_64_response_t; + +DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, struct blkif_common_response); +DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, struct blkif_x86_32_response); +DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, struct blkif_x86_64_response); + +union blkif_back_rings { + blkif_back_ring_t native; + blkif_common_back_ring_t common; + blkif_x86_32_back_ring_t x86_32; + blkif_x86_64_back_ring_t x86_64; +}; +typedef union blkif_back_rings blkif_back_rings_t; + +enum blkif_protocol { + BLKIF_PROTOCOL_NATIVE = 1, + BLKIF_PROTOCOL_X86_32 = 2, + BLKIF_PROTOCOL_X86_64 = 3, +}; + +static void inline blkif_get_x86_32_req(blkif_request_t *dst, blkif_x86_32_request_t *src) +{ + int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; + dst->operation = src->operation; + dst->nr_segments = src->nr_segments; + dst->handle = src->handle; + dst->id = src->id; + dst->sector_number = src->sector_number; + barrier(); + if (n > dst->nr_segments) + n = dst->nr_segments; + for (i = 0; i < n; i++) + dst->seg[i] = src->seg[i]; +} + +static void inline blkif_get_x86_64_req(blkif_request_t *dst, blkif_x86_64_request_t *src) +{ + int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; + dst->operation = src->operation; + dst->nr_segments = src->nr_segments; + dst->handle = src->handle; + dst->id = src->id; + dst->sector_number = src->sector_number; + barrier(); + if (n > dst->nr_segments) + n = dst->nr_segments; + for (i = 0; i < n; i++) + dst->seg[i] = src->seg[i]; +} + +#endif /* __XEN_BLKIF_H__ */ -- cgit v0.10.2 From 8812293323a79134e06c3bf82eba1e217d23382e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 9 Feb 2009 12:05:51 -0800 Subject: xen-blkback-porting diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 5b8d50e..43fd070 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -39,8 +39,12 @@ #include #include #include +#include #include -#include +#include +#include +#include +#include #include "common.h" /* @@ -106,7 +110,7 @@ static inline unsigned long vaddr(pending_req_t *req, int seg) static int do_block_io_op(blkif_t *blkif); static void dispatch_rw_block_io(blkif_t *blkif, - blkif_request_t *req, + struct blkif_request *req, pending_req_t *pending_req); static void make_response(blkif_t *blkif, u64 id, unsigned short op, int st); @@ -153,7 +157,7 @@ static void unplug_queue(blkif_t *blkif) static void plug_queue(blkif_t *blkif, struct block_device *bdev) { - request_queue_t *q = bdev_get_queue(bdev); + struct request_queue *q = bdev_get_queue(bdev); if (q == blkif->plug) return; @@ -268,13 +272,10 @@ static void __end_block_io_op(pending_req_t *pending_req, int error) } } -static int 
end_block_io_op(struct bio *bio, unsigned int done, int error) +static void end_block_io_op(struct bio *bio, int error) { - if (bio->bi_size != 0) - return 1; __end_block_io_op(bio->bi_private, error); bio_put(bio); - return error; } @@ -288,7 +289,7 @@ static void blkif_notify_work(blkif_t *blkif) wake_up(&blkif->wq); } -irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs) +irqreturn_t blkif_be_int(int irq, void *dev_id) { blkif_notify_work(dev_id); return IRQ_HANDLED; @@ -302,8 +303,8 @@ irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs) static int do_block_io_op(blkif_t *blkif) { - blkif_back_rings_t *blk_rings = &blkif->blk_rings; - blkif_request_t req; + union blkif_back_rings *blk_rings = &blkif->blk_rings; + struct blkif_request req; pending_req_t *pending_req; RING_IDX rc, rp; int more_to_do = 0; @@ -379,7 +380,7 @@ static int do_block_io_op(blkif_t *blkif) } static void dispatch_rw_block_io(blkif_t *blkif, - blkif_request_t *req, + struct blkif_request *req, pending_req_t *pending_req) { extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); @@ -560,9 +561,9 @@ static void dispatch_rw_block_io(blkif_t *blkif, static void make_response(blkif_t *blkif, u64 id, unsigned short op, int st) { - blkif_response_t resp; + struct blkif_response resp; unsigned long flags; - blkif_back_rings_t *blk_rings = &blkif->blk_rings; + union blkif_back_rings *blk_rings = &blkif->blk_rings; int more_to_do = 0; int notify; @@ -614,7 +615,8 @@ static int __init blkif_init(void) { int i, mmap_pages; - if (!is_running_on_xen()) + printk(KERN_CRIT "***blkif_init\n"); + if (!xen_pv_domain()) return -ENODEV; mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h index 422e935..1c422b0 100644 --- a/drivers/xen/blkback/common.h +++ b/drivers/xen/blkback/common.h @@ -40,8 +40,7 @@ #include #include #include -#include -#include +#include #include #define DPRINTK(_f, _a...) \ @@ -66,7 +65,7 @@ typedef struct blkif_st { unsigned int irq; /* Comms information. */ enum blkif_protocol blk_protocol; - blkif_back_rings_t blk_rings; + union blkif_back_rings blk_rings; struct vm_struct *blk_ring_area; /* The VBD attached to this interface. 
*/ struct vbd vbd; @@ -79,7 +78,7 @@ typedef struct blkif_st { wait_queue_head_t wq; struct task_struct *xenblkd; unsigned int waiting_reqs; - request_queue_t *plug; + struct request_queue *plug; /* statistics */ unsigned long st_print; @@ -130,7 +129,7 @@ void blkif_interface_init(void); void blkif_xenbus_init(void); -irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs); +irqreturn_t blkif_be_int(int irq, void *dev_id); int blkif_schedule(void *arg); int blkback_barrier(struct xenbus_transaction xbt, diff --git a/drivers/xen/blkback/interface.c b/drivers/xen/blkback/interface.c index 81821bd..c6c3e14 100644 --- a/drivers/xen/blkback/interface.c +++ b/drivers/xen/blkback/interface.c @@ -31,10 +31,11 @@ */ #include "common.h" -#include +#include +#include #include -static kmem_cache_t *blkif_cachep; +static struct kmem_cache *blkif_cachep; blkif_t *blkif_alloc(domid_t domid) { @@ -107,22 +108,22 @@ int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn) switch (blkif->blk_protocol) { case BLKIF_PROTOCOL_NATIVE: { - blkif_sring_t *sring; - sring = (blkif_sring_t *)blkif->blk_ring_area->addr; + struct blkif_sring *sring; + sring = (struct blkif_sring *)blkif->blk_ring_area->addr; BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); break; } case BLKIF_PROTOCOL_X86_32: { - blkif_x86_32_sring_t *sring_x86_32; - sring_x86_32 = (blkif_x86_32_sring_t *)blkif->blk_ring_area->addr; + struct blkif_x86_32_sring *sring_x86_32; + sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring_area->addr; BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); break; } case BLKIF_PROTOCOL_X86_64: { - blkif_x86_64_sring_t *sring_x86_64; - sring_x86_64 = (blkif_x86_64_sring_t *)blkif->blk_ring_area->addr; + struct blkif_x86_64_sring *sring_x86_64; + sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring_area->addr; BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); break; } @@ -177,5 +178,5 @@ void blkif_free(blkif_t *blkif) void __init blkif_interface_init(void) { blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), - 0, 0, NULL, NULL); + 0, 0, NULL); } diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c index 1fb31d0..7e9a1cd 100644 --- a/drivers/xen/blkback/vbd.c +++ b/drivers/xen/blkback/vbd.c @@ -33,7 +33,7 @@ #include "common.h" #define vbd_sz(_v) ((_v)->bdev->bd_part ? \ - (_v)->bdev->bd_part->nr_sects : (_v)->bdev->bd_disk->capacity) + (_v)->bdev->bd_part->nr_sects : get_capacity((_v)->bdev->bd_disk)) unsigned long long vbd_size(struct vbd *vbd) { @@ -94,7 +94,7 @@ int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major, void vbd_free(struct vbd *vbd) { if (vbd->bdev) - blkdev_put(vbd->bdev); + blkdev_put(vbd->bdev, vbd->readonly ? 
FMODE_READ : FMODE_WRITE); vbd->bdev = NULL; } diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c index 80d9aa6..650f4b3 100644 --- a/drivers/xen/blkback/xenbus.c +++ b/drivers/xen/blkback/xenbus.c @@ -238,8 +238,8 @@ static int blkback_probe(struct xenbus_device *dev, /* setup back pointer */ be->blkif->be = be; - err = xenbus_watch_path2(dev, dev->nodename, "physical-device", - &be->backend_watch, backend_changed); + err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed, + "%s/%s", dev->nodename, "physical-device"); if (err) goto fail; @@ -537,5 +537,6 @@ static struct xenbus_driver blkback = { void blkif_xenbus_init(void) { - xenbus_register_backend(&blkback); + /* XXX must_check */ + (void)xenbus_register_backend(&blkback); } diff --git a/include/xen/blkif.h b/include/xen/blkif.h index 3d56b75..d274280 100644 --- a/include/xen/blkif.h +++ b/include/xen/blkif.h @@ -77,12 +77,11 @@ DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, struct blkif_x86_32 DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, struct blkif_x86_64_response); union blkif_back_rings { - blkif_back_ring_t native; - blkif_common_back_ring_t common; - blkif_x86_32_back_ring_t x86_32; - blkif_x86_64_back_ring_t x86_64; + struct blkif_back_ring native; + struct blkif_common_back_ring common; + struct blkif_x86_32_back_ring x86_32; + struct blkif_x86_64_back_ring x86_64; }; -typedef union blkif_back_rings blkif_back_rings_t; enum blkif_protocol { BLKIF_PROTOCOL_NATIVE = 1, @@ -90,7 +89,7 @@ enum blkif_protocol { BLKIF_PROTOCOL_X86_64 = 3, }; -static void inline blkif_get_x86_32_req(blkif_request_t *dst, blkif_x86_32_request_t *src) +static void inline blkif_get_x86_32_req(struct blkif_request *dst, struct blkif_x86_32_request *src) { int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; dst->operation = src->operation; @@ -105,7 +104,7 @@ static void inline blkif_get_x86_32_req(blkif_request_t *dst, blkif_x86_32_reque dst->seg[i] = src->seg[i]; } -static void inline blkif_get_x86_64_req(blkif_request_t *dst, blkif_x86_64_request_t *src) +static void inline blkif_get_x86_64_req(struct blkif_request *dst, struct blkif_x86_64_request *src) { int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; dst->operation = src->operation; -- cgit v0.10.2 From dd3672424caa7b302433635831afbb6787476b96 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 9 Feb 2009 16:39:58 -0800 Subject: xen/blkback: don't include xen/evtchn.h It's a user-mode header for users of /dev/evtchn Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h index 1c422b0..57b7825 100644 --- a/drivers/xen/blkback/common.h +++ b/drivers/xen/blkback/common.h @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include -- cgit v0.10.2 From 8270b45bc8a45eef4a224bd256bd0997d4fd857e Mon Sep 17 00:00:00 2001 From: Keir Fraser Date: Fri, 6 Mar 2009 08:29:15 +0000 Subject: blkback: Fix potential resource leak. 
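The leak here is one of ordering: the request loop in do_block_io_op() used to take a pending_req off the free list before checking whether the kthread was being stopped, so a stop request could abandon an already-allocated entry. A rough sketch of the corrected ordering (abridged from the hunk below, using the driver's own identifiers; not compilable on its own):

    /* do_block_io_op() request loop after the fix: test for kthread
     * shutdown before taking a pending_req off the free list. */
    while (rc != rp) {
            if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
                    break;

            if (kthread_should_stop()) {
                    more_to_do = 1;   /* leave remaining work for later */
                    break;            /* nothing allocated yet, so nothing leaks */
            }

            pending_req = alloc_req();
            if (pending_req == NULL) {
                    blkif->st_oo_req++;   /* out-of-requests statistic */
                    more_to_do = 1;
                    break;
            }
            /* copy the request off the ring and dispatch it */
    }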
diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 43fd070..8d988f4 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -318,14 +318,14 @@ static int do_block_io_op(blkif_t *blkif) if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) break; - pending_req = alloc_req(); - if (NULL == pending_req) { - blkif->st_oo_req++; + if (kthread_should_stop()) { more_to_do = 1; break; } - if (kthread_should_stop()) { + pending_req = alloc_req(); + if (NULL == pending_req) { + blkif->st_oo_req++; more_to_do = 1; break; } -- cgit v0.10.2 From 690f1b63b2db88330834d8482f3b125990c8e609 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sat, 21 Mar 2009 23:34:19 -0700 Subject: block: export blk_get/put_queue for blkback Impact: build fix I'm not sure if blkback should be using these functions, but in the meantime export them to allow blkback to be a module. Signed-off-by: Jeremy Fitzhardinge diff --git a/block/blk-core.c b/block/blk-core.c index 90f22cc..9b60e69 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -351,6 +351,7 @@ void blk_put_queue(struct request_queue *q) { kobject_put(&q->kobj); } +EXPORT_SYMBOL_GPL(blk_put_queue); /* * Note: If a driver supplied the queue lock, it should not zap that lock @@ -572,6 +573,7 @@ int blk_get_queue(struct request_queue *q) return 1; } +EXPORT_SYMBOL_GPL(blk_get_queue); static inline void blk_free_request(struct request_queue *q, struct request *rq) { -- cgit v0.10.2 From 05d43865ddc00bdb33d12c8e9d9f176ed5d3797b Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 29 Jun 2009 14:58:45 -0700 Subject: xen/blkback: deal with hardsect_size to logical_block_size rename Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 8d988f4..ac5af91 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -484,7 +484,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, for (i = 0; i < nseg; i++) { if (((int)preq.sector_number|(int)seg[i].nsec) & - ((bdev_hardsect_size(preq.bdev) >> 9) - 1)) { + ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { DPRINTK("Misaligned I/O request from domain %d", blkif->domid); goto fail_put_bio; diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c index 7e9a1cd..410c2ea 100644 --- a/drivers/xen/blkback/vbd.c +++ b/drivers/xen/blkback/vbd.c @@ -47,7 +47,7 @@ unsigned int vbd_info(struct vbd *vbd) unsigned long vbd_secsize(struct vbd *vbd) { - return bdev_hardsect_size(vbd->bdev); + return bdev_logical_block_size(vbd->bdev); } int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major, -- cgit v0.10.2 From 0660c7dbf228a06345392a64ebb43734875a3b91 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 9 Sep 2009 15:15:16 -0700 Subject: xen/blkback: remove spurious debug output noise Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index ac5af91..31458bd 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -615,7 +615,6 @@ static int __init blkif_init(void) { int i, mmap_pages; - printk(KERN_CRIT "***blkif_init\n"); if (!xen_pv_domain()) return -ENODEV; -- cgit v0.10.2 From afd91d07ff72919071e37086c0664384b3875688 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 15 Sep 2009 14:12:37 -0700 Subject: xen/blkback: little cleanups Remove unused local prototype; group headers. 
Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 31458bd..e9e3de1 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -40,6 +40,7 @@ #include #include #include + #include #include #include @@ -383,7 +384,6 @@ static void dispatch_rw_block_io(blkif_t *blkif, struct blkif_request *req, pending_req_t *pending_req) { - extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct phys_req preq; struct { -- cgit v0.10.2 From 8770b2683f9f98d4c1d6caf2e28f625592bba4f3 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 8 Oct 2009 13:23:09 -0400 Subject: Fix compile warnings: ignoring return value of 'xenbus_register_backend' .. We neglect to check the return value of xenbus_register_backend and take actions when that fails. This patch fixes that and adds code to deal with those type of failures. Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index e9e3de1..a2ac718 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -614,6 +614,7 @@ static void make_response(blkif_t *blkif, u64 id, static int __init blkif_init(void) { int i, mmap_pages; + int rc = 0; if (!xen_pv_domain()) return -ENODEV; @@ -626,13 +627,17 @@ static int __init blkif_init(void) mmap_pages, GFP_KERNEL); pending_pages = alloc_empty_pages_and_pagevec(mmap_pages); - if (!pending_reqs || !pending_grant_handles || !pending_pages) + if (!pending_reqs || !pending_grant_handles || !pending_pages) { + rc = -ENOMEM; goto out_of_memory; + } for (i = 0; i < mmap_pages; i++) pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; - blkif_interface_init(); + rc = blkif_interface_init(); + if (rc) + goto failed_init; memset(pending_reqs, 0, sizeof(pending_reqs)); INIT_LIST_HEAD(&pending_free); @@ -640,16 +645,19 @@ static int __init blkif_init(void) for (i = 0; i < blkif_reqs; i++) list_add_tail(&pending_reqs[i].free_list, &pending_free); - blkif_xenbus_init(); + rc = blkif_xenbus_init(); + if (rc) + goto failed_init; return 0; out_of_memory: + printk(KERN_ERR "%s: out of memory\n", __func__); + failed_init: kfree(pending_reqs); kfree(pending_grant_handles); free_empty_pages_and_pagevec(pending_pages, mmap_pages); - printk("%s: out of memory\n", __FUNCTION__); - return -ENOMEM; + return rc; } module_init(blkif_init); diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h index 57b7825..aaf3648 100644 --- a/drivers/xen/blkback/common.h +++ b/drivers/xen/blkback/common.h @@ -124,9 +124,9 @@ struct phys_req { int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation); -void blkif_interface_init(void); +int blkif_interface_init(void); -void blkif_xenbus_init(void); +int blkif_xenbus_init(void); irqreturn_t blkif_be_int(int irq, void *dev_id); int blkif_schedule(void *arg); diff --git a/drivers/xen/blkback/interface.c b/drivers/xen/blkback/interface.c index c6c3e14..e397a41 100644 --- a/drivers/xen/blkback/interface.c +++ b/drivers/xen/blkback/interface.c @@ -175,8 +175,12 @@ void blkif_free(blkif_t *blkif) kmem_cache_free(blkif_cachep, blkif); } -void __init blkif_interface_init(void) +int __init blkif_interface_init(void) { blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), 0, 0, NULL); + if (!blkif_cachep) + return -ENOMEM; + + return 0; } diff --git a/drivers/xen/blkback/xenbus.c 
b/drivers/xen/blkback/xenbus.c index 650f4b3..04c0a12 100644 --- a/drivers/xen/blkback/xenbus.c +++ b/drivers/xen/blkback/xenbus.c @@ -535,8 +535,7 @@ static struct xenbus_driver blkback = { }; -void blkif_xenbus_init(void) +int blkif_xenbus_init(void) { - /* XXX must_check */ - (void)xenbus_register_backend(&blkback); + return xenbus_register_backend(&blkback); } -- cgit v0.10.2 From e7579a99b598f8e4a2b4df4854fbda2cc961bb02 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Thu, 3 Dec 2009 21:56:18 +0000 Subject: xen: rename blkbk module xen-blkback. blkbk is rather generic for a modular distro style kernel. Signed-off-by: Ian Campbell Cc: Jeremy Fitzhardinge Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/blkback/Makefile b/drivers/xen/blkback/Makefile index 8bab63d..f1ae1ff 100644 --- a/drivers/xen/blkback/Makefile +++ b/drivers/xen/blkback/Makefile @@ -1,3 +1,3 @@ -obj-$(CONFIG_XEN_BLKDEV_BACKEND) := blkbk.o +obj-$(CONFIG_XEN_BLKDEV_BACKEND) := xen-blkback.o -blkbk-y := blkback.o xenbus.o interface.o vbd.o +xen-blkback-y := blkback.o xenbus.o interface.o vbd.o -- cgit v0.10.2 From 5cf6e4f6f6d5549904db6ecb3ffd5b8f71f41250 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 11 Feb 2010 16:07:31 -0800 Subject: xen/blkback: use drv_get/set_drvdata rather than directly accessing driver_data. Direct driver_data access is obsolete and will disappear. Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c index 04c0a12..34f8e40 100644 --- a/drivers/xen/blkback/xenbus.c +++ b/drivers/xen/blkback/xenbus.c @@ -105,7 +105,7 @@ static void update_blkif_status(blkif_t *blkif) char *buf) \ { \ struct xenbus_device *dev = to_xenbus_device(_dev); \ - struct backend_info *be = dev->dev.driver_data; \ + struct backend_info *be = dev_get_drvdata(&dev->dev); \ \ return sprintf(buf, format, ##args); \ } \ @@ -169,7 +169,7 @@ void xenvbd_sysfs_delif(struct xenbus_device *dev) static int blkback_remove(struct xenbus_device *dev) { - struct backend_info *be = dev->dev.driver_data; + struct backend_info *be = dev_get_drvdata(&dev->dev); DPRINTK(""); @@ -190,7 +190,7 @@ static int blkback_remove(struct xenbus_device *dev) } kfree(be); - dev->dev.driver_data = NULL; + dev_set_drvdata(&dev->dev, NULL); return 0; } @@ -225,7 +225,7 @@ static int blkback_probe(struct xenbus_device *dev, return -ENOMEM; } be->dev = dev; - dev->dev.driver_data = be; + dev_set_drvdata(&dev->dev, be); be->blkif = blkif_alloc(dev->otherend_id); if (IS_ERR(be->blkif)) { @@ -348,7 +348,7 @@ static void backend_changed(struct xenbus_watch *watch, static void frontend_changed(struct xenbus_device *dev, enum xenbus_state frontend_state) { - struct backend_info *be = dev->dev.driver_data; + struct backend_info *be = dev_get_drvdata(&dev->dev); int err; DPRINTK("%s", xenbus_strstate(frontend_state)); -- cgit v0.10.2 From 2ccbfe26c106a1a93a402567b7853c1484c4a0b0 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Thu, 11 Mar 2010 13:39:50 -0800 Subject: xen/blkback: Propagate changed size of VBDs Support dynamic resizing of virtual block devices. This patch supports both file backed block devices as well as physical devices that can be dynamically resized on the host side. Signed-off-by: K. Y. 
Srinivasan Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index a2ac718..6d89766 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -207,6 +207,7 @@ static void print_stats(blkif_t *blkif) int blkif_schedule(void *arg) { blkif_t *blkif = arg; + struct vbd *vbd = &blkif->vbd; blkif_get(blkif); @@ -216,6 +217,8 @@ int blkif_schedule(void *arg) while (!kthread_should_stop()) { if (try_to_freeze()) continue; + if (unlikely(vbd->size != vbd_size(vbd))) + vbd_resize(blkif); wait_event_interruptible( blkif->wq, diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h index aaf3648..cebcc2b 100644 --- a/drivers/xen/blkback/common.h +++ b/drivers/xen/blkback/common.h @@ -52,6 +52,7 @@ struct vbd { unsigned char type; /* VDISK_xxx */ u32 pdevice; /* phys device that this vbd maps to */ struct block_device *bdev; + sector_t size; /* Cached size parameter */ }; struct backend_info; @@ -98,6 +99,7 @@ blkif_t *blkif_alloc(domid_t domid); void blkif_disconnect(blkif_t *blkif); void blkif_free(blkif_t *blkif); int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn); +void vbd_resize(blkif_t *blkif); #define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) #define blkif_put(_b) \ diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c index 410c2ea..0635c54 100644 --- a/drivers/xen/blkback/vbd.c +++ b/drivers/xen/blkback/vbd.c @@ -73,6 +73,7 @@ int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major, } vbd->bdev = bdev; + vbd->size = vbd_size(vbd); if (vbd->bdev->bd_disk == NULL) { DPRINTK("vbd_creat: device %08x doesn't exist.\n", @@ -116,3 +117,45 @@ int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation) out: return rc; } + +void vbd_resize(blkif_t *blkif) +{ + struct vbd *vbd = &blkif->vbd; + struct xenbus_transaction xbt; + int err; + struct xenbus_device *dev = blkif->be->dev; + unsigned long long new_size = vbd_size(vbd); + + printk(KERN_INFO "VBD Resize: new size %Lu\n", new_size); + vbd->size = new_size; +again: + err = xenbus_transaction_start(&xbt); + if (err) { + printk(KERN_WARNING "Error starting transaction"); + return; + } + err = xenbus_printf(xbt, dev->nodename, "sectors", "%Lu", + vbd_size(vbd)); + if (err) { + printk(KERN_WARNING "Error writing new size"); + goto abort; + } + /* + * Write the current state; we will use this to synchronize + * the front-end. If the current state is "connected" the + * front-end will get the new size information online. + */ + err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state); + if (err) { + printk(KERN_WARNING "Error writing the state"); + goto abort; + } + + err = xenbus_transaction_end(xbt, 0); + if (err == -EAGAIN) + goto again; + if (err) + printk(KERN_WARNING "Error ending transaction"); +abort: + xenbus_transaction_end(xbt, 1); +} -- cgit v0.10.2 From 98e036a356747cfaa225478b1e4875e190257b09 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 18 Mar 2010 15:35:05 -0700 Subject: xen/blkback: add accessor for xenbus backend device Since backend_info is hidden away now. 
Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h index cebcc2b..0f91830 100644 --- a/drivers/xen/blkback/common.h +++ b/drivers/xen/blkback/common.h @@ -136,4 +136,6 @@ int blkif_schedule(void *arg); int blkback_barrier(struct xenbus_transaction xbt, struct backend_info *be, int state); +struct xenbus_device *blkback_xenbus(struct backend_info *be); + #endif /* __BLKIF__BACKEND__COMMON_H__ */ diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c index 0635c54..943ec23 100644 --- a/drivers/xen/blkback/vbd.c +++ b/drivers/xen/blkback/vbd.c @@ -123,7 +123,7 @@ void vbd_resize(blkif_t *blkif) struct vbd *vbd = &blkif->vbd; struct xenbus_transaction xbt; int err; - struct xenbus_device *dev = blkif->be->dev; + struct xenbus_device *dev = blkback_xenbus(blkif->be); unsigned long long new_size = vbd_size(vbd); printk(KERN_INFO "VBD Resize: new size %Lu\n", new_size); diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c index 34f8e40..c31e5c4 100644 --- a/drivers/xen/blkback/xenbus.c +++ b/drivers/xen/blkback/xenbus.c @@ -42,6 +42,11 @@ static int connect_ring(struct backend_info *); static void backend_changed(struct xenbus_watch *, const char **, unsigned int); +struct xenbus_device *blkback_xenbus(struct backend_info *be) +{ + return be->dev; +} + static int blkback_name(blkif_t *blkif, char *buf) { char *devpath, *devname; -- cgit v0.10.2 From cbf462908c8080f47c2a3300072877589dd1275f Mon Sep 17 00:00:00 2001 From: Chris Lalancette Date: Wed, 21 Jul 2010 12:41:45 -0700 Subject: xen/blkback: Flush blkback data when connecting. First cut at flushing blkback data when first connecting blkback. This should avoid the pygrub issues we are experiencing in (RedHat bugzilla) 466681. [ 2.6.18-xen.hg commit 63b4d7f56688 ] Signed-off-by: Chris Lalancette Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c index c31e5c4..a0534fc 100644 --- a/drivers/xen/blkback/xenbus.c +++ b/drivers/xen/blkback/xenbus.c @@ -91,6 +91,13 @@ static void update_blkif_status(blkif_t *blkif) return; } + err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping); + if (err) { + xenbus_dev_error(blkif->be->dev, err, "block flush"); + return; + } + invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping); + blkif->xenblkd = kthread_run(blkif_schedule, blkif, name); if (IS_ERR(blkif->xenblkd)) { err = PTR_ERR(blkif->xenblkd); -- cgit v0.10.2 From a81135d90bf176e6139c352c7b96c03d00131836 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Mon, 16 Aug 2010 13:43:06 -0700 Subject: xen/blkback: Print additional information when a vbd is resized. Signed-off-by: K. Y. Srinivasan Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c index 943ec23..dc25723 100644 --- a/drivers/xen/blkback/vbd.c +++ b/drivers/xen/blkback/vbd.c @@ -126,6 +126,8 @@ void vbd_resize(blkif_t *blkif) struct xenbus_device *dev = blkback_xenbus(blkif->be); unsigned long long new_size = vbd_size(vbd); + printk(KERN_INFO "VBD Resize: Domid: %d, Device: (%d, %d)\n", + blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice)); printk(KERN_INFO "VBD Resize: new size %Lu\n", new_size); vbd->size = new_size; again: -- cgit v0.10.2 From 313d7b003ceceb797e8c0d18ab085ed0638b4aff Mon Sep 17 00:00:00 2001 From: Keir Fraser Date: Wed, 24 Nov 2010 22:08:20 -0800 Subject: blkback: Fix CVE-2010-3699 A guest can cause the backend driver to leak a kernel thread. 
Such leaked threads hold references to the device, which makes the device impossible to tear down. If shut down, the guest remains a zombie domain, the xenwatch process hangs, and most xm commands will stop working. This patch tries to do the following for blkback: - identify/extract idempotent teardown operations, - add/move the invocation of said teardown operation right before we're about to allocate new resources in the Connected states. [ linux-2.6.18-xen.hg 59f097ef181b ] Signed-off-by: Laszlo Ersek Signed-off-by: Keir Fraser Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c index a0534fc..031bc3d 100644 --- a/drivers/xen/blkback/xenbus.c +++ b/drivers/xen/blkback/xenbus.c @@ -382,6 +382,11 @@ static void frontend_changed(struct xenbus_device *dev, if (dev->state == XenbusStateConnected) break; + /* Enforce precondition before potential leak point. * blkif_disconnect() is idempotent. */ + blkif_disconnect(be->blkif); + err = connect_ring(be); if (err) break; @@ -399,6 +404,7 @@ static void frontend_changed(struct xenbus_device *dev, break; /* fall through if not online */ case XenbusStateUnknown: + /* implies blkif_disconnect() via blkback_remove() */ device_unregister(&dev->dev); break; -- cgit v0.10.2 From 248e9f7539f8351cd857d12a74bd52133a3a900f Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 24 Feb 2011 17:22:41 -0500 Subject: xen/blkback: Replace WRITE_BARRIER with (REQ_FLUSH | REQ_FUA) TODO: Double check xen-blkfront.c diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 6d89766..cb844f7 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -405,7 +405,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, operation = WRITE; break; case BLKIF_OP_WRITE_BARRIER: - operation = WRITE_BARRIER; + operation = REQ_FLUSH | REQ_FUA; break; default: operation = 0; /* make gcc happy */ @@ -414,7 +414,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, /* Check that number of segments is sane. */ nseg = req->nr_segments; - if (unlikely(nseg == 0 && operation != WRITE_BARRIER) || + if (unlikely(nseg == 0 && operation != (REQ_FLUSH | REQ_FUA)) || unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { DPRINTK("Bad number of segments in request (%d)\n", nseg); goto fail_response; @@ -517,7 +517,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, } if (!bio) { - BUG_ON(operation != WRITE_BARRIER); + BUG_ON(operation != (REQ_FLUSH | REQ_FUA)); bio = bio_alloc(GFP_KERNEL, 0); if (unlikely(bio == NULL)) goto fail_put_bio; @@ -532,7 +532,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, if (operation == READ) blkif->st_rd_sect += preq.nr_sects; - else if (operation == WRITE || operation == WRITE_BARRIER) + else if (operation == WRITE || operation == (REQ_FLUSH | REQ_FUA)) blkif->st_wr_sect += preq.nr_sects; return; -- cgit v0.10.2 From bc0c081b0e7a4afc4d2c7bc0666f5cd169e96814 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 25 Feb 2011 10:02:39 -0500 Subject: xen/blkback: Update to use blkdev_get_by_dev instead of open_by_devnum. The API for opening a block device has changed since 2.6.32. The correct function to open a device is blkdev_get_by_dev.
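For reference, the shape of the replacement call as it appears in the hunk below; this is a minimal sketch only, with error handling compressed into the IS_ERR() check the driver already performs:

    /* Old helper (removed upstream):
     *   bdev = open_by_devnum(vbd->pdevice,
     *                         vbd->readonly ? FMODE_READ : FMODE_WRITE);
     *
     * New helper: same dev_t and fmode_t, plus a third "holder" argument
     * used for exclusive opens; blkback passes NULL here. */
    struct block_device *bdev;

    bdev = blkdev_get_by_dev(vbd->pdevice,
                             vbd->readonly ? FMODE_READ : FMODE_WRITE,
                             NULL);
    if (IS_ERR(bdev))
            return PTR_ERR(bdev);   /* sketch only: the driver logs the
                                     * failure and returns its own error */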
diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c index dc25723..8c91a2f 100644 --- a/drivers/xen/blkback/vbd.c +++ b/drivers/xen/blkback/vbd.c @@ -63,8 +63,8 @@ int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major, vbd->pdevice = MKDEV(major, minor); - bdev = open_by_devnum(vbd->pdevice, - vbd->readonly ? FMODE_READ : FMODE_WRITE); + bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ? + FMODE_READ : FMODE_WRITE, NULL); if (IS_ERR(bdev)) { DPRINTK("vbd_creat: device %08x could not be opened.\n", -- cgit v0.10.2 From efe08a3eecf15ab022afba48c691d02c7de2fbbb Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 5 Feb 2010 14:19:33 -0500 Subject: xen/blkback: simplify address translations Cherry-pick and modified from 69d64727c42eecd47fdf82c15a54474d21a4012a ("blkback/blktap2: simplify address translations"): "There are quite a number of places where e.g. page->va->page translations happen. Besides yielding smaller code (source and binary), a second goal is to make it easier to determine where virtual addresses of pages allocated through alloc_empty_pages_and_pagevec() are really used (in turn in order to determine whether using highmem pages would be possible there)." The second goal is not the purpose of this patch - it is just to make it easier to read the code. linux-2.6-pvops: * Stripped drivers/xen/gntdev/* * Stripped drivers/xen/netback/* [v2: Stripped blktap off] Signed-off-by: Jan Beulich Signed-off-by: Daniel Stodden Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index cb844f7..7c9421c 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -99,9 +99,11 @@ static inline int vaddr_pagenr(pending_req_t *req, int seg) return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; } +#define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)] + static inline unsigned long vaddr(pending_req_t *req, int seg) { - unsigned long pfn = page_to_pfn(pending_pages[vaddr_pagenr(req, seg)]); + unsigned long pfn = page_to_pfn(pending_page(req, seg)); return (unsigned long)pfn_to_kaddr(pfn); } @@ -463,8 +465,8 @@ static void dispatch_rw_block_io(blkif_t *blkif, if (ret) continue; - set_phys_to_machine(__pa(vaddr( - pending_req, i)) >> PAGE_SHIFT, + set_phys_to_machine( + page_to_pfn(pending_page(pending_req, i)), FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT)); seg[i].buf = map[i].dev_bus_addr | (req->seg[i].first_sect << 9); @@ -495,7 +497,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, while ((bio == NULL) || (bio_add_page(bio, - virt_to_page(vaddr(pending_req, i)), + pending_page(pending_req, i), seg[i].nsec << 9, seg[i].buf & ~PAGE_MASK) == 0)) { if (bio) { -- cgit v0.10.2 From e8e28871edf0d0adb0bd7e597c044cbaf7a7f137 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 25 Feb 2011 10:51:29 -0500 Subject: xen/blkback: Move global/static variables into struct xen_blkbk. Bundle the lot of discrete variables into a single structure. This is based on what was done in the xen-netback driver: xen: netback: Move global/static variables into struct xen_netbk. 
(094944631cc5a9d6e623302c987f78117c0bf7ac) Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 7c9421c..c08875b 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -84,31 +84,34 @@ typedef struct { struct list_head free_list; } pending_req_t; -static pending_req_t *pending_reqs; -static struct list_head pending_free; -static DEFINE_SPINLOCK(pending_free_lock); -static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq); - #define BLKBACK_INVALID_HANDLE (~0) -static struct page **pending_pages; -static grant_handle_t *pending_grant_handles; +struct xen_blkbk { + pending_req_t *pending_reqs; + struct list_head pending_free; + spinlock_t pending_free_lock; + wait_queue_head_t pending_free_wq; + struct page **pending_pages; + grant_handle_t *pending_grant_handles; +}; + +static struct xen_blkbk *blkbk; static inline int vaddr_pagenr(pending_req_t *req, int seg) { - return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; + return (req - blkbk->pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; } #define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)] static inline unsigned long vaddr(pending_req_t *req, int seg) { - unsigned long pfn = page_to_pfn(pending_page(req, seg)); + unsigned long pfn = page_to_pfn(blkbk->pending_page(req, seg)); return (unsigned long)pfn_to_kaddr(pfn); } #define pending_handle(_req, _seg) \ - (pending_grant_handles[vaddr_pagenr(_req, _seg)]) + (blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)]) static int do_block_io_op(blkif_t *blkif); @@ -126,12 +129,12 @@ static pending_req_t* alloc_req(void) pending_req_t *req = NULL; unsigned long flags; - spin_lock_irqsave(&pending_free_lock, flags); - if (!list_empty(&pending_free)) { - req = list_entry(pending_free.next, pending_req_t, free_list); + spin_lock_irqsave(&blkbk->pending_free_lock, flags); + if (!list_empty(&blkbk->pending_free)) { + req = list_entry(blkbk->pending_free.next, pending_req_t, free_list); list_del(&req->free_list); } - spin_unlock_irqrestore(&pending_free_lock, flags); + spin_unlock_irqrestore(&blkbk->pending_free_lock, flags); return req; } @@ -140,12 +143,12 @@ static void free_req(pending_req_t *req) unsigned long flags; int was_empty; - spin_lock_irqsave(&pending_free_lock, flags); - was_empty = list_empty(&pending_free); - list_add(&req->free_list, &pending_free); - spin_unlock_irqrestore(&pending_free_lock, flags); + spin_lock_irqsave(&blkbk->pending_free_lock, flags); + was_empty = list_empty(&blkbk->pending_free); + list_add(&req->free_list, &blkbk->pending_free); + spin_unlock_irqrestore(&blkbk->pending_free_lock, flags); if (was_empty) - wake_up(&pending_free_wq); + wake_up(&blkbk->pending_free_wq); } static void unplug_queue(blkif_t *blkif) @@ -226,8 +229,8 @@ int blkif_schedule(void *arg) blkif->wq, blkif->waiting_reqs || kthread_should_stop()); wait_event_interruptible( - pending_free_wq, - !list_empty(&pending_free) || kthread_should_stop()); + blkbk->pending_free_wq, + !list_empty(&blkbk->pending_free) || kthread_should_stop()); blkif->waiting_reqs = 0; smp_mb(); /* clear flag *before* checking for work */ @@ -466,7 +469,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, continue; set_phys_to_machine( - page_to_pfn(pending_page(pending_req, i)), + page_to_pfn(blkbk->pending_page(pending_req, i)), FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT)); seg[i].buf = map[i].dev_bus_addr | (req->seg[i].first_sect << 9); @@ -497,7 +500,7 @@ static void 
dispatch_rw_block_io(blkif_t *blkif, while ((bio == NULL) || (bio_add_page(bio, - pending_page(pending_req, i), + blkbk->pending_page(pending_req, i), seg[i].nsec << 9, seg[i].buf & ~PAGE_MASK) == 0)) { if (bio) { @@ -624,31 +627,40 @@ static int __init blkif_init(void) if (!xen_pv_domain()) return -ENODEV; + blkbk = (struct xen_blkbk *)vmalloc(sizeof(struct xen_blkbk)); + if (!blkbk) { + printk(KERN_ALERT "%s: out of memory!\n", __func__); + return -ENOMEM; + } + mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; - pending_reqs = kmalloc(sizeof(pending_reqs[0]) * + blkbk->pending_reqs = kmalloc(sizeof(blkbk->pending_reqs[0]) * blkif_reqs, GFP_KERNEL); - pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) * + blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) * mmap_pages, GFP_KERNEL); - pending_pages = alloc_empty_pages_and_pagevec(mmap_pages); + blkbk->pending_pages = alloc_empty_pages_and_pagevec(mmap_pages); - if (!pending_reqs || !pending_grant_handles || !pending_pages) { + if (!blkbk->pending_reqs || !blkbk->pending_grant_handles || !blkbk->pending_pages) { rc = -ENOMEM; goto out_of_memory; } for (i = 0; i < mmap_pages; i++) - pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; + blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; rc = blkif_interface_init(); if (rc) goto failed_init; - memset(pending_reqs, 0, sizeof(pending_reqs)); - INIT_LIST_HEAD(&pending_free); + memset(blkbk->pending_reqs, 0, sizeof(blkbk->pending_reqs)); + + INIT_LIST_HEAD(&blkbk->pending_free); + spin_lock_init(&blkbk->pending_free_lock); + init_waitqueue_head(&blkbk->pending_free_wq); for (i = 0; i < blkif_reqs; i++) - list_add_tail(&pending_reqs[i].free_list, &pending_free); + list_add_tail(&blkbk->pending_reqs[i].free_list, &blkbk->pending_free); rc = blkif_xenbus_init(); if (rc) @@ -659,9 +671,11 @@ static int __init blkif_init(void) out_of_memory: printk(KERN_ERR "%s: out of memory\n", __func__); failed_init: - kfree(pending_reqs); - kfree(pending_grant_handles); - free_empty_pages_and_pagevec(pending_pages, mmap_pages); + kfree(blkbk->pending_reqs); + kfree(blkbk->pending_grant_handles); + free_empty_pages_and_pagevec(blkbk->pending_pages, mmap_pages); + vfree(blkbk); + blkbk = NULL; return rc; } -- cgit v0.10.2 From c35950bfa9abaaf16548a287a8d5d782a361414f Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 1 Mar 2011 16:22:28 -0500 Subject: xen/blkback: Union the blkif_request request specific fields Following in the steps of patch: "xen: Union the blkif_request request specific fields" this patch changes the blkback. Per the original patch: "Prepare for extending the block device ring to allow request specific fields, by moving the request specific fields for reads, writes and barrier requests to a union member." 
Cc: Owen Smith Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index c08875b..eda5064 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -426,7 +426,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, } preq.dev = req->handle; - preq.sector_number = req->sector_number; + preq.sector_number = req->u.rw.sector_number; preq.nr_sects = 0; pending_req->blkif = blkif; @@ -438,11 +438,11 @@ static void dispatch_rw_block_io(blkif_t *blkif, for (i = 0; i < nseg; i++) { uint32_t flags; - seg[i].nsec = req->seg[i].last_sect - - req->seg[i].first_sect + 1; + seg[i].nsec = req->u.rw.seg[i].last_sect - + req->u.rw.seg[i].first_sect + 1; - if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) || - (req->seg[i].last_sect < req->seg[i].first_sect)) + if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) || + (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect)) goto fail_response; preq.nr_sects += seg[i].nsec; @@ -450,7 +450,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, if (operation != READ) flags |= GNTMAP_readonly; gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, - req->seg[i].gref, blkif->domid); + req->u.rw.seg[i].gref, blkif->domid); } ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); @@ -472,7 +472,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, page_to_pfn(blkbk->pending_page(pending_req, i)), FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT)); seg[i].buf = map[i].dev_bus_addr | - (req->seg[i].first_sect << 9); + (req->u.rw.seg[i].first_sect << 9); } if (ret) diff --git a/include/xen/blkif.h b/include/xen/blkif.h index d274280..ab79426 100644 --- a/include/xen/blkif.h +++ b/include/xen/blkif.h @@ -96,12 +96,12 @@ static void inline blkif_get_x86_32_req(struct blkif_request *dst, struct blkif_ dst->nr_segments = src->nr_segments; dst->handle = src->handle; dst->id = src->id; - dst->sector_number = src->sector_number; + dst->u.rw.sector_number = src->sector_number; barrier(); if (n > dst->nr_segments) n = dst->nr_segments; for (i = 0; i < n; i++) - dst->seg[i] = src->seg[i]; + dst->u.rw.seg[i] = src->seg[i]; } static void inline blkif_get_x86_64_req(struct blkif_request *dst, struct blkif_x86_64_request *src) @@ -111,12 +111,12 @@ static void inline blkif_get_x86_64_req(struct blkif_request *dst, struct blkif_ dst->nr_segments = src->nr_segments; dst->handle = src->handle; dst->id = src->id; - dst->sector_number = src->sector_number; + dst->u.rw.sector_number = src->sector_number; barrier(); if (n > dst->nr_segments) n = dst->nr_segments; for (i = 0; i < n; i++) - dst->seg[i] = src->seg[i]; + dst->u.rw.seg[i] = src->seg[i]; } #endif /* __XEN_BLKIF_H__ */ -- cgit v0.10.2 From 464fb419e17083a18b636c9f4714fc49ef6857d2 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 1 Mar 2011 16:26:10 -0500 Subject: xen/blkback: Use 'vzalloc' for page arrays and pre-allocate pages. Previously we would allocate the array for page using 'kmalloc' which we can as easily do with 'vzalloc'. The pre-allocation of pages was done a bit differently in the past - it used to be that the balloon driver would export "alloc_empty_pages_and_pagevec" which would have in one function created an array, allocated the pages, balloned the pages out (so the memory behind those pages would be non-present), and provide us those pages. This was OK as those pages were shared between other guest and the only thing we needed was to "swizzel" the MFN of those pages to point to the other guest MFN. 
We can still "swizzel" the MFNs using the M2P (and P2M) override API calls, but for the sake of simplicity we are dropping the balloon API calls. We can return to those later on. Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index eda5064..d32198d 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -637,18 +637,23 @@ static int __init blkif_init(void) blkbk->pending_reqs = kmalloc(sizeof(blkbk->pending_reqs[0]) * blkif_reqs, GFP_KERNEL); - blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) * - mmap_pages, GFP_KERNEL); - blkbk->pending_pages = alloc_empty_pages_and_pagevec(mmap_pages); + blkbk->pending_grant_handles = vzalloc(sizeof(blkbk->pending_grant_handles[0]) * + mmap_pages); + blkbk->pending_pages = vzalloc(sizeof(blkbk->pending_pages[0]) * mmap_pages); if (!blkbk->pending_reqs || !blkbk->pending_grant_handles || !blkbk->pending_pages) { rc = -ENOMEM; goto out_of_memory; } - for (i = 0; i < mmap_pages; i++) + for (i = 0; i < mmap_pages; i++) { blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; - + blkbk->pending_pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); + if (blkbk->pending_pages[i] == NULL) { + rc = -ENOMEM; + goto out_of_memory; + } + } rc = blkif_interface_init(); if (rc) goto failed_init; @@ -672,8 +677,12 @@ static int __init blkif_init(void) printk(KERN_ERR "%s: out of memory\n", __func__); failed_init: kfree(blkbk->pending_reqs); - kfree(blkbk->pending_grant_handles); - free_empty_pages_and_pagevec(blkbk->pending_pages, mmap_pages); + vfree(blkbk->pending_grant_handles); + for (i = 0; i < mmap_pages; i++) { + if (blkbk->pending_pages[i]) + __free_page(blkbk->pending_pages[i]); + } + vfree(blkbk->pending_pages); vfree(blkbk); blkbk = NULL; return rc; -- cgit v0.10.2 From 5dc03639cc903f887931831d69895facb5260f4b Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 1 Mar 2011 16:46:45 -0500 Subject: xen/blkback: Utilize the M2P override mechanism for GNTMAP_host_map Instead of doing copy grants lets do mapping grants using the M2P(and P2M) override mechanism. Signed-off-by: Konrad Rzeszutek Wilk Conflicts: drivers/xen/blkback/blkback.c diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index d32198d..15790ae 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -41,7 +41,6 @@ #include #include -#include #include #include #include @@ -192,6 +191,17 @@ static void fast_flush_area(pending_req_t *req) ret = HYPERVISOR_grant_table_op( GNTTABOP_unmap_grant_ref, unmap, invcount); BUG_ON(ret); + /* Note, we use invcount, so nr->pages, so we can't index + * using vaddr(req, i). 
*/ + for (i = 0; i < invcount; i++) { + ret = m2p_remove_override( + virt_to_page(unmap[i].host_addr), false); + if (ret) { + printk(KERN_ALERT "Failed to remove M2P override for " \ + "%lx\n", (unsigned long)unmap[i].host_addr); + continue; + } + } } /****************************************************************** @@ -467,10 +477,15 @@ static void dispatch_rw_block_io(blkif_t *blkif, if (ret) continue; + + ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr), + blkbk->pending_page(pending_req, i), false); + if (ret) { + printk(KERN_ALERT "Failed to install M2P override for"\ + " %lx (ret: %d)\n", (unsigned long)map[i].dev_bus_addr, ret); + continue; + } - set_phys_to_machine( - page_to_pfn(blkbk->pending_page(pending_req, i)), - FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT)); seg[i].buf = map[i].dev_bus_addr | (req->u.rw.seg[i].first_sect << 9); } -- cgit v0.10.2 From a742b02c75e6e76bd0833f9b6e702f1be7d7e008 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 14 Mar 2011 12:41:26 -0400 Subject: xen/blkback: Use kzalloc's, and GFP_KERNEL for data structures. The patch titled:"xen/blkback: Use 'vzalloc' for page arrays and pre-allocate pages." allocates the structures and its member variables using the 'vzalloc'. Daniel Stodden pointed out that vzalloc is good when we use big number of pages - while these are at the max two pages. We can do this using kzalloc. Also the GFP_HIGHMEM does not work properly with Xen, so take that out. We will have to revisit this when a "get_empty_pages_and_pagevec" type API shows up to leverage that. BugLink: http://mid.gmane.org/1299898639.11681.227.camel@agari.van.xensource.com CC: Daniel Stodden Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 15790ae..a6f8f13 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -642,7 +642,7 @@ static int __init blkif_init(void) if (!xen_pv_domain()) return -ENODEV; - blkbk = (struct xen_blkbk *)vmalloc(sizeof(struct xen_blkbk)); + blkbk = (struct xen_blkbk *)kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL); if (!blkbk) { printk(KERN_ALERT "%s: out of memory!\n", __func__); return -ENOMEM; @@ -652,9 +652,10 @@ static int __init blkif_init(void) blkbk->pending_reqs = kmalloc(sizeof(blkbk->pending_reqs[0]) * blkif_reqs, GFP_KERNEL); - blkbk->pending_grant_handles = vzalloc(sizeof(blkbk->pending_grant_handles[0]) * - mmap_pages); - blkbk->pending_pages = vzalloc(sizeof(blkbk->pending_pages[0]) * mmap_pages); + blkbk->pending_grant_handles = kzalloc(sizeof(blkbk->pending_grant_handles[0]) * + mmap_pages, GFP_KERNEL); + blkbk->pending_pages = kzalloc(sizeof(blkbk->pending_pages[0]) * + mmap_pages, GFP_KERNEL); if (!blkbk->pending_reqs || !blkbk->pending_grant_handles || !blkbk->pending_pages) { rc = -ENOMEM; @@ -663,7 +664,7 @@ static int __init blkif_init(void) for (i = 0; i < mmap_pages; i++) { blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; - blkbk->pending_pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); + blkbk->pending_pages[i] = alloc_page(GFP_KERNEL); if (blkbk->pending_pages[i] == NULL) { rc = -ENOMEM; goto out_of_memory; @@ -692,13 +693,13 @@ static int __init blkif_init(void) printk(KERN_ERR "%s: out of memory\n", __func__); failed_init: kfree(blkbk->pending_reqs); - vfree(blkbk->pending_grant_handles); + kfree(blkbk->pending_grant_handles); for (i = 0; i < mmap_pages; i++) { if (blkbk->pending_pages[i]) __free_page(blkbk->pending_pages[i]); } - vfree(blkbk->pending_pages); - vfree(blkbk); + 
kfree(blkbk->pending_pages); + kfree(blkbk); blkbk = NULL; return rc; } -- cgit v0.10.2 From 314146e515710f8a7d7eaf7a58b7ed590c9c14c3 Mon Sep 17 00:00:00 2001 From: Tom Goetz Date: Thu, 17 Mar 2011 12:14:29 -0400 Subject: xen/blkback: Fix the WRITE_BARRIER The WRITE_BARRIER was missing the REQ_WRITE option. This was causing the blktap to die. Signed-off-by: Tom Goetz Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index a6f8f13..4cd5b49 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -47,6 +47,8 @@ #include #include "common.h" +#define WRITE_BARRIER (REQ_WRITE | REQ_FLUSH | REQ_FUA) + /* * These are rather arbitrary. They are fairly large because adjacent requests * pulled from a communication ring are quite likely to end up being part of @@ -420,7 +422,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, operation = WRITE; break; case BLKIF_OP_WRITE_BARRIER: - operation = REQ_FLUSH | REQ_FUA; + operation = WRITE_BARRIER; break; default: operation = 0; /* make gcc happy */ @@ -429,7 +431,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, /* Check that number of segments is sane. */ nseg = req->nr_segments; - if (unlikely(nseg == 0 && operation != (REQ_FLUSH | REQ_FUA)) || + if (unlikely(nseg == 0 && operation != WRITE_BARRIER) || unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { DPRINTK("Bad number of segments in request (%d)\n", nseg); goto fail_response; @@ -537,7 +539,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, } if (!bio) { - BUG_ON(operation != (REQ_FLUSH | REQ_FUA)); + BUG_ON(operation != WRITE_BARRIER); bio = bio_alloc(GFP_KERNEL, 0); if (unlikely(bio == NULL)) goto fail_put_bio; @@ -552,7 +554,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, if (operation == READ) blkif->st_rd_sect += preq.nr_sects; - else if (operation == WRITE || operation == (REQ_FLUSH | REQ_FUA)) + else if (operation == WRITE || operation == WRITE_BARRIER) blkif->st_wr_sect += preq.nr_sects; return; -- cgit v0.10.2 From a1397fa3090c25c6c51c04b4101f2786d16b615f Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 14 Apr 2011 17:05:23 -0400 Subject: xen/blkback: Add some comments. Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 4cd5b49..8a4b1e8 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -1,11 +1,10 @@ /****************************************************************************** - * arch/xen/drivers/blkif/backend/main.c * * Back-end of the driver for virtual block devices. This portion of the * driver exports a 'unified' block-device interface that can be accessed * by any operating system that implements a compatible front end. A * reference front-end implementation can be found in: - * arch/xen/drivers/blkif/frontend + * drivers/block/xen-blkfront.c * * Copyright (c) 2003-2004, Keir Fraser & Steve Hand * Copyright (c) 2005, Christopher Clark @@ -88,16 +87,25 @@ typedef struct { #define BLKBACK_INVALID_HANDLE (~0) struct xen_blkbk { - pending_req_t *pending_reqs; + pending_req_t *pending_reqs; + /* List of all 'pending_req' available */ struct list_head pending_free; + /* And its spinlock. */ spinlock_t pending_free_lock; wait_queue_head_t pending_free_wq; + /* The list of all pages that are available. */ struct page **pending_pages; + /* And the grant handles that are available. 
*/ grant_handle_t *pending_grant_handles; }; static struct xen_blkbk *blkbk; +/* + * Little helpful macro to figure out the index and virtual address of the + * pending_pages[..]. For each 'pending_req' we have have up to + * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through + * 10 and would index in the pending_pages[..]. */ static inline int vaddr_pagenr(pending_req_t *req, int seg) { return (req - blkbk->pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; @@ -122,8 +130,8 @@ static void dispatch_rw_block_io(blkif_t *blkif, static void make_response(blkif_t *blkif, u64 id, unsigned short op, int st); -/****************************************************************** - * misc small helpers +/* + * Retrieve from the 'pending_reqs' a free pending_req structure to be used. */ static pending_req_t* alloc_req(void) { @@ -139,6 +147,10 @@ static pending_req_t* alloc_req(void) return req; } +/* + * Return the 'pending_req' structure back to the freepool. We also + * wake up the thread if it was waiting for a free page. + */ static void free_req(pending_req_t *req) { unsigned long flags; @@ -152,6 +164,11 @@ static void free_req(pending_req_t *req) wake_up(&blkbk->pending_free_wq); } +/* + * Give back a reference count on the underlaying storage. + * It is OK to make multiple calls in this function as it + * resets the plug to NULL when it is done on the first call. + */ static void unplug_queue(blkif_t *blkif) { if (blkif->plug == NULL) @@ -162,6 +179,12 @@ static void unplug_queue(blkif_t *blkif) blkif->plug = NULL; } +/* + * Take a reference count on the underlaying storage. + * It is OK to call this multiple times as we check to make sure + * not to double reference. We also give back a reference count + * if it corresponds to another queue. + */ static void plug_queue(blkif_t *blkif, struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); @@ -173,6 +196,10 @@ static void plug_queue(blkif_t *blkif, struct block_device *bdev) blkif->plug = q; } +/* + * Unmap the grant references, and also remove the M2P over-rides + * used in the 'pending_req'. +*/ static void fast_flush_area(pending_req_t *req) { struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; @@ -266,8 +293,8 @@ int blkif_schedule(void *arg) return 0; } -/****************************************************************** - * COMPLETION CALLBACK -- Called as bh->b_end_io() +/* + * Completion callback on the bio's. Called as bh->b_end_io() */ static void __end_block_io_op(pending_req_t *pending_req, int error) @@ -284,6 +311,9 @@ static void __end_block_io_op(pending_req_t *pending_req, int error) pending_req->status = BLKIF_RSP_ERROR; } + /* If all of the bio's have completed it is time to unmap + * the grant references associated with 'request' and provide + * the proper response on the ring. */ if (atomic_dec_and_test(&pending_req->pendcnt)) { fast_flush_area(pending_req); make_response(pending_req->blkif, pending_req->id, @@ -293,6 +323,9 @@ static void __end_block_io_op(pending_req_t *pending_req, int error) } } +/* + * bio callback. + */ static void end_block_io_op(struct bio *bio, int error) { __end_block_io_op(bio->bi_private, error); @@ -300,8 +333,8 @@ static void end_block_io_op(struct bio *bio, int error) } -/****************************************************************************** - * NOTIFICATION FROM GUEST OS. +/* + * Notification from the guest OS. 
*/ static void blkif_notify_work(blkif_t *blkif) @@ -318,10 +351,11 @@ irqreturn_t blkif_be_int(int irq, void *dev_id) -/****************************************************************** - * DOWNWARD CALLS -- These interface with the block-device layer proper. +/* + * Function to copy the from the ring buffer the 'struct blkif_request' + * (which has the sectors we want, number of them, grant references, etc), + * and transmute it to the block API to hand it over to the proper block disk. */ - static int do_block_io_op(blkif_t *blkif) { union blkif_back_rings *blk_rings = &blkif->blk_rings; @@ -400,6 +434,10 @@ static int do_block_io_op(blkif_t *blkif) return more_to_do; } +/* + * Transumation of the 'struct blkif_request' to a proper 'struct bio' + * and call the 'submit_bio' to pass it to the underlaying storage. + */ static void dispatch_rw_block_io(blkif_t *blkif, struct blkif_request *req, pending_req_t *pending_req) @@ -429,7 +467,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, BUG(); } - /* Check that number of segments is sane. */ + /* Check that the number of segments is sane. */ nseg = req->nr_segments; if (unlikely(nseg == 0 && operation != WRITE_BARRIER) || unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { @@ -447,12 +485,14 @@ static void dispatch_rw_block_io(blkif_t *blkif, pending_req->status = BLKIF_RSP_OKAY; pending_req->nr_pages = nseg; + /* Fill out preq.nr_sects with proper amount of sectors, and setup + * assign map[..] with the PFN of the page in our domain with the + * corresponding grant reference for each page.*/ for (i = 0; i < nseg; i++) { uint32_t flags; seg[i].nsec = req->u.rw.seg[i].last_sect - req->u.rw.seg[i].first_sect + 1; - if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) || (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect)) goto fail_response; @@ -468,6 +508,9 @@ static void dispatch_rw_block_io(blkif_t *blkif, ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); BUG_ON(ret); + /* Now swizzel the MFN in our domain with the MFN from the other domain + * so that when we access vaddr(pending_req,i) it has the contents of the + * page from the other domain. */ for (i = 0; i < nseg; i++) { if (unlikely(map[i].status != 0)) { DPRINTK("invalid buffer -- could not remap it\n"); @@ -485,6 +528,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, if (ret) { printk(KERN_ALERT "Failed to install M2P override for"\ " %lx (ret: %d)\n", (unsigned long)map[i].dev_bus_addr, ret); + /* We could switch over to GNTTABOP_copy */ continue; } @@ -492,6 +536,9 @@ static void dispatch_rw_block_io(blkif_t *blkif, (req->u.rw.seg[i].first_sect << 9); } + /* If we have failed at this point, we need to undo the M2P override, set + * gnttab_set_unmap_op on all of the grant references and perform the + * hypercall to unmap the grants - that is all done in fast_flush_area. */ if (ret) goto fail_flush; @@ -503,7 +550,11 @@ static void dispatch_rw_block_io(blkif_t *blkif, goto fail_flush; } + /* Get a reference count for the disk queue and start sending I/O */ plug_queue(blkif, preq.bdev); + + /* We set it one so that the last submit_bio does not have to call + * atomic_inc. 
*/ atomic_set(&pending_req->pendcnt, 1); blkif_get(blkif); @@ -524,7 +575,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, atomic_inc(&pending_req->pendcnt); submit_bio(operation, bio); } - + bio = bio_alloc(GFP_KERNEL, nseg-i); if (unlikely(bio == NULL)) goto fail_put_bio; @@ -538,6 +589,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, preq.sector_number += seg[i].nsec; } + /* This will be hit if the operation was a barrier. */ if (!bio) { BUG_ON(operation != WRITE_BARRIER); bio = bio_alloc(GFP_KERNEL, 0); @@ -578,11 +630,9 @@ static void dispatch_rw_block_io(blkif_t *blkif, -/****************************************************************** - * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING +/* + * Put a response on the ring on how the operation fared. */ - - static void make_response(blkif_t *blkif, u64 id, unsigned short op, int st) { diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h index 0f91830..4c140c8 100644 --- a/drivers/xen/blkback/common.h +++ b/drivers/xen/blkback/common.h @@ -76,6 +76,7 @@ typedef struct blkif_st { atomic_t refcnt; wait_queue_head_t wq; + /* One thread per one blkif. */ struct task_struct *xenblkd; unsigned int waiting_reqs; struct request_queue *plug; diff --git a/drivers/xen/blkback/interface.c b/drivers/xen/blkback/interface.c index e397a41..a4a1535 100644 --- a/drivers/xen/blkback/interface.c +++ b/drivers/xen/blkback/interface.c @@ -1,6 +1,4 @@ /****************************************************************************** - * arch/xen/drivers/blkif/backend/interface.c - * * Block-device interface management. * * Copyright (c) 2004, Keir Fraser diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c index 8c91a2f..95156c9 100644 --- a/drivers/xen/blkback/vbd.c +++ b/drivers/xen/blkback/vbd.c @@ -1,6 +1,4 @@ /****************************************************************************** - * blkback/vbd.c - * * Routines for managing virtual block devices (VBDs). * * Copyright (c) 2003-2005, Keir Fraser & Steve Hand diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c index 031bc3d..e9c4f80 100644 --- a/drivers/xen/blkback/xenbus.c +++ b/drivers/xen/blkback/xenbus.c @@ -107,7 +107,7 @@ static void update_blkif_status(blkif_t *blkif) } -/**************************************************************** +/* * sysfs interface for VBD I/O requests */ -- cgit v0.10.2 From 5489377ce40d52fb722dcd811617114cebad7bba Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 14 Apr 2011 17:21:50 -0400 Subject: xen/blkback: blkif->struct blkif_st checkpatch.pl suggested that we don't use the typdef in common.h and this triggered this avalanche of patches. Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 8a4b1e8..d07ad53 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -75,7 +75,7 @@ module_param(debug_lvl, int, 0644); * response queued for it, with the saved 'id' passed back. 
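/*
 * Once 'pendcnt' reaches zero the saved 'id' is echoed back to the guest:
 * make_response() writes a blkif_response into the shared ring, bumps the
 * private producer index, and raises the event channel only when the
 * standard ring macro says the frontend actually needs the kick.  A
 * condensed sketch of that sequence follows; it assumes the native ring
 * layout only and omits the 32/64-bit compat protocols, the ring-lock
 * serialisation and the final re-check for queued requests that the real
 * function performs.
 */
static void respond(struct blkif_st *blkif, u64 id, unsigned short op, int st)
{
	union blkif_back_rings *blk_rings = &blkif->blk_rings;
	struct blkif_response resp;
	int notify;

	resp.id = id;		/* lets the frontend match request and response */
	resp.operation = op;
	resp.status = st;	/* e.g. BLKIF_RSP_OKAY or BLKIF_RSP_ERROR */

	memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
	       &resp, sizeof(resp));
	blk_rings->native.rsp_prod_pvt++;

	/* Publish the producer index; 'notify' is set only if the frontend
	 * has not already observed newer responses. */
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->native, notify);
	if (notify)
		notify_remote_via_irq(blkif->irq);
}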
*/ typedef struct { - blkif_t *blkif; + struct blkif_st *blkif; u64 id; int nr_pages; atomic_t pendcnt; @@ -123,11 +123,11 @@ static inline unsigned long vaddr(pending_req_t *req, int seg) (blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)]) -static int do_block_io_op(blkif_t *blkif); -static void dispatch_rw_block_io(blkif_t *blkif, +static int do_block_io_op(struct blkif_st *blkif); +static void dispatch_rw_block_io(struct blkif_st *blkif, struct blkif_request *req, pending_req_t *pending_req); -static void make_response(blkif_t *blkif, u64 id, +static void make_response(struct blkif_st *blkif, u64 id, unsigned short op, int st); /* @@ -169,7 +169,7 @@ static void free_req(pending_req_t *req) * It is OK to make multiple calls in this function as it * resets the plug to NULL when it is done on the first call. */ -static void unplug_queue(blkif_t *blkif) +static void unplug_queue(struct blkif_st *blkif) { if (blkif->plug == NULL) return; @@ -185,7 +185,7 @@ static void unplug_queue(blkif_t *blkif) * not to double reference. We also give back a reference count * if it corresponds to another queue. */ -static void plug_queue(blkif_t *blkif, struct block_device *bdev) +static void plug_queue(struct blkif_st *blkif, struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); @@ -237,7 +237,7 @@ static void fast_flush_area(pending_req_t *req) * SCHEDULER FUNCTIONS */ -static void print_stats(blkif_t *blkif) +static void print_stats(struct blkif_st *blkif) { printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | br %4d\n", current->comm, blkif->st_oo_req, @@ -250,7 +250,7 @@ static void print_stats(blkif_t *blkif) int blkif_schedule(void *arg) { - blkif_t *blkif = arg; + struct blkif_st *blkif = arg; struct vbd *vbd = &blkif->vbd; blkif_get(blkif); @@ -337,7 +337,7 @@ static void end_block_io_op(struct bio *bio, int error) * Notification from the guest OS. */ -static void blkif_notify_work(blkif_t *blkif) +static void blkif_notify_work(struct blkif_st *blkif) { blkif->waiting_reqs = 1; wake_up(&blkif->wq); @@ -356,7 +356,7 @@ irqreturn_t blkif_be_int(int irq, void *dev_id) * (which has the sectors we want, number of them, grant references, etc), * and transmute it to the block API to hand it over to the proper block disk. */ -static int do_block_io_op(blkif_t *blkif) +static int do_block_io_op(struct blkif_st *blkif) { union blkif_back_rings *blk_rings = &blkif->blk_rings; struct blkif_request req; @@ -438,7 +438,7 @@ static int do_block_io_op(blkif_t *blkif) * Transumation of the 'struct blkif_request' to a proper 'struct bio' * and call the 'submit_bio' to pass it to the underlaying storage. */ -static void dispatch_rw_block_io(blkif_t *blkif, +static void dispatch_rw_block_io(struct blkif_st *blkif, struct blkif_request *req, pending_req_t *pending_req) { @@ -633,7 +633,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, /* * Put a response on the ring on how the operation fared. */ -static void make_response(blkif_t *blkif, u64 id, +static void make_response(struct blkif_st *blkif, u64 id, unsigned short op, int st) { struct blkif_response resp; diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h index 4c140c8..be3fc93 100644 --- a/drivers/xen/blkback/common.h +++ b/drivers/xen/blkback/common.h @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include #include @@ -44,7 +44,7 @@ #define DPRINTK(_f, _a...) 
\ pr_debug("(file=%s, line=%d) " _f, \ - __FILE__ , __LINE__ , ## _a ) + __FILE__ , __LINE__ , ## _a) struct vbd { blkif_vdev_t handle; /* what the domain refers to this vbd as */ @@ -57,7 +57,7 @@ struct vbd { struct backend_info; -typedef struct blkif_st { +struct blkif_st { /* Unique identifier for this interface. */ domid_t domid; unsigned int handle; @@ -94,13 +94,14 @@ typedef struct blkif_st { grant_handle_t shmem_handle; grant_ref_t shmem_ref; -} blkif_t; +}; -blkif_t *blkif_alloc(domid_t domid); -void blkif_disconnect(blkif_t *blkif); -void blkif_free(blkif_t *blkif); -int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn); -void vbd_resize(blkif_t *blkif); +struct blkif_st *blkif_alloc(domid_t domid); +void blkif_disconnect(struct blkif_st *blkif); +void blkif_free(struct blkif_st *blkif); +int blkif_map(struct blkif_st *blkif, unsigned long shared_page, + unsigned int evtchn); +void vbd_resize(struct blkif_st *blkif); #define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) #define blkif_put(_b) \ @@ -110,7 +111,7 @@ void vbd_resize(blkif_t *blkif); } while (0) /* Create a vbd. */ -int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, unsigned major, +int vbd_create(struct blkif_st *blkif, blkif_vdev_t vdevice, unsigned major, unsigned minor, int readonly, int cdrom); void vbd_free(struct vbd *vbd); @@ -125,7 +126,7 @@ struct phys_req { blkif_sector_t sector_number; }; -int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation); +int vbd_translate(struct phys_req *req, struct blkif_st *blkif, int operation); int blkif_interface_init(void); diff --git a/drivers/xen/blkback/interface.c b/drivers/xen/blkback/interface.c index a4a1535..7d59f13 100644 --- a/drivers/xen/blkback/interface.c +++ b/drivers/xen/blkback/interface.c @@ -35,9 +35,9 @@ static struct kmem_cache *blkif_cachep; -blkif_t *blkif_alloc(domid_t domid) +struct blkif_st *blkif_alloc(domid_t domid) { - blkif_t *blkif; + struct blkif_st *blkif; blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL); if (!blkif) @@ -54,7 +54,7 @@ blkif_t *blkif_alloc(domid_t domid) return blkif; } -static int map_frontend_page(blkif_t *blkif, unsigned long shared_page) +static int map_frontend_page(struct blkif_st *blkif, unsigned long shared_page) { struct gnttab_map_grant_ref op; @@ -75,7 +75,7 @@ static int map_frontend_page(blkif_t *blkif, unsigned long shared_page) return 0; } -static void unmap_frontend_page(blkif_t *blkif) +static void unmap_frontend_page(struct blkif_st *blkif) { struct gnttab_unmap_grant_ref op; @@ -86,7 +86,7 @@ static void unmap_frontend_page(blkif_t *blkif) BUG(); } -int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn) +int blkif_map(struct blkif_st *blkif, unsigned long shared_page, unsigned int evtchn) { int err; @@ -143,7 +143,7 @@ int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn) return 0; } -void blkif_disconnect(blkif_t *blkif) +void blkif_disconnect(struct blkif_st *blkif) { if (blkif->xenblkd) { kthread_stop(blkif->xenblkd); @@ -166,7 +166,7 @@ void blkif_disconnect(blkif_t *blkif) } } -void blkif_free(blkif_t *blkif) +void blkif_free(struct blkif_st *blkif) { if (!atomic_dec_and_test(&blkif->refcnt)) BUG(); @@ -175,7 +175,7 @@ void blkif_free(blkif_t *blkif) int __init blkif_interface_init(void) { - blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), + blkif_cachep = kmem_cache_create("blkif_cache", sizeof(struct blkif_st), 0, 0, NULL); if (!blkif_cachep) return -ENOMEM; diff --git a/drivers/xen/blkback/vbd.c 
b/drivers/xen/blkback/vbd.c index 95156c9..26a37df 100644 --- a/drivers/xen/blkback/vbd.c +++ b/drivers/xen/blkback/vbd.c @@ -48,7 +48,7 @@ unsigned long vbd_secsize(struct vbd *vbd) return bdev_logical_block_size(vbd->bdev); } -int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major, +int vbd_create(struct blkif_st *blkif, blkif_vdev_t handle, unsigned major, unsigned minor, int readonly, int cdrom) { struct vbd *vbd; @@ -97,7 +97,7 @@ void vbd_free(struct vbd *vbd) vbd->bdev = NULL; } -int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation) +int vbd_translate(struct phys_req *req, struct blkif_st *blkif, int operation) { struct vbd *vbd = &blkif->vbd; int rc = -EACCES; @@ -116,7 +116,7 @@ int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation) return rc; } -void vbd_resize(blkif_t *blkif) +void vbd_resize(struct blkif_st *blkif) { struct vbd *vbd = &blkif->vbd; struct xenbus_transaction xbt; diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c index e9c4f80..67462c4 100644 --- a/drivers/xen/blkback/xenbus.c +++ b/drivers/xen/blkback/xenbus.c @@ -30,7 +30,7 @@ struct backend_info { struct xenbus_device *dev; - blkif_t *blkif; + struct blkif_st *blkif; struct xenbus_watch backend_watch; unsigned major; unsigned minor; @@ -47,7 +47,7 @@ struct xenbus_device *blkback_xenbus(struct backend_info *be) return be->dev; } -static int blkback_name(blkif_t *blkif, char *buf) +static int blkback_name(struct blkif_st *blkif, char *buf) { char *devpath, *devname; struct xenbus_device *dev = blkif->be->dev; @@ -67,7 +67,7 @@ static int blkback_name(blkif_t *blkif, char *buf) return 0; } -static void update_blkif_status(blkif_t *blkif) +static void update_blkif_status(struct blkif_st *blkif) { int err; char name[TASK_COMM_LEN]; -- cgit v0.10.2 From 3c64b58cd614c976dcb19e16fa59ab620b3fe130 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 14 Apr 2011 17:24:45 -0400 Subject: xen/blkback: Fix checkpatch warnings in vbd.c Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c index 26a37df..d0ff4cf 100644 --- a/drivers/xen/blkback/vbd.c +++ b/drivers/xen/blkback/vbd.c @@ -30,8 +30,9 @@ #include "common.h" -#define vbd_sz(_v) ((_v)->bdev->bd_part ? \ - (_v)->bdev->bd_part->nr_sects : get_capacity((_v)->bdev->bd_disk)) +#define vbd_sz(_v) ((_v)->bdev->bd_part ? \ + (_v)->bdev->bd_part->nr_sects : \ + get_capacity((_v)->bdev->bd_disk)) unsigned long long vbd_size(struct vbd *vbd) { @@ -40,7 +41,7 @@ unsigned long long vbd_size(struct vbd *vbd) unsigned int vbd_info(struct vbd *vbd) { - return vbd->type | (vbd->readonly?VDISK_READONLY:0); + return vbd->type | (vbd->readonly ? 
VDISK_READONLY : 0); } unsigned long vbd_secsize(struct vbd *vbd) @@ -126,7 +127,7 @@ void vbd_resize(struct blkif_st *blkif) printk(KERN_INFO "VBD Resize: Domid: %d, Device: (%d, %d)\n", blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice)); - printk(KERN_INFO "VBD Resize: new size %Lu\n", new_size); + printk(KERN_INFO "VBD Resize: new size %llu\n", new_size); vbd->size = new_size; again: err = xenbus_transaction_start(&xbt); @@ -134,7 +135,7 @@ again: printk(KERN_WARNING "Error starting transaction"); return; } - err = xenbus_printf(xbt, dev->nodename, "sectors", "%Lu", + err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", vbd_size(vbd)); if (err) { printk(KERN_WARNING "Error writing new size"); -- cgit v0.10.2 From e5f4b3c498623fc3d83f6d92e00a2b2dbf500cd0 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 14 Apr 2011 17:27:29 -0400 Subject: xen/blkback: Fix interface.c checkpatch warnings .. except + sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring_area->addr; WARNING: line over 80 characters + BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); as breaking them up really does not help that much. Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/blkback/interface.c b/drivers/xen/blkback/interface.c index 7d59f13..163aed4 100644 --- a/drivers/xen/blkback/interface.c +++ b/drivers/xen/blkback/interface.c @@ -86,7 +86,8 @@ static void unmap_frontend_page(struct blkif_st *blkif) BUG(); } -int blkif_map(struct blkif_st *blkif, unsigned long shared_page, unsigned int evtchn) +int blkif_map(struct blkif_st *blkif, unsigned long shared_page, + unsigned int evtchn) { int err; @@ -94,7 +95,8 @@ int blkif_map(struct blkif_st *blkif, unsigned long shared_page, unsigned int ev if (blkif->irq) return 0; - if ( (blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE)) == NULL ) + blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE); + if (!blkif->blk_ring_area) return -ENOMEM; err = map_frontend_page(blkif, shared_page); @@ -131,8 +133,7 @@ int blkif_map(struct blkif_st *blkif, unsigned long shared_page, unsigned int ev err = bind_interdomain_evtchn_to_irqhandler( blkif->domid, evtchn, blkif_be_int, 0, "blkif-backend", blkif); - if (err < 0) - { + if (err < 0) { unmap_frontend_page(blkif); free_vm_area(blkif->blk_ring_area); blkif->blk_rings.common.sring = NULL; -- cgit v0.10.2 From d6091b217dd4fdabc4a8cd6fa61775f1e3eb6efe Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 14 Apr 2011 17:33:30 -0400 Subject: xen/blkback: Fix checkpatch warnings of xenbus.c Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c index 67462c4..b41ed65d 100644 --- a/drivers/xen/blkback/xenbus.c +++ b/drivers/xen/blkback/xenbus.c @@ -25,10 +25,9 @@ #undef DPRINTK #define DPRINTK(fmt, args...) 
\ pr_debug("blkback/xenbus (%s:%d) " fmt ".\n", \ - __FUNCTION__, __LINE__, ##args) + __func__, __LINE__, ##args) -struct backend_info -{ +struct backend_info { struct xenbus_device *dev; struct blkif_st *blkif; struct xenbus_watch backend_watch; @@ -56,7 +55,8 @@ static int blkback_name(struct blkif_st *blkif, char *buf) if (IS_ERR(devpath)) return PTR_ERR(devpath); - if ((devname = strstr(devpath, "/dev/")) != NULL) + devname = strstr(devpath, "/dev/"); + if (devname != NULL) devname += strlen("/dev/"); else devname = devpath; @@ -153,7 +153,7 @@ int xenvbd_sysfs_addif(struct xenbus_device *dev) int error; error = device_create_file(&dev->dev, &dev_attr_physical_device); - if (error) + if (error) goto fail1; error = device_create_file(&dev->dev, &dev_attr_mode); @@ -327,7 +327,10 @@ static void backend_changed(struct xenbus_watch *watch, /* Front end dir is a number, which is used as the handle. */ char *p = strrchr(dev->otherend, '/') + 1; - long handle = simple_strtoul(p, NULL, 0); + long handle; + err = strict_strtoul(p, 0, &handle); + if (err) + return; be->major = major; be->minor = minor; @@ -369,7 +372,7 @@ static void frontend_changed(struct xenbus_device *dev, case XenbusStateInitialising: if (dev->state == XenbusStateClosed) { printk(KERN_INFO "%s: %s: prepare for reconnect\n", - __FUNCTION__, dev->nodename); + __func__, dev->nodename); xenbus_switch_state(dev, XenbusStateInitWait); } break; @@ -494,8 +497,8 @@ static int connect_ring(struct backend_info *be) DPRINTK("%s", dev->otherend); - err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", &ring_ref, - "event-channel", "%u", &evtchn, NULL); + err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", + &ring_ref, "event-channel", "%u", &evtchn, NULL); if (err) { xenbus_dev_fatal(dev, err, "reading %s/ring-ref and event-channel", -- cgit v0.10.2 From 2e9977c21f7679d5f616132ae1f7857e932ccd19 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 14 Apr 2011 17:42:07 -0400 Subject: xen/blkback: Fix checkpatch warnings in blkback.c Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index d07ad53..2d41393 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -63,8 +63,8 @@ module_param_named(reqs, blkif_reqs, int, 0); MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate"); /* Run-time switchable: /sys/module/blkback/parameters/ */ -static unsigned int log_stats = 0; -static unsigned int debug_lvl = 0; +static unsigned int log_stats; +static unsigned int debug_lvl; module_param(log_stats, int, 0644); module_param(debug_lvl, int, 0644); @@ -74,7 +74,7 @@ module_param(debug_lvl, int, 0644); * the pendcnt towards zero. When it hits zero, the specified domain has a * response queued for it, with the saved 'id' passed back. */ -typedef struct { +struct pending_req { struct blkif_st *blkif; u64 id; int nr_pages; @@ -82,12 +82,12 @@ typedef struct { unsigned short operation; int status; struct list_head free_list; -} pending_req_t; +}; #define BLKBACK_INVALID_HANDLE (~0) struct xen_blkbk { - pending_req_t *pending_reqs; + struct pending_req *pending_reqs; /* List of all 'pending_req' available */ struct list_head pending_free; /* And its spinlock. */ @@ -106,14 +106,15 @@ static struct xen_blkbk *blkbk; * pending_pages[..]. For each 'pending_req' we have have up to * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through * 10 and would index in the pending_pages[..]. 
*/ -static inline int vaddr_pagenr(pending_req_t *req, int seg) +static inline int vaddr_pagenr(struct pending_req *req, int seg) { - return (req - blkbk->pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; + return (req - blkbk->pending_reqs) * + BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; } #define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)] -static inline unsigned long vaddr(pending_req_t *req, int seg) +static inline unsigned long vaddr(struct pending_req *req, int seg) { unsigned long pfn = page_to_pfn(blkbk->pending_page(req, seg)); return (unsigned long)pfn_to_kaddr(pfn); @@ -126,21 +127,22 @@ static inline unsigned long vaddr(pending_req_t *req, int seg) static int do_block_io_op(struct blkif_st *blkif); static void dispatch_rw_block_io(struct blkif_st *blkif, struct blkif_request *req, - pending_req_t *pending_req); + struct pending_req *pending_req); static void make_response(struct blkif_st *blkif, u64 id, unsigned short op, int st); /* * Retrieve from the 'pending_reqs' a free pending_req structure to be used. */ -static pending_req_t* alloc_req(void) +static struct pending_req *alloc_req(void) { - pending_req_t *req = NULL; + struct pending_req *req = NULL; unsigned long flags; spin_lock_irqsave(&blkbk->pending_free_lock, flags); if (!list_empty(&blkbk->pending_free)) { - req = list_entry(blkbk->pending_free.next, pending_req_t, free_list); + req = list_entry(blkbk->pending_free.next, struct pending_req, + free_list); list_del(&req->free_list); } spin_unlock_irqrestore(&blkbk->pending_free_lock, flags); @@ -151,7 +153,7 @@ static pending_req_t* alloc_req(void) * Return the 'pending_req' structure back to the freepool. We also * wake up the thread if it was waiting for a free page. */ -static void free_req(pending_req_t *req) +static void free_req(struct pending_req *req) { unsigned long flags; int was_empty; @@ -200,7 +202,7 @@ static void plug_queue(struct blkif_st *blkif, struct block_device *bdev) * Unmap the grant references, and also remove the M2P over-rides * used in the 'pending_req'. */ -static void fast_flush_area(pending_req_t *req) +static void fast_flush_area(struct pending_req *req) { struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int i, invcount = 0; @@ -221,7 +223,8 @@ static void fast_flush_area(pending_req_t *req) GNTTABOP_unmap_grant_ref, unmap, invcount); BUG_ON(ret); /* Note, we use invcount, so nr->pages, so we can't index - * using vaddr(req, i). */ + * using vaddr(req, i). + */ for (i = 0; i < invcount; i++) { ret = m2p_remove_override( virt_to_page(unmap[i].host_addr), false); @@ -233,7 +236,7 @@ static void fast_flush_area(pending_req_t *req) } } -/****************************************************************** +/* * SCHEDULER FUNCTIONS */ @@ -269,7 +272,8 @@ int blkif_schedule(void *arg) blkif->waiting_reqs || kthread_should_stop()); wait_event_interruptible( blkbk->pending_free_wq, - !list_empty(&blkbk->pending_free) || kthread_should_stop()); + !list_empty(&blkbk->pending_free) || + kthread_should_stop()); blkif->waiting_reqs = 0; smp_mb(); /* clear flag *before* checking for work */ @@ -297,7 +301,7 @@ int blkif_schedule(void *arg) * Completion callback on the bio's. Called as bh->b_end_io() */ -static void __end_block_io_op(pending_req_t *pending_req, int error) +static void __end_block_io_op(struct pending_req *pending_req, int error) { /* An error fails the entire request. 
*/ if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) && @@ -313,7 +317,8 @@ static void __end_block_io_op(pending_req_t *pending_req, int error) /* If all of the bio's have completed it is time to unmap * the grant references associated with 'request' and provide - * the proper response on the ring. */ + * the proper response on the ring. + */ if (atomic_dec_and_test(&pending_req->pendcnt)) { fast_flush_area(pending_req); make_response(pending_req->blkif, pending_req->id, @@ -360,7 +365,7 @@ static int do_block_io_op(struct blkif_st *blkif) { union blkif_back_rings *blk_rings = &blkif->blk_rings; struct blkif_request req; - pending_req_t *pending_req; + struct pending_req *pending_req; RING_IDX rc, rp; int more_to_do = 0; @@ -440,7 +445,7 @@ static int do_block_io_op(struct blkif_st *blkif) */ static void dispatch_rw_block_io(struct blkif_st *blkif, struct blkif_request *req, - pending_req_t *pending_req) + struct pending_req *pending_req) { struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct phys_req preq; @@ -487,7 +492,8 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, /* Fill out preq.nr_sects with proper amount of sectors, and setup * assign map[..] with the PFN of the page in our domain with the - * corresponding grant reference for each page.*/ + * corresponding grant reference for each page. + */ for (i = 0; i < nseg; i++) { uint32_t flags; @@ -509,8 +515,9 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, BUG_ON(ret); /* Now swizzel the MFN in our domain with the MFN from the other domain - * so that when we access vaddr(pending_req,i) it has the contents of the - * page from the other domain. */ + * so that when we access vaddr(pending_req,i) it has the contents of + * the page from the other domain. + */ for (i = 0; i < nseg; i++) { if (unlikely(map[i].status != 0)) { DPRINTK("invalid buffer -- could not remap it\n"); @@ -522,12 +529,13 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, if (ret) continue; - + ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr), blkbk->pending_page(pending_req, i), false); if (ret) { printk(KERN_ALERT "Failed to install M2P override for"\ - " %lx (ret: %d)\n", (unsigned long)map[i].dev_bus_addr, ret); + " %lx (ret: %d)\n", (unsigned long) + map[i].dev_bus_addr, ret); /* We could switch over to GNTTABOP_copy */ continue; } @@ -536,9 +544,11 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, (req->u.rw.seg[i].first_sect << 9); } - /* If we have failed at this point, we need to undo the M2P override, set - * gnttab_set_unmap_op on all of the grant references and perform the - * hypercall to unmap the grants - that is all done in fast_flush_area. */ + /* If we have failed at this point, we need to undo the M2P override, + * set gnttab_set_unmap_op on all of the grant references and perform + * the hypercall to unmap the grants - that is all done in + * fast_flush_area. + */ if (ret) goto fail_flush; @@ -554,7 +564,8 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, plug_queue(blkif, preq.bdev); /* We set it one so that the last submit_bio does not have to call - * atomic_inc. */ + * atomic_inc. 
+ */ atomic_set(&pending_req->pendcnt, 1); blkif_get(blkif); @@ -575,7 +586,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, atomic_inc(&pending_req->pendcnt); submit_bio(operation, bio); } - + bio = bio_alloc(GFP_KERNEL, nseg-i); if (unlikely(bio == NULL)) goto fail_put_bio; @@ -694,7 +705,7 @@ static int __init blkif_init(void) if (!xen_pv_domain()) return -ENODEV; - blkbk = (struct xen_blkbk *)kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL); + blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL); if (!blkbk) { printk(KERN_ALERT "%s: out of memory!\n", __func__); return -ENOMEM; @@ -709,7 +720,8 @@ static int __init blkif_init(void) blkbk->pending_pages = kzalloc(sizeof(blkbk->pending_pages[0]) * mmap_pages, GFP_KERNEL); - if (!blkbk->pending_reqs || !blkbk->pending_grant_handles || !blkbk->pending_pages) { + if (!blkbk->pending_reqs || !blkbk->pending_grant_handles || + !blkbk->pending_pages) { rc = -ENOMEM; goto out_of_memory; } @@ -733,7 +745,8 @@ static int __init blkif_init(void) init_waitqueue_head(&blkbk->pending_free_wq); for (i = 0; i < blkif_reqs; i++) - list_add_tail(&blkbk->pending_reqs[i].free_list, &blkbk->pending_free); + list_add_tail(&blkbk->pending_reqs[i].free_list, + &blkbk->pending_free); rc = blkif_xenbus_init(); if (rc) -- cgit v0.10.2 From 0faa8cca883bbc6a0919e3c89128672659b75820 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 14 Apr 2011 17:58:19 -0400 Subject: xen/blkback: remove per-queue plugging commit 7eaceaccab5f40bbfda044629a6298616aeaed50 ("block: remove per-queue plugging") added two new interfaces to plug and unplug: blk_start_plug and blk_finish_plug. Lets use those. Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 2d41393..464f2e0 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -167,38 +167,6 @@ static void free_req(struct pending_req *req) } /* - * Give back a reference count on the underlaying storage. - * It is OK to make multiple calls in this function as it - * resets the plug to NULL when it is done on the first call. - */ -static void unplug_queue(struct blkif_st *blkif) -{ - if (blkif->plug == NULL) - return; - if (blkif->plug->unplug_fn) - blkif->plug->unplug_fn(blkif->plug); - blk_put_queue(blkif->plug); - blkif->plug = NULL; -} - -/* - * Take a reference count on the underlaying storage. - * It is OK to call this multiple times as we check to make sure - * not to double reference. We also give back a reference count - * if it corresponds to another queue. - */ -static void plug_queue(struct blkif_st *blkif, struct block_device *bdev) -{ - struct request_queue *q = bdev_get_queue(bdev); - - if (q == blkif->plug) - return; - unplug_queue(blkif); - blk_get_queue(q); - blkif->plug = q; -} - -/* * Unmap the grant references, and also remove the M2P over-rides * used in the 'pending_req'. 
*/ @@ -280,7 +248,6 @@ int blkif_schedule(void *arg) if (do_block_io_op(blkif)) blkif->waiting_reqs = 1; - unplug_queue(blkif); if (log_stats && time_after(jiffies, blkif->st_print)) print_stats(blkif); @@ -456,6 +423,8 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, struct bio *bio = NULL; int ret, i; int operation; + struct blk_plug plug; + struct request_queue *q; switch (req->operation) { case BLKIF_OP_READ: @@ -561,7 +530,8 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, } /* Get a reference count for the disk queue and start sending I/O */ - plug_queue(blkif, preq.bdev); + blk_get_queue(q); + blk_start_plug(&plug); /* We set it one so that the last submit_bio does not have to call * atomic_inc. @@ -620,11 +590,14 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, else if (operation == WRITE || operation == WRITE_BARRIER) blkif->st_wr_sect += preq.nr_sects; + blk_finish_plug(&plug); + blk_put_queue(q); return; fail_flush: fast_flush_area(pending_req); fail_response: + /* Haven't submitted any bio's yet. */ make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); free_req(pending_req); msleep(1); /* back off a bit */ @@ -634,7 +607,8 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, __end_block_io_op(pending_req, -EINVAL); if (bio) bio_put(bio); - unplug_queue(blkif); + blk_finish_plug(&plug); + blk_put_queue(q); msleep(1); /* back off a bit */ return; } diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h index be3fc93..6257c11 100644 --- a/drivers/xen/blkback/common.h +++ b/drivers/xen/blkback/common.h @@ -79,7 +79,6 @@ struct blkif_st { /* One thread per one blkif. */ struct task_struct *xenblkd; unsigned int waiting_reqs; - struct request_queue *plug; /* statistics */ unsigned long st_print; -- cgit v0.10.2 From ae038af12c2bc0859279a1a62b5f8cb0ef00f5f8 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Thu, 7 Apr 2011 15:28:47 +0300 Subject: OMAP: DSS2: DSI: fix use_sys_clk & highfreq use_sys_clk and highfreq fields in dsi.current_cinfo were never set. Luckily they weren't used anywhere so it didn't cause any problems. This patch fixes those fields and they are now set at the same time as the rest of the fields. Signed-off-by: Tomi Valkeinen diff --git a/drivers/video/omap2/dss/dsi.c b/drivers/video/omap2/dss/dsi.c index 0a7f1a4..86041535 100644 --- a/drivers/video/omap2/dss/dsi.c +++ b/drivers/video/omap2/dss/dsi.c @@ -1276,6 +1276,9 @@ int dsi_pll_set_clock_div(struct dsi_clock_info *cinfo) DSSDBGF(); + dsi.current_cinfo.use_sys_clk = cinfo->use_sys_clk; + dsi.current_cinfo.highfreq = cinfo->highfreq; + dsi.current_cinfo.fint = cinfo->fint; dsi.current_cinfo.clkin4ddr = cinfo->clkin4ddr; dsi.current_cinfo.dsi_pll_hsdiv_dispc_clk = -- cgit v0.10.2 From 4eb68edb7d21aff81765a065680270693c23fbfc Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Mon, 4 Apr 2011 10:02:53 +0300 Subject: OMAP: DSS2: DSI: fix dsi_dump_clocks() On OMAP4, reading DSI_PLL_CONFIGURATION2 register requires the L3 clock (CIO_CLK_ICG) to PLL. Currently dsi_dump_clocks() tries to read that register without enabling the L3 clock, leading to crash if DSI is not in use. The status of the bit being read from DSI_PLL_CONFIGURATION2 is available from dsi_clock_info->use_sys_clk, so we can avoid the whole problem by just using that. 
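In code terms the fix is a caching pattern: record the PLL settings in driver
state at the moment they are programmed, and let the seq_file dump report the
cached copy instead of touching registers that may be unclocked.  A minimal
sketch of that pattern, with simplified structures that are not the driver's
exact ones:

#include <linux/types.h>
#include <linux/seq_file.h>

struct pll_cinfo {
	bool use_sys_clk;	/* which reference clock feeds the PLL */
	unsigned long fint;
};

static struct pll_cinfo current_cinfo;	/* last values actually programmed */

static void pll_set_clock_div(const struct pll_cinfo *cinfo)
{
	current_cinfo = *cinfo;		/* cache what is being written to HW */
	/* ... program the real divider and source registers here ... */
}

static void pll_dump(struct seq_file *s)
{
	/* Report from the cache: safe even when the module clocks are off. */
	seq_printf(s, "pll source = %s\n",
		   current_cinfo.use_sys_clk ? "dss_sys_clk" : "pclkfree");
	seq_printf(s, "Fint\t\t%lu\n", current_cinfo.fint);
}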
Signed-off-by: Tomi Valkeinen diff --git a/drivers/video/omap2/dss/dsi.c b/drivers/video/omap2/dss/dsi.c index 86041535..1464ac4 100644 --- a/drivers/video/omap2/dss/dsi.c +++ b/drivers/video/omap2/dss/dsi.c @@ -1491,7 +1491,6 @@ void dsi_pll_uninit(void) void dsi_dump_clocks(struct seq_file *s) { - int clksel; struct dsi_clock_info *cinfo = &dsi.current_cinfo; enum dss_clk_source dispc_clk_src, dsi_clk_src; @@ -1500,13 +1499,10 @@ void dsi_dump_clocks(struct seq_file *s) enable_clocks(1); - clksel = REG_GET(DSI_PLL_CONFIGURATION2, 11, 11); - seq_printf(s, "- DSI PLL -\n"); seq_printf(s, "dsi pll source = %s\n", - clksel == 0 ? - "dss_sys_clk" : "pclkfree"); + cinfo->use_sys_clk ? "dss_sys_clk" : "pclkfree"); seq_printf(s, "Fint\t\t%-16luregn %u\n", cinfo->fint, cinfo->regn); -- cgit v0.10.2 From 6553b2105c8871dae8dfff244440e793f3a6bdb9 Mon Sep 17 00:00:00 2001 From: Archit Taneja Date: Thu, 31 Mar 2011 13:23:35 +0530 Subject: OMAP: DSS2: Fix: Return correct lcd clock source for OMAP2/3 dss.lcd_clk_source is set to the default value DSS_CLK_SRC_FCK at dss_init. For OMAP2 and OMAP3, the dss.lcd_clk_source should always be the same as dss.dispc_clk_source. The function dss_get_lcd_clk_source() always returns the default value DSS_CLK_SRC_FCK for OMAP2/3. This leads to wrong clock dumps when dispc_clk_source is not DSS_CLK_SRC_FCK. Correct this function to always return dss.dispc_clk_source for OMAP2/3. Signed-off-by: Archit Taneja Signed-off-by: Tomi Valkeinen diff --git a/drivers/video/omap2/dss/dss.c b/drivers/video/omap2/dss/dss.c index 3f1fee6..c3b48a0 100644 --- a/drivers/video/omap2/dss/dss.c +++ b/drivers/video/omap2/dss/dss.c @@ -385,8 +385,14 @@ enum dss_clk_source dss_get_dsi_clk_source(void) enum dss_clk_source dss_get_lcd_clk_source(enum omap_channel channel) { - int ix = channel == OMAP_DSS_CHANNEL_LCD ? 0 : 1; - return dss.lcd_clk_source[ix]; + if (dss_has_feature(FEAT_LCD_CLK_SRC)) { + int ix = channel == OMAP_DSS_CHANNEL_LCD ? 0 : 1; + return dss.lcd_clk_source[ix]; + } else { + /* LCD_CLK source is the same as DISPC_FCLK source for + * OMAP2 and OMAP3 */ + return dss.dispc_clk_source; + } } /* calculate clock rates using dividers in cinfo */ -- cgit v0.10.2 From 0b41136c0d2f544b9b771643f849f82ed86f765e Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Fri, 15 Apr 2011 10:42:59 +0300 Subject: OMAP: DSS2: DSI: Fix DSI PLL power bug OMAP3630 has a HW bug causing DSI PLL power command POWER_ON_DIV (0x3) to not work properly. The bug prevents us from enabling DSI PLL power only to HS divider block. This patch adds a dss feature for the bug and converts POWER_ON_DIV requests to POWER_ON_ALL (0x2). 
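The workaround follows the driver's usual erratum pattern: the affected SoC
advertises a bug bit in its per-chip feature table, and the low-level helper
silently upgrades the unusable power command so callers never see the quirk.
A condensed sketch of that gating pattern; the enum values and helper names
are illustrative, only FEAT_DSI_PLL_PWR_BUG and the 0x3 -> 0x2 substitution
come from the patch below:

#include <linux/types.h>

enum pll_power_state {
	PLL_POWER_OFF		= 0x0,
	PLL_POWER_ON_ALL	= 0x2,	/* PLL and HS dividers */
	PLL_POWER_ON_DIV	= 0x3,	/* HS dividers only: broken here */
};

enum chip_quirk {
	QUIRK_PLL_PWR_CMD_BROKEN	= 1 << 0,
};

static unsigned int chip_quirks;	/* filled from the per-SoC feature table */

static bool has_quirk(enum chip_quirk q)
{
	return chip_quirks & q;
}

static enum pll_power_state pll_power_cmd(enum pll_power_state state)
{
	/* Command 0x3 does not work on the affected silicon, so request
	 * full PLL power instead; callers need not know. */
	if (has_quirk(QUIRK_PLL_PWR_CMD_BROKEN) && state == PLL_POWER_ON_DIV)
		state = PLL_POWER_ON_ALL;
	return state;
}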
Signed-off-by: Tomi Valkeinen diff --git a/drivers/video/omap2/dss/dsi.c b/drivers/video/omap2/dss/dsi.c index 1464ac4..cbd9ca4 100644 --- a/drivers/video/omap2/dss/dsi.c +++ b/drivers/video/omap2/dss/dsi.c @@ -1059,6 +1059,11 @@ static int dsi_pll_power(enum dsi_pll_power_state state) { int t = 0; + /* DSI-PLL power command 0x3 is not working */ + if (dss_has_feature(FEAT_DSI_PLL_PWR_BUG) && + state == DSI_PLL_POWER_ON_DIV) + state = DSI_PLL_POWER_ON_ALL; + REG_FLD_MOD(DSI_CLK_CTRL, state, 31, 30); /* PLL_PWR_CMD */ /* PLL_PWR_STATUS */ diff --git a/drivers/video/omap2/dss/dss_features.c b/drivers/video/omap2/dss/dss_features.c index aa16222..8c50e18 100644 --- a/drivers/video/omap2/dss/dss_features.c +++ b/drivers/video/omap2/dss/dss_features.c @@ -271,7 +271,7 @@ static struct omap_dss_features omap3630_dss_features = { FEAT_LCDENABLESIGNAL | FEAT_PCKFREEENABLE | FEAT_PRE_MULT_ALPHA | FEAT_FUNCGATED | FEAT_ROWREPEATENABLE | FEAT_LINEBUFFERSPLIT | - FEAT_RESIZECONF, + FEAT_RESIZECONF | FEAT_DSI_PLL_PWR_BUG, .num_mgrs = 2, .num_ovls = 3, diff --git a/drivers/video/omap2/dss/dss_features.h b/drivers/video/omap2/dss/dss_features.h index 12e9c4e..37922ce 100644 --- a/drivers/video/omap2/dss/dss_features.h +++ b/drivers/video/omap2/dss/dss_features.h @@ -40,6 +40,8 @@ enum dss_feat_id { /* Independent core clk divider */ FEAT_CORE_CLK_DIV = 1 << 11, FEAT_LCD_CLK_SRC = 1 << 12, + /* DSI-PLL power command 0x3 is not working */ + FEAT_DSI_PLL_PWR_BUG = 1 << 13, }; /* DSS register field id */ -- cgit v0.10.2 From 0fd08060f1bb9d7d0d712f39257dc3574a632271 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Fri, 8 Apr 2011 09:30:27 +0300 Subject: OMAP: DSS2: fix panel Kconfig dependencies All DPI panels were missing dependency to OMAP2_DSS_DPI. Add the dependency. Signed-off-by: Tomi Valkeinen diff --git a/drivers/video/omap2/displays/Kconfig b/drivers/video/omap2/displays/Kconfig index d18ad6b..609a280 100644 --- a/drivers/video/omap2/displays/Kconfig +++ b/drivers/video/omap2/displays/Kconfig @@ -3,6 +3,7 @@ menu "OMAP2/3 Display Device Drivers" config PANEL_GENERIC_DPI tristate "Generic DPI Panel" + depends on OMAP2_DSS_DPI help Generic DPI panel driver. Supports DVI output for Beagle and OMAP3 SDP. @@ -11,20 +12,20 @@ config PANEL_GENERIC_DPI config PANEL_LGPHILIPS_LB035Q02 tristate "LG.Philips LB035Q02 LCD Panel" - depends on OMAP2_DSS && SPI + depends on OMAP2_DSS_DPI && SPI help LCD Panel used on the Gumstix Overo Palo35 config PANEL_SHARP_LS037V7DW01 tristate "Sharp LS037V7DW01 LCD Panel" - depends on OMAP2_DSS + depends on OMAP2_DSS_DPI select BACKLIGHT_CLASS_DEVICE help LCD Panel used in TI's SDP3430 and EVM boards config PANEL_NEC_NL8048HL11_01B tristate "NEC NL8048HL11-01B Panel" - depends on OMAP2_DSS + depends on OMAP2_DSS_DPI help This NEC NL8048HL11-01B panel is TFT LCD used in the Zoom2/3/3630 sdp boards. @@ -37,7 +38,7 @@ config PANEL_TAAL config PANEL_TPO_TD043MTEA1 tristate "TPO TD043MTEA1 LCD Panel" - depends on OMAP2_DSS && SPI + depends on OMAP2_DSS_DPI && SPI help LCD Panel used in OMAP3 Pandora -- cgit v0.10.2 From f094f8a1b2737a4f3ca46742ff9aaf460d39285e Mon Sep 17 00:00:00 2001 From: "Yann E. MORIN" Date: Thu, 24 Feb 2011 19:36:42 +0100 Subject: kconfig: allow multiple inclusion of the same file Allow 'source'ing the same file from multiple places (eg. from different files, and/or under different conditions). To avoid circular inclusion, scan the source-ancestry of the current file, and abort if already sourced in this branch. 
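The detection itself is just a walk up the chain of including files: if the
file being scanned already appears among its ancestors, the inclusion would
recurse forever, so the scanner prints the inclusion path and aborts.  A small
self-contained sketch of that ancestor check, simplified from the scanner code
in the diff below:

#include <stdbool.h>
#include <string.h>

struct src_file {
	const char *name;
	struct src_file *parent;	/* the file that sourced this one */
};

/* Return true if sourcing 'name' from 'current' would create a cycle,
 * i.e. 'name' is already somewhere on the inclusion chain. */
static bool would_recurse(const struct src_file *current, const char *name)
{
	const struct src_file *iter;

	for (iter = current; iter; iter = iter->parent)
		if (strcmp(iter->name, name) == 0)
			return true;
	return false;
}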
Regenerate the pre-parsed lex.zconf.c_shipped file. Signed-off-by: "Yann E. MORIN" Signed-off-by: Michal Marek diff --git a/scripts/kconfig/lex.zconf.c_shipped b/scripts/kconfig/lex.zconf.c_shipped index 6eb0397..f4b3b1a 100644 --- a/scripts/kconfig/lex.zconf.c_shipped +++ b/scripts/kconfig/lex.zconf.c_shipped @@ -2368,6 +2368,7 @@ void zconf_initscan(const char *name) void zconf_nextfile(const char *name) { + struct file *iter; struct file *file = file_lookup(name); struct buffer *buf = malloc(sizeof(*buf)); memset(buf, 0, sizeof(*buf)); @@ -2383,16 +2384,24 @@ void zconf_nextfile(const char *name) buf->parent = current_buf; current_buf = buf; - if (file->flags & FILE_BUSY) { - printf("%s:%d: do not source '%s' from itself\n", - zconf_curname(), zconf_lineno(), name); - exit(1); - } - if (file->flags & FILE_SCANNED) { - printf("%s:%d: file '%s' is already sourced from '%s'\n", - zconf_curname(), zconf_lineno(), name, - file->parent->name); - exit(1); + for (iter = current_file->parent; iter; iter = iter->parent ) { + if (!strcmp(current_file->name,iter->name) ) { + printf("%s:%d: recursive inclusion detected. " + "Inclusion path:\n current file : '%s'\n", + zconf_curname(), zconf_lineno(), + zconf_curname()); + iter = current_file->parent; + while (iter && \ + strcmp(iter->name,current_file->name)) { + printf(" included from: '%s:%d'\n", + iter->name, iter->lineno-1); + iter = iter->parent; + } + if (iter) + printf(" included from: '%s:%d'\n", + iter->name, iter->lineno+1); + exit(1); + } } file->flags |= FILE_BUSY; file->lineno = 1; diff --git a/scripts/kconfig/zconf.l b/scripts/kconfig/zconf.l index 3dbaec1..f23e3af 100644 --- a/scripts/kconfig/zconf.l +++ b/scripts/kconfig/zconf.l @@ -299,6 +299,7 @@ void zconf_initscan(const char *name) void zconf_nextfile(const char *name) { + struct file *iter; struct file *file = file_lookup(name); struct buffer *buf = malloc(sizeof(*buf)); memset(buf, 0, sizeof(*buf)); @@ -314,16 +315,24 @@ void zconf_nextfile(const char *name) buf->parent = current_buf; current_buf = buf; - if (file->flags & FILE_BUSY) { - printf("%s:%d: do not source '%s' from itself\n", - zconf_curname(), zconf_lineno(), name); - exit(1); - } - if (file->flags & FILE_SCANNED) { - printf("%s:%d: file '%s' is already sourced from '%s'\n", - zconf_curname(), zconf_lineno(), name, - file->parent->name); - exit(1); + for (iter = current_file->parent; iter; iter = iter->parent ) { + if (!strcmp(current_file->name,iter->name) ) { + printf("%s:%d: recursive inclusion detected. " + "Inclusion path:\n current file : '%s'\n", + zconf_curname(), zconf_lineno(), + zconf_curname()); + iter = current_file->parent; + while (iter && \ + strcmp(iter->name,current_file->name)) { + printf(" included from: '%s:%d'\n", + iter->name, iter->lineno-1); + iter = iter->parent; + } + if (iter) + printf(" included from: '%s:%d'\n", + iter->name, iter->lineno+1); + exit(1); + } } file->flags |= FILE_BUSY; file->lineno = 1; -- cgit v0.10.2 From 2b2112f617e8ca600ec24271c93bbd49aa2acce4 Mon Sep 17 00:00:00 2001 From: "Yann E. MORIN" Date: Thu, 24 Feb 2011 19:36:43 +0100 Subject: kconfig: get rid of unused flags Now that we detect recusrion of sourced files, get rid of now unused flags. Regenerate lex.zconf.c_shipped file. Signed-off-by: "Yann E. 
MORIN" Signed-off-by: Michal Marek diff --git a/scripts/kconfig/expr.h b/scripts/kconfig/expr.h index 3d238db..16bfae2 100644 --- a/scripts/kconfig/expr.h +++ b/scripts/kconfig/expr.h @@ -20,12 +20,8 @@ struct file { struct file *parent; const char *name; int lineno; - int flags; }; -#define FILE_BUSY 0x0001 -#define FILE_SCANNED 0x0002 - typedef enum tristate { no, mod, yes } tristate; diff --git a/scripts/kconfig/lex.zconf.c_shipped b/scripts/kconfig/lex.zconf.c_shipped index f4b3b1a..d918291 100644 --- a/scripts/kconfig/lex.zconf.c_shipped +++ b/scripts/kconfig/lex.zconf.c_shipped @@ -2363,7 +2363,6 @@ void zconf_initscan(const char *name) current_file = file_lookup(name); current_file->lineno = 1; - current_file->flags = FILE_BUSY; } void zconf_nextfile(const char *name) @@ -2403,7 +2402,6 @@ void zconf_nextfile(const char *name) exit(1); } } - file->flags |= FILE_BUSY; file->lineno = 1; file->parent = current_file; current_file = file; @@ -2413,8 +2411,6 @@ static void zconf_endfile(void) { struct buffer *parent; - current_file->flags |= FILE_SCANNED; - current_file->flags &= ~FILE_BUSY; current_file = current_file->parent; parent = current_buf->parent; diff --git a/scripts/kconfig/zconf.l b/scripts/kconfig/zconf.l index f23e3af..b22f884 100644 --- a/scripts/kconfig/zconf.l +++ b/scripts/kconfig/zconf.l @@ -294,7 +294,6 @@ void zconf_initscan(const char *name) current_file = file_lookup(name); current_file->lineno = 1; - current_file->flags = FILE_BUSY; } void zconf_nextfile(const char *name) @@ -334,7 +333,6 @@ void zconf_nextfile(const char *name) exit(1); } } - file->flags |= FILE_BUSY; file->lineno = 1; file->parent = current_file; current_file = file; @@ -344,8 +342,6 @@ static void zconf_endfile(void) { struct buffer *parent; - current_file->flags |= FILE_SCANNED; - current_file->flags &= ~FILE_BUSY; current_file = current_file->parent; parent = current_buf->parent; -- cgit v0.10.2 From 71a83ec7da8910f374a1c82e96d2704aa45d9238 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 5 Apr 2011 13:24:57 +0300 Subject: Kconfig: improve KALLSYMS_ALL documentation Dumb users like myself are not able to grasp from the existing KALLSYMS_ALL documentation that this option is not what they need. Improve the help message and make it clearer that KALLSYMS is enough in the majority of use cases, and KALLSYMS_ALL should really be used very rarely. Signed-off-by: Artem Bityutskiy Signed-off-by: Michal Marek diff --git a/init/Kconfig b/init/Kconfig index 56240e7..563065d 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -968,12 +968,18 @@ config KALLSYMS_ALL bool "Include all symbols in kallsyms" depends on DEBUG_KERNEL && KALLSYMS help - Normally kallsyms only contains the symbols of functions, for nicer - OOPS messages. Some debuggers can use kallsyms for other - symbols too: say Y here to include all symbols, if you need them - and you don't care about adding 300k to the size of your kernel. - - Say N. + Normally kallsyms only contains the symbols of functions for nicer + OOPS messages and backtraces (i.e., symbols from the text and inittext + sections). This is sufficient for most cases. And only in very rare + cases (e.g., when a debugger is used) all symbols are required (e.g., + names of variables from the data sections, etc). + + This option makes sure that all symbols are loaded into the kernel + image (i.e., symbols from all sections) in cost of increased kernel + size (depending on the kernel configuration, it may be 300KiB or + something like this). 
+ + Say N unless you really need all symbols. config KALLSYMS_EXTRA_PASS bool "Do an extra kallsyms pass" -- cgit v0.10.2 From 1e2795a1191bb5ff05e80d77feffd51ac875c06d Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 5 Apr 2011 13:24:58 +0300 Subject: kbuild: move KALLSYMS_EXTRA_PASS from Kconfig to Makefile At the moment we have the CONFIG_KALLSYMS_EXTRA_PASS Kconfig switch, which users can enable or disable while configuring the kernel. This option is then used by 'make' to determine whether an extra kallsyms pass is needed or not. However, this approach is not nice and confusing, and this patch moves CONFIG_KALLSYMS_EXTRA_PASS from Kconfig to Makefile instead. The rationale is below. 1. CONFIG_KALLSYMS_EXTRA_PASS is really about the build time, not run-time. There is no real need for it to be in Kconfig. It is just an additional work-around which should be used only in rare cases, when someone breaks kallsyms, so Kbuild/Makefile is much better place for this option. 2. Grepping CONFIG_KALLSYMS_EXTRA_PASS shows that many defconfigs have it enabled, probably not because they try to work-around a kallsyms bug, but just because the Kconfig help text is confusing and does not really make it clear that this option should not be used unless except when kallsyms is broken. 3. And since many people have CONFIG_KALLSYMS_EXTRA_PASS enabled in their Kconfig, we do might fail to notice kallsyms bugs in time. E.g., many testers use "make allyesconfig" to test builds, which will enable CONFIG_KALLSYMS_EXTRA_PASS and kallsyms breakage will not be noticed. To address that, this patch: 1. Kills CONFIG_KALLSYMS_EXTRA_PASS 2. Changes Makefile so that people can use "make KALLSYMS_EXTRA_PASS=1" to enable the extra pass if needed. Additionally, they may define KALLSYMS_EXTRA_PASS as an environment variable. 3. By default KALLSYMS_EXTRA_PASS is disabled and if kallsyms has issues, "make" should print a warning and suggest using KALLSYMS_EXTRA_PASS Signed-off-by: Artem Bityutskiy [mmarek: Removed make help text, is not necessary] Signed-off-by: Michal Marek diff --git a/Makefile b/Makefile index ba7a55c..c3bd316 100644 --- a/Makefile +++ b/Makefile @@ -797,15 +797,17 @@ ifdef CONFIG_KALLSYMS # o The correct .tmp_kallsyms2.o is linked into the final vmlinux. # o Verify that the System.map from vmlinux matches the map from # .tmp_vmlinux2, just in case we did not generate kallsyms correctly. -# o If CONFIG_KALLSYMS_EXTRA_PASS is set, do an extra pass using +# o If 'make KALLSYMS_EXTRA_PASS=1" was used, do an extra pass using # .tmp_vmlinux3 and .tmp_kallsyms3.o. This is only meant as a # temporary bypass to allow the kernel to be built while the # maintainers work out what went wrong with kallsyms. -ifdef CONFIG_KALLSYMS_EXTRA_PASS -last_kallsyms := 3 -else last_kallsyms := 2 + +ifdef KALLSYMS_EXTRA_PASS +ifneq ($(KALLSYMS_EXTRA_PASS),0) +last_kallsyms := 3 +endif endif kallsyms.o := .tmp_kallsyms$(last_kallsyms).o @@ -816,7 +818,8 @@ define verify_kallsyms $(cmd_sysmap) .tmp_vmlinux$(last_kallsyms) .tmp_System.map $(Q)cmp -s System.map .tmp_System.map || \ (echo Inconsistent kallsyms data; \ - echo Try setting CONFIG_KALLSYMS_EXTRA_PASS; \ + echo This is a bug - please report about it; \ + echo Try "make KALLSYMS_EXTRA_PASS=1" as a workaround; \ rm .tmp_kallsyms* ; /bin/false ) endef diff --git a/init/Kconfig b/init/Kconfig index 563065d..0edda61 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -981,18 +981,6 @@ config KALLSYMS_ALL Say N unless you really need all symbols. 
-config KALLSYMS_EXTRA_PASS - bool "Do an extra kallsyms pass" - depends on KALLSYMS - help - If kallsyms is not working correctly, the build will fail with - inconsistent kallsyms data. If that occurs, log a bug report and - turn on KALLSYMS_EXTRA_PASS which should result in a stable build. - Always say N here unless you find a bug in kallsyms, which must be - reported. KALLSYMS_EXTRA_PASS is only a temporary workaround while - you wait for kallsyms to be fixed. - - config HOTPLUG bool "Support for hot-pluggable devices" if EXPERT default y -- cgit v0.10.2 From 7708992616487c00d5ca8ed7612111180d8e1b68 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 15 Apr 2011 10:51:27 -0400 Subject: xen/blkback: Seperate the bio allocation and the bio submission. We seperate the bio allocation (bio_alloc) from the bio submission so that the error paths are much easier, and also so that the bio submission can be done in one tight loop. It also makes the plug/unplug calls much much easier. Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 464f2e0..3c10499 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -421,7 +421,8 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int nseg; struct bio *bio = NULL; - int ret, i; + struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + int ret, i, nbio = 0; int operation; struct blk_plug plug; struct request_queue *q; @@ -529,14 +530,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, goto fail_flush; } - /* Get a reference count for the disk queue and start sending I/O */ - blk_get_queue(q); - blk_start_plug(&plug); - - /* We set it one so that the last submit_bio does not have to call - * atomic_inc. - */ - atomic_set(&pending_req->pendcnt, 1); + /* This corresponding blkif_put is done in __end_block_io_op */ blkif_get(blkif); for (i = 0; i < nseg; i++) { @@ -552,12 +546,8 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, blkbk->pending_page(pending_req, i), seg[i].nsec << 9, seg[i].buf & ~PAGE_MASK) == 0)) { - if (bio) { - atomic_inc(&pending_req->pendcnt); - submit_bio(operation, bio); - } - bio = bio_alloc(GFP_KERNEL, nseg-i); + bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, nseg-i); if (unlikely(bio == NULL)) goto fail_put_bio; @@ -573,7 +563,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, /* This will be hit if the operation was a barrier. */ if (!bio) { BUG_ON(operation != WRITE_BARRIER); - bio = bio_alloc(GFP_KERNEL, 0); + bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, 0); if (unlikely(bio == NULL)) goto fail_put_bio; @@ -583,15 +573,28 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, bio->bi_sector = -1; } - submit_bio(operation, bio); + + /* We set it one so that the last submit_bio does not have to call + * atomic_inc. + */ + atomic_set(&pending_req->pendcnt, nbio); + + /* Get a reference count for the disk queue and start sending I/O */ + blk_get_queue(q); + blk_start_plug(&plug); + + for (i = 0; i < nbio; i++) + submit_bio(operation, biolist[i]); + + blk_finish_plug(&plug); + /* Let the I/Os go.. 
*/ + blk_put_queue(q); if (operation == READ) blkif->st_rd_sect += preq.nr_sects; else if (operation == WRITE || operation == WRITE_BARRIER) blkif->st_wr_sect += preq.nr_sects; - blk_finish_plug(&plug); - blk_put_queue(q); return; fail_flush: @@ -604,11 +607,9 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, return; fail_put_bio: + for (i = 0; i < (nbio-1); i++) + bio_put(biolist[i]); __end_block_io_op(pending_req, -EINVAL); - if (bio) - bio_put(bio); - blk_finish_plug(&plug); - blk_put_queue(q); msleep(1); /* back off a bit */ return; } -- cgit v0.10.2 From b0aef17924a06646403cae8eecf6c73219a63c19 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 15 Apr 2011 10:58:05 -0400 Subject: xen/blkback: Cleanup move the code a bit around. Moving it so that the code that 'fast_flush_area' code is close to the code that deals with it so that the reader won't lose focus. Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 3c10499..f282463 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -167,41 +167,18 @@ static void free_req(struct pending_req *req) } /* - * Unmap the grant references, and also remove the M2P over-rides - * used in the 'pending_req'. -*/ -static void fast_flush_area(struct pending_req *req) + * Notification from the guest OS. + */ +static void blkif_notify_work(struct blkif_st *blkif) { - struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - unsigned int i, invcount = 0; - grant_handle_t handle; - int ret; - - for (i = 0; i < req->nr_pages; i++) { - handle = pending_handle(req, i); - if (handle == BLKBACK_INVALID_HANDLE) - continue; - gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i), - GNTMAP_host_map, handle); - pending_handle(req, i) = BLKBACK_INVALID_HANDLE; - invcount++; - } + blkif->waiting_reqs = 1; + wake_up(&blkif->wq); +} - ret = HYPERVISOR_grant_table_op( - GNTTABOP_unmap_grant_ref, unmap, invcount); - BUG_ON(ret); - /* Note, we use invcount, so nr->pages, so we can't index - * using vaddr(req, i). - */ - for (i = 0; i < invcount; i++) { - ret = m2p_remove_override( - virt_to_page(unmap[i].host_addr), false); - if (ret) { - printk(KERN_ALERT "Failed to remove M2P override for " \ - "%lx\n", (unsigned long)unmap[i].host_addr); - continue; - } - } +irqreturn_t blkif_be_int(int irq, void *dev_id) +{ + blkif_notify_work(dev_id); + return IRQ_HANDLED; } /* @@ -265,6 +242,43 @@ int blkif_schedule(void *arg) } /* + * Unmap the grant references, and also remove the M2P over-rides + * used in the 'pending_req'. +*/ +static void fast_flush_area(struct pending_req *req) +{ + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + unsigned int i, invcount = 0; + grant_handle_t handle; + int ret; + + for (i = 0; i < req->nr_pages; i++) { + handle = pending_handle(req, i); + if (handle == BLKBACK_INVALID_HANDLE) + continue; + gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i), + GNTMAP_host_map, handle); + pending_handle(req, i) = BLKBACK_INVALID_HANDLE; + invcount++; + } + + ret = HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, unmap, invcount); + BUG_ON(ret); + /* Note, we use invcount, so nr->pages, so we can't index + * using vaddr(req, i). 
+ */ + for (i = 0; i < invcount; i++) { + ret = m2p_remove_override( + virt_to_page(unmap[i].host_addr), false); + if (ret) { + printk(KERN_ALERT "Failed to remove M2P override for " \ + "%lx\n", (unsigned long)unmap[i].host_addr); + continue; + } + } +} +/* * Completion callback on the bio's. Called as bh->b_end_io() */ @@ -305,23 +319,6 @@ static void end_block_io_op(struct bio *bio, int error) } -/* - * Notification from the guest OS. - */ - -static void blkif_notify_work(struct blkif_st *blkif) -{ - blkif->waiting_reqs = 1; - wake_up(&blkif->wq); -} - -irqreturn_t blkif_be_int(int irq, void *dev_id) -{ - blkif_notify_work(dev_id); - return IRQ_HANDLED; -} - - /* * Function to copy the from the ring buffer the 'struct blkif_request' -- cgit v0.10.2 From 1a95fe6e42cefc52c62c471ad87d7fe8643231df Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 15 Apr 2011 11:35:13 -0400 Subject: xen/blkback: Shuffle code around (vbd_translate moved higher). We take out the chunk of code dealing with mapping to the guest of pages into the xen_blk_map_buf code. And we also move the vbd_translate to be done much earlier. Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index f282463..211b200 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -241,6 +241,10 @@ int blkif_schedule(void *arg) return 0; } +struct seg_buf { + unsigned long buf; + unsigned int nsec; +}; /* * Unmap the grant references, and also remove the M2P over-rides * used in the 'pending_req'. @@ -278,6 +282,62 @@ static void fast_flush_area(struct pending_req *req) } } } +static int xen_blk_map_buf(struct blkif_request *req, struct pending_req *pending_req, + struct seg_buf seg[]) +{ + struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + int i; + int nseg = req->nr_segments; + int ret = 0; + /* Fill out preq.nr_sects with proper amount of sectors, and setup + * assign map[..] with the PFN of the page in our domain with the + * corresponding grant reference for each page. + */ + for (i = 0; i < nseg; i++) { + uint32_t flags; + + flags = GNTMAP_host_map; + if (pending_req->operation != BLKIF_OP_READ) + flags |= GNTMAP_readonly; + gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, + req->u.rw.seg[i].gref, pending_req->blkif->domid); + } + + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); + BUG_ON(ret); + + /* Now swizzel the MFN in our domain with the MFN from the other domain + * so that when we access vaddr(pending_req,i) it has the contents of + * the page from the other domain. + */ + for (i = 0; i < nseg; i++) { + if (unlikely(map[i].status != 0)) { + DPRINTK("invalid buffer -- could not remap it\n"); + map[i].handle = BLKBACK_INVALID_HANDLE; + ret |= 1; + } + + pending_handle(pending_req, i) = map[i].handle; + + if (ret) + continue; + + ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr), + blkbk->pending_page(pending_req, i), false); + if (ret) { + printk(KERN_ALERT "Failed to install M2P override for"\ + " %lx (ret: %d)\n", (unsigned long) + map[i].dev_bus_addr, ret); + /* We could switch over to GNTTABOP_copy */ + continue; + } + + seg[i].buf = map[i].dev_bus_addr | + (req->u.rw.seg[i].first_sect << 9); + } + return ret; +} + /* * Completion callback on the bio's. 
Called as bh->b_end_io() */ @@ -411,15 +471,12 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, struct blkif_request *req, struct pending_req *pending_req) { - struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct phys_req preq; - struct { - unsigned long buf; unsigned int nsec; - } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct seg_buf seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int nseg; struct bio *bio = NULL; struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - int ret, i, nbio = 0; + int i, nbio = 0; int operation; struct blk_plug plug; struct request_queue *q; @@ -444,6 +501,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, if (unlikely(nseg == 0 && operation != WRITE_BARRIER) || unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { DPRINTK("Bad number of segments in request (%d)\n", nseg); + /* Haven't submitted any bio's yet. */ goto fail_response; } @@ -456,77 +514,30 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, pending_req->operation = req->operation; pending_req->status = BLKIF_RSP_OKAY; pending_req->nr_pages = nseg; - - /* Fill out preq.nr_sects with proper amount of sectors, and setup - * assign map[..] with the PFN of the page in our domain with the - * corresponding grant reference for each page. - */ for (i = 0; i < nseg; i++) { - uint32_t flags; - seg[i].nsec = req->u.rw.seg[i].last_sect - req->u.rw.seg[i].first_sect + 1; if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) || (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect)) goto fail_response; preq.nr_sects += seg[i].nsec; - - flags = GNTMAP_host_map; - if (operation != READ) - flags |= GNTMAP_readonly; - gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, - req->u.rw.seg[i].gref, blkif->domid); } - ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); - BUG_ON(ret); - - /* Now swizzel the MFN in our domain with the MFN from the other domain - * so that when we access vaddr(pending_req,i) it has the contents of - * the page from the other domain. - */ - for (i = 0; i < nseg; i++) { - if (unlikely(map[i].status != 0)) { - DPRINTK("invalid buffer -- could not remap it\n"); - map[i].handle = BLKBACK_INVALID_HANDLE; - ret |= 1; - } - - pending_handle(pending_req, i) = map[i].handle; - - if (ret) - continue; - - ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr), - blkbk->pending_page(pending_req, i), false); - if (ret) { - printk(KERN_ALERT "Failed to install M2P override for"\ - " %lx (ret: %d)\n", (unsigned long) - map[i].dev_bus_addr, ret); - /* We could switch over to GNTTABOP_copy */ - continue; - } - - seg[i].buf = map[i].dev_bus_addr | - (req->u.rw.seg[i].first_sect << 9); + if (vbd_translate(&preq, blkif, operation) != 0) { + DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", + operation == READ ? "read" : "write", + preq.sector_number, + preq.sector_number + preq.nr_sects, preq.dev); + goto fail_response; } - /* If we have failed at this point, we need to undo the M2P override, * set gnttab_set_unmap_op on all of the grant references and perform * the hypercall to unmap the grants - that is all done in * fast_flush_area. */ - if (ret) + if (xen_blk_map_buf(req, pending_req, seg)) goto fail_flush; - if (vbd_translate(&preq, blkif, operation) != 0) { - DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", - operation == READ ? 
"read" : "write", - preq.sector_number, - preq.sector_number + preq.nr_sects, preq.dev); - goto fail_flush; - } - /* This corresponding blkif_put is done in __end_block_io_op */ blkif_get(blkif); -- cgit v0.10.2 From 976222e05ea5a9959ccf880d7a24efbf79b3c6cf Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 15 Apr 2011 11:38:29 -0400 Subject: xen/blkback: Move the check for misaligned I/O higher. We move it up higher to be in same loop that actually computes the sector number. This way, all of the code that deals with verifying that the request is correct is all done before we do any of the page mapping, I/O submission, etc. Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 211b200..9598e0f 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -521,6 +521,13 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect)) goto fail_response; preq.nr_sects += seg[i].nsec; + + if (((int)preq.sector_number|(int)seg[i].nsec) & + ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { + DPRINTK("Misaligned I/O request from domain %d", + blkif->domid); + goto fail_response; + } } if (vbd_translate(&preq, blkif, operation) != 0) { @@ -542,13 +549,6 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, blkif_get(blkif); for (i = 0; i < nseg; i++) { - if (((int)preq.sector_number|(int)seg[i].nsec) & - ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { - DPRINTK("Misaligned I/O request from domain %d", - blkif->domid); - goto fail_put_bio; - } - while ((bio == NULL) || (bio_add_page(bio, blkbk->pending_page(pending_req, i), -- cgit v0.10.2 From 9f3aedf573dd034d59e7eb6c4ee97648d5be8fc6 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 15 Apr 2011 11:50:34 -0400 Subject: xen/blkback: Change fast_flush_area to xen_blkbk_unmap, and tweak xen_blk_map_seg. The previous name ('fast_flush_area') had nothing to do with what it does right now. Changing the names so that the code dealing with mapping pages in and out of the guest is called xen_blkbk_[map|unmap]. Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 9598e0f..c645c83 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -249,7 +249,7 @@ struct seg_buf { * Unmap the grant references, and also remove the M2P over-rides * used in the 'pending_req'. */ -static void fast_flush_area(struct pending_req *req) +static void xen_blkbk_unmap(struct pending_req *req) { struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int i, invcount = 0; @@ -282,8 +282,8 @@ static void fast_flush_area(struct pending_req *req) } } } -static int xen_blk_map_buf(struct blkif_request *req, struct pending_req *pending_req, - struct seg_buf seg[]) +static int xen_blkbk_map(struct blkif_request *req, struct pending_req *pending_req, + struct seg_buf seg[]) { struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; int i; @@ -361,7 +361,7 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) * the proper response on the ring. 
*/ if (atomic_dec_and_test(&pending_req->pendcnt)) { - fast_flush_area(pending_req); + xen_blkbk_unmap(pending_req); make_response(pending_req->blkif, pending_req->id, pending_req->operation, pending_req->status); blkif_put(pending_req->blkif); @@ -540,9 +540,9 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, /* If we have failed at this point, we need to undo the M2P override, * set gnttab_set_unmap_op on all of the grant references and perform * the hypercall to unmap the grants - that is all done in - * fast_flush_area. + * xen_blkbk_unmap. */ - if (xen_blk_map_buf(req, pending_req, seg)) + if (xen_blkbk_map(req, pending_req, seg)) goto fail_flush; /* This corresponding blkif_put is done in __end_block_io_op */ @@ -606,7 +606,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, return; fail_flush: - fast_flush_area(pending_req); + xen_blkbk_unmap(pending_req); fail_response: /* Haven't submitted any bio's yet. */ make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); -- cgit v0.10.2 From 26626f1172fb4f3f323239a6a5cf4e082643fa46 Mon Sep 17 00:00:00 2001 From: Yang Ruirui Date: Sat, 16 Apr 2011 19:17:48 -0400 Subject: ext4: release page cache in ext4_mb_load_buddy error path Add missing page_cache_release in the error path of ext4_mb_load_buddy Signed-off-by: Yang Ruirui Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index d8a16ee..15bfa44 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1273,6 +1273,8 @@ repeat_load_buddy: return 0; err: + if (page) + page_cache_release(page); if (e4b->bd_bitmap_page) page_cache_release(e4b->bd_bitmap_page); if (e4b->bd_buddy_page) -- cgit v0.10.2 From a7f800131f35925299860a95259453c9bc0c272f Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 14 Apr 2011 17:13:22 +0900 Subject: ARM: mach-shmobile: clock-sh7372: remove status check from fsidiv_recalc clock status check is not needed in recalc function. clk->rate will be 0 in clk_set_rate without this patch. 
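For context, a minimal sketch (illustrative only; the helper name and the exact call chain in the SH clock framework are assumptions, not code from this patch) of why the status check was harmful: the framework recomputes a clock's rate through its ->recalc() op and caches the result, so returning 0 while the divider is still disabled leaves clk->rate at 0 even though clk_set_rate() has just programmed a valid divider.

	/* Illustrative sketch, not part of the patch: the value returned by
	 * ->recalc() is cached, so an early "return 0" zeroes clk->rate. */
	static void clk_refresh_rate(struct clk *clk)
	{
		if (clk->ops && clk->ops->recalc)
			clk->rate = clk->ops->recalc(clk);
	}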
Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index e9731b5..6c79b40 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -421,9 +421,6 @@ static unsigned long fsidiv_recalc(struct clk *clk) value = __raw_readl(clk->mapping->base); - if ((value & 0x3) != 0x3) - return 0; - value >>= 16; if (value < 2) return 0; -- cgit v0.10.2 From c33724a43875786719f51916311308f2752d846e Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Tue, 28 Apr 2009 15:05:20 +0200 Subject: kconfig: Do not record timestamp in auto.conf and autoconf.h Timestamps in file data are useless and there is already one in .config Signed-off-by: Michal Marek diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index 61c35bf..834eecb 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -784,7 +784,6 @@ int conf_write_autoconf(void) const char *str; const char *name; FILE *out, *tristate, *out_h; - time_t now; int i; sym_clear_all_valid(); @@ -811,22 +810,19 @@ int conf_write_autoconf(void) return 1; } - time(&now); fprintf(out, "#\n" "# Automatically generated make config: don't edit\n" "# %s\n" - "# %s" "#\n", - rootmenu.prompt->text, ctime(&now)); + rootmenu.prompt->text); fprintf(tristate, "#\n" "# Automatically generated - do not edit\n" "\n"); fprintf(out_h, "/*\n" " * Automatically generated C config: don't edit\n" " * %s\n" - " * %s" " */\n", - rootmenu.prompt->text, ctime(&now)); + rootmenu.prompt->text); for_all_symbols(i, sym) { sym_calc_value(sym); -- cgit v0.10.2 From 6ae9ecb86188cc8419024cdb299f18d4ae4f5713 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Thu, 31 Mar 2011 15:47:55 +0200 Subject: kbuild: Call gzip with -n The timestamps recorded in the .gz files add no value. 
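As a quick illustration of the problem (an assumption here is that gzip of this era records the current time when compressing from stdin, which is how the kbuild rule feeds it; exact behaviour varies between gzip versions), two otherwise identical builds produce different .gz bytes unless -n is used:

	$ echo data > f
	$ cat f | gzip -9 > a.gz;  sleep 1;  cat f | gzip -9 > b.gz
	$ cmp -s a.gz b.gz || echo differ        # embedded mtime differs
	differ
	$ cat f | gzip -n -9 > a.gz;  sleep 1;  cat f | gzip -n -9 > b.gz
	$ cmp -s a.gz b.gz && echo identical     # -n omits name and timestamp
	identical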
Signed-off-by: Michal Marek diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 1c702ca..93b2b59 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -197,7 +197,7 @@ cmd_objcopy = $(OBJCOPY) $(OBJCOPYFLAGS) $(OBJCOPYFLAGS_$(@F)) $< $@ # --------------------------------------------------------------------------- quiet_cmd_gzip = GZIP $@ -cmd_gzip = (cat $(filter-out FORCE,$^) | gzip -f -9 > $@) || \ +cmd_gzip = (cat $(filter-out FORCE,$^) | gzip -n -f -9 > $@) || \ (rm -f $@ ; false) # DTC diff --git a/scripts/gen_initramfs_list.sh b/scripts/gen_initramfs_list.sh index 55caecd..4a43fe1 100644 --- a/scripts/gen_initramfs_list.sh +++ b/scripts/gen_initramfs_list.sh @@ -226,7 +226,7 @@ cpio_list= output="/dev/stdout" output_file="" is_cpio_compressed= -compr="gzip -9 -f" +compr="gzip -n -9 -f" arg="$1" case "$arg" in @@ -240,7 +240,7 @@ case "$arg" in output_file="$1" cpio_list="$(mktemp ${TMPDIR:-/tmp}/cpiolist.XXXXXX)" output=${cpio_list} - echo "$output_file" | grep -q "\.gz$" && compr="gzip -9 -f" + echo "$output_file" | grep -q "\.gz$" && compr="gzip -n -9 -f" echo "$output_file" | grep -q "\.bz2$" && compr="bzip2 -9 -f" echo "$output_file" | grep -q "\.lzma$" && compr="lzma -9 -f" echo "$output_file" | grep -q "\.xz$" && \ -- cgit v0.10.2 From 09ff9fecc039d60fff6c11d47522af61e89fff56 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Thu, 31 Mar 2011 16:09:47 +0200 Subject: kbuild: Use the deterministic mode of ar Signed-off-by: Michal Marek diff --git a/scripts/Makefile.build b/scripts/Makefile.build index d5f925a..5f87d37 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -345,7 +345,7 @@ quiet_cmd_link_o_target = LD $@ cmd_link_o_target = $(if $(strip $(obj-y)),\ $(LD) $(ld_flags) -r -o $@ $(filter $(obj-y), $^) \ $(cmd_secanalysis),\ - rm -f $@; $(AR) rcs $@) + rm -f $@; $(AR) rcsD $@) $(builtin-target): $(obj-y) FORCE $(call if_changed,link_o_target) @@ -371,7 +371,7 @@ $(modorder-target): $(subdir-ym) FORCE # ifdef lib-target quiet_cmd_link_l_target = AR $@ -cmd_link_l_target = rm -f $@; $(AR) rcs $@ $(lib-y) +cmd_link_l_target = rm -f $@; $(AR) rcsD $@ $(lib-y) $(lib-target): $(lib-y) FORCE $(call if_changed,link_l_target) -- cgit v0.10.2 From 061296dc2c14f852604fc6849669fe0b78bb1eda Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Thu, 31 Mar 2011 17:13:55 +0200 Subject: kbuild: Drop unused LINUX_COMPILE_TIME and LINUX_COMPILE_DOMAIN macros Signed-off-by: Michal Marek diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h index 50ad317..82416a8 100755 --- a/scripts/mkcompile_h +++ b/scripts/mkcompile_h @@ -63,21 +63,9 @@ UTS_TRUNCATE="cut -b -$UTS_LEN" echo \#define UTS_VERSION \"`echo $UTS_VERSION | $UTS_TRUNCATE`\" - echo \#define LINUX_COMPILE_TIME \"`date +%T`\" echo \#define LINUX_COMPILE_BY \"`whoami`\" echo \#define LINUX_COMPILE_HOST \"`hostname | $UTS_TRUNCATE`\" - domain=`dnsdomainname 2> /dev/null` - if [ -z "$domain" ]; then - domain=`domainname 2> /dev/null` - fi - - if [ -n "$domain" ]; then - echo \#define LINUX_COMPILE_DOMAIN \"`echo $domain | $UTS_TRUNCATE`\" - else - echo \#define LINUX_COMPILE_DOMAIN - fi - echo \#define LINUX_COMPILER \"`$CC -v 2>&1 | tail -n 1`\" ) > .tmpcompile @@ -91,8 +79,8 @@ UTS_TRUNCATE="cut -b -$UTS_LEN" # first line. 
if [ -r $TARGET ] && \ - grep -v 'UTS_VERSION\|LINUX_COMPILE_TIME' $TARGET > .tmpver.1 && \ - grep -v 'UTS_VERSION\|LINUX_COMPILE_TIME' .tmpcompile > .tmpver.2 && \ + grep -v 'UTS_VERSION' $TARGET > .tmpver.1 && \ + grep -v 'UTS_VERSION' .tmpcompile > .tmpver.2 && \ cmp -s .tmpver.1 .tmpver.2; then rm -f .tmpcompile else -- cgit v0.10.2 From 53e6892c0411006848882eacfcfea9e93681b55d Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Tue, 5 Apr 2011 14:32:30 +0200 Subject: kbuild: Allow to override LINUX_COMPILE_BY and LINUX_COMPILE_HOST macros Make it possible to override the user@host string displayed during boot and in /proc/version by the environment variables KBUILD_BUILD_USER and KBUILD_BUILD_HOST. Several distributions patch scripts/mkcompile_h to achieve this, so let's provide an official way. Also, document the KBUILD_BUILD_TIMESTAMP variable while at it. Signed-off-by: Michal Marek diff --git a/Documentation/kbuild/kbuild.txt b/Documentation/kbuild/kbuild.txt index f1431d0..f11ebb3 100644 --- a/Documentation/kbuild/kbuild.txt +++ b/Documentation/kbuild/kbuild.txt @@ -201,3 +201,15 @@ KBUILD_ENABLE_EXTRA_GCC_CHECKS -------------------------------------------------- If enabled over the make command line with "W=1", it turns on additional gcc -W... options for more extensive build-time checking. + +KBUILD_BUILD_TIMESTAMP +-------------------------------------------------- +Setting this to a date string overrides the timestamp used in the +UTS_VERSION definition (uname -v in the running kernel). The default value +is the output of the date command at one point during build. + +KBUILD_BUILD_USER, KBUILD_BUILD_HOST +-------------------------------------------------- +These two variables allow to override the user@host string displayed during +boot and in /proc/version. The default value is the output of the commands +whoami and host, respectively. diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h index 82416a8..7ad6bf7 100755 --- a/scripts/mkcompile_h +++ b/scripts/mkcompile_h @@ -42,6 +42,16 @@ if [ -z "$KBUILD_BUILD_TIMESTAMP" ]; then else TIMESTAMP=$KBUILD_BUILD_TIMESTAMP fi +if test -z "$KBUILD_BUILD_USER"; then + LINUX_COMPILE_BY=`whoami` +else + LINUX_COMPILE_BY=$KBUILD_BUILD_USER +fi +if test -z "$KBUILD_BUILD_HOST"; then + LINUX_COMPILE_HOST=`hostname` +else + LINUX_COMPILE_HOST=$KBUILD_BUILD_HOST +fi UTS_VERSION="#$VERSION" CONFIG_FLAGS="" @@ -63,8 +73,8 @@ UTS_TRUNCATE="cut -b -$UTS_LEN" echo \#define UTS_VERSION \"`echo $UTS_VERSION | $UTS_TRUNCATE`\" - echo \#define LINUX_COMPILE_BY \"`whoami`\" - echo \#define LINUX_COMPILE_HOST \"`hostname | $UTS_TRUNCATE`\" + echo \#define LINUX_COMPILE_BY \"`echo $LINUX_COMPILE_BY | $UTS_TRUNCATE`\" + echo \#define LINUX_COMPILE_HOST \"`echo $LINUX_COMPILE_HOST | $UTS_TRUNCATE`\" echo \#define LINUX_COMPILER \"`$CC -v 2>&1 | tail -n 1`\" ) > .tmpcompile -- cgit v0.10.2 From a8b8017c34fefcb763d8b06c294b58d1c480b2e4 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Thu, 31 Mar 2011 23:16:42 +0200 Subject: initramfs: Use KBUILD_BUILD_TIMESTAMP for generated entries gen_init_cpio gets the current time and uses it for each symlink, special file, and directory. Grab the current time once and make it possible to override it with the KBUILD_BUILD_TIMESTAMP variable for reproducible builds. Signed-off-by: Michal Marek diff --git a/Documentation/kbuild/kbuild.txt b/Documentation/kbuild/kbuild.txt index f11ebb3..646e2c1 100644 --- a/Documentation/kbuild/kbuild.txt +++ b/Documentation/kbuild/kbuild.txt @@ -205,7 +205,8 @@ gcc -W... 
options for more extensive build-time checking. KBUILD_BUILD_TIMESTAMP -------------------------------------------------- Setting this to a date string overrides the timestamp used in the -UTS_VERSION definition (uname -v in the running kernel). The default value +UTS_VERSION definition (uname -v in the running kernel). The value has to +be a string that can be passed to date -d. The default value is the output of the date command at one point during build. KBUILD_BUILD_USER, KBUILD_BUILD_HOST diff --git a/scripts/gen_initramfs_list.sh b/scripts/gen_initramfs_list.sh index 4a43fe1..d44cf67 100644 --- a/scripts/gen_initramfs_list.sh +++ b/scripts/gen_initramfs_list.sh @@ -287,8 +287,15 @@ done # we are carefull to delete tmp files if [ ! -z ${output_file} ]; then if [ -z ${cpio_file} ]; then + timestamp= + if test -n "$KBUILD_BUILD_TIMESTAMP"; then + timestamp="$(date -d"$KBUILD_BUILD_TIMESTAMP" +%s || :)" + if test -n "$timestamp"; then + timestamp="-t $timestamp" + fi + fi cpio_tfile="$(mktemp ${TMPDIR:-/tmp}/cpiofile.XXXXXX)" - usr/gen_init_cpio ${cpio_list} > ${cpio_tfile} + usr/gen_init_cpio $timestamp ${cpio_list} > ${cpio_tfile} else cpio_tfile=${cpio_file} fi diff --git a/usr/gen_init_cpio.c b/usr/gen_init_cpio.c index 7f06884..af0f22f 100644 --- a/usr/gen_init_cpio.c +++ b/usr/gen_init_cpio.c @@ -22,6 +22,7 @@ static unsigned int offset; static unsigned int ino = 721; +static time_t default_mtime; struct file_handler { const char *type; @@ -102,7 +103,6 @@ static int cpio_mkslink(const char *name, const char *target, unsigned int mode, uid_t uid, gid_t gid) { char s[256]; - time_t mtime = time(NULL); if (name[0] == '/') name++; @@ -114,7 +114,7 @@ static int cpio_mkslink(const char *name, const char *target, (long) uid, /* uid */ (long) gid, /* gid */ 1, /* nlink */ - (long) mtime, /* mtime */ + (long) default_mtime, /* mtime */ (unsigned)strlen(target)+1, /* filesize */ 3, /* major */ 1, /* minor */ @@ -152,7 +152,6 @@ static int cpio_mkgeneric(const char *name, unsigned int mode, uid_t uid, gid_t gid) { char s[256]; - time_t mtime = time(NULL); if (name[0] == '/') name++; @@ -164,7 +163,7 @@ static int cpio_mkgeneric(const char *name, unsigned int mode, (long) uid, /* uid */ (long) gid, /* gid */ 2, /* nlink */ - (long) mtime, /* mtime */ + (long) default_mtime, /* mtime */ 0, /* filesize */ 3, /* major */ 1, /* minor */ @@ -242,7 +241,6 @@ static int cpio_mknod(const char *name, unsigned int mode, unsigned int maj, unsigned int min) { char s[256]; - time_t mtime = time(NULL); if (dev_type == 'b') mode |= S_IFBLK; @@ -259,7 +257,7 @@ static int cpio_mknod(const char *name, unsigned int mode, (long) uid, /* uid */ (long) gid, /* gid */ 1, /* nlink */ - (long) mtime, /* mtime */ + (long) default_mtime, /* mtime */ 0, /* filesize */ 3, /* major */ 1, /* minor */ @@ -460,7 +458,7 @@ static int cpio_mkfile_line(const char *line) static void usage(const char *prog) { fprintf(stderr, "Usage:\n" - "\t%s \n" + "\t%s [-t ] \n" "\n" " is a file containing newline separated entries that\n" "describe the files to be included in the initramfs archive:\n" @@ -491,7 +489,11 @@ static void usage(const char *prog) "nod /dev/console 0600 0 0 c 5 1\n" "dir /root 0700 0 0\n" "dir /sbin 0755 0 0\n" - "file /sbin/kinit /usr/src/klibc/kinit/kinit 0755 0 0\n", + "file /sbin/kinit /usr/src/klibc/kinit/kinit 0755 0 0\n" + "\n" + " is time in seconds since Epoch that will be used\n" + "as mtime for symlinks, special files and directories. 
The default\n"
+		"is to use the current time for these entries.\n",
 		prog);
 }
 
@@ -529,17 +531,42 @@ int main (int argc, char *argv[])
 	char *args, *type;
 	int ec = 0;
 	int line_nr = 0;
+	const char *filename;
+
+	default_mtime = time(NULL);
+	while (1) {
+		int opt = getopt(argc, argv, "t:h");
+		char *invalid;
 
-	if (2 != argc) {
+		if (opt == -1)
+			break;
+		switch (opt) {
+		case 't':
+			default_mtime = strtol(optarg, &invalid, 10);
+			if (!*optarg || *invalid) {
+				fprintf(stderr, "Invalid timestamp: %s\n",
+					optarg);
+				usage(argv[0]);
+				exit(1);
+			}
+			break;
+		case 'h':
+		case '?':
+			usage(argv[0]);
+			exit(opt == 'h' ? 0 : 1);
+		}
+	}
+
+	if (argc - optind != 1) {
 		usage(argv[0]);
 		exit(1);
 	}
-
-	if (!strcmp(argv[1], "-"))
+	filename = argv[optind];
+	if (!strcmp(filename, "-"))
 		cpio_list = stdin;
-	else if (! (cpio_list = fopen(argv[1], "r"))) {
+	else if (!(cpio_list = fopen(filename, "r"))) {
 		fprintf(stderr, "ERROR: unable to open '%s': %s\n\n",
-			argv[1], strerror(errno));
+			filename, strerror(errno));
 		usage(argv[0]);
 		exit(1);
 	}
-- cgit v0.10.2

From e93504933ee6982bdc005fa5c24e1ea330faaf8b Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk
Date: Mon, 18 Apr 2011 11:34:55 -0400
Subject: xen/blkback: Move the check for misaligned I/O once more.

The commit 976222e05ea5a9959ccf880d7a24efbf79b3c6cf
("xen/blkback: Move the check for misaligned I/O higher.")
moved it a bit too high. The preq->vbdev was not set, so the check
for misaligned I/O would cause a NULL pointer dereference.

Signed-off-by: Konrad Rzeszutek Wilk

diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c
index c645c83..a0d3227 100644
--- a/drivers/xen/blkback/blkback.c
+++ b/drivers/xen/blkback/blkback.c
@@ -514,6 +514,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif,
 	pending_req->operation = req->operation;
 	pending_req->status = BLKIF_RSP_OKAY;
 	pending_req->nr_pages = nseg;
+
 	for (i = 0; i < nseg; i++) {
 		seg[i].nsec = req->u.rw.seg[i].last_sect -
 			req->u.rw.seg[i].first_sect + 1;
@@ -522,12 +523,6 @@ static void dispatch_rw_block_io(struct blkif_st *blkif,
 		    (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect))
 			goto fail_response;
 		preq.nr_sects += seg[i].nsec;
 
-		if (((int)preq.sector_number|(int)seg[i].nsec) &
-		    ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) {
-			DPRINTK("Misaligned I/O request from domain %d",
-				blkif->domid);
-			goto fail_response;
-		}
 	}
 
 	if (vbd_translate(&preq, blkif, operation) != 0) {
@@ -537,6 +532,16 @@ static void dispatch_rw_block_io(struct blkif_st *blkif,
 			preq.sector_number + preq.nr_sects, preq.dev);
 		goto fail_response;
 	}
+	/* This check _MUST_ be done after vbd_translate as the preq.bdev
+	 * is set there. */
+	for (i = 0; i < nseg; i++) {
+		if (((int)preq.sector_number|(int)seg[i].nsec) &
+		    ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) {
+			DPRINTK("Misaligned I/O request from domain %d",
+				blkif->domid);
+			goto fail_response;
+		}
+	}
 	/* If we have failed at this point, we need to undo the M2P override,
 	 * set gnttab_set_unmap_op on all of the grant references and perform
 	 * the hypercall to unmap the grants - that is all done in
-- cgit v0.10.2

From 6fd17b5643bf05c29fc226a5aee96328056fca10 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk
Date: Mon, 18 Apr 2011 12:04:17 -0400
Subject: xen/blkback: Get the 'request_queue' properly.

After the commit 0faa8cca883bbc6a0919e3c89128672659b75820
("xen/blkback: remove per-queue plugging") we forgot to retrieve the
'struct request_queue' from the block device. This puts the
functionality back in and fixes a NULL pointer bug.
Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index a0d3227..3751325 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -542,6 +542,9 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, goto fail_response; } } + q = bdev_get_queue(preq.bdev); + if (!q) + goto fail_response; /* If we have failed at this point, we need to undo the M2P override, * set gnttab_set_unmap_op on all of the grant references and perform * the hypercall to unmap the grants - that is all done in -- cgit v0.10.2 From d2436eda2e81f1993bfe6349f17f52503bffeff5 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 18 Apr 2011 14:17:49 -0400 Subject: block, xen/blkback: remove blk_[get|put]_queue calls. They were used to check if the queue does not have QUEUE_FLAG_DEAD set. That is not necessary anymore as the 'submit_io' call ends up doing that for us. Signed-off-by: Konrad Rzeszutek Wilk diff --git a/block/blk-core.c b/block/blk-core.c index 9b60e69..90f22cc 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -351,7 +351,6 @@ void blk_put_queue(struct request_queue *q) { kobject_put(&q->kobj); } -EXPORT_SYMBOL_GPL(blk_put_queue); /* * Note: If a driver supplied the queue lock, it should not zap that lock @@ -573,7 +572,6 @@ int blk_get_queue(struct request_queue *q) return 1; } -EXPORT_SYMBOL_GPL(blk_get_queue); static inline void blk_free_request(struct request_queue *q, struct request *rq) { diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 3751325..59a2bae 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -479,7 +479,6 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, int i, nbio = 0; int operation; struct blk_plug plug; - struct request_queue *q; switch (req->operation) { case BLKIF_OP_READ: @@ -542,9 +541,6 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, goto fail_response; } } - q = bdev_get_queue(preq.bdev); - if (!q) - goto fail_response; /* If we have failed at this point, we need to undo the M2P override, * set gnttab_set_unmap_op on all of the grant references and perform * the hypercall to unmap the grants - that is all done in @@ -596,7 +592,6 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, atomic_set(&pending_req->pendcnt, nbio); /* Get a reference count for the disk queue and start sending I/O */ - blk_get_queue(q); blk_start_plug(&plug); for (i = 0; i < nbio; i++) @@ -604,7 +599,6 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, blk_finish_plug(&plug); /* Let the I/Os go.. */ - blk_put_queue(q); if (operation == READ) blkif->st_rd_sect += preq.nr_sects; -- cgit v0.10.2 From dfc07b13dcacefda6ebdea14584ed8724dc980ef Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 18 Apr 2011 14:24:23 -0400 Subject: xen/blkback: Move it from drivers/xen to drivers/block .. and modify the Makefile and Kconfig files appropriately. Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 83c32cb..9abb646 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -470,6 +470,14 @@ config XEN_BLKDEV_FRONTEND block device driver. It communicates with a back-end driver in another domain which drives the actual block device. 
+config XEN_BLKDEV_BACKEND + tristate "Block-device backend driver" + depends on XEN_BACKEND + help + The block-device backend driver allows the kernel to export its + block devices to other guests via a high-performance shared-memory + interface. + config VIRTIO_BLK tristate "Virtio block driver (EXPERIMENTAL)" depends on EXPERIMENTAL && VIRTIO diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 40528ba..76646e9 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -36,6 +36,7 @@ obj-$(CONFIG_BLK_DEV_UB) += ub.o obj-$(CONFIG_BLK_DEV_HD) += hd.o obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o +obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/ obj-$(CONFIG_BLK_DEV_DRBD) += drbd/ obj-$(CONFIG_BLK_DEV_RBD) += rbd.o diff --git a/drivers/block/xen-blkback/Makefile b/drivers/block/xen-blkback/Makefile new file mode 100644 index 0000000..f1ae1ff --- /dev/null +++ b/drivers/block/xen-blkback/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_XEN_BLKDEV_BACKEND) := xen-blkback.o + +xen-blkback-y := blkback.o xenbus.o interface.o vbd.o diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c new file mode 100644 index 0000000..59a2bae --- /dev/null +++ b/drivers/block/xen-blkback/blkback.c @@ -0,0 +1,759 @@ +/****************************************************************************** + * + * Back-end of the driver for virtual block devices. This portion of the + * driver exports a 'unified' block-device interface that can be accessed + * by any operating system that implements a compatible front end. A + * reference front-end implementation can be found in: + * drivers/block/xen-blkfront.c + * + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand + * Copyright (c) 2005, Christopher Clark + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include "common.h" + +#define WRITE_BARRIER (REQ_WRITE | REQ_FLUSH | REQ_FUA) + +/* + * These are rather arbitrary. They are fairly large because adjacent requests + * pulled from a communication ring are quite likely to end up being part of + * the same scatter/gather request at the disc. 
+ * + * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW ** + * + * This will increase the chances of being able to write whole tracks. + * 64 should be enough to keep us competitive with Linux. + */ +static int blkif_reqs = 64; +module_param_named(reqs, blkif_reqs, int, 0); +MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate"); + +/* Run-time switchable: /sys/module/blkback/parameters/ */ +static unsigned int log_stats; +static unsigned int debug_lvl; +module_param(log_stats, int, 0644); +module_param(debug_lvl, int, 0644); + +/* + * Each outstanding request that we've passed to the lower device layers has a + * 'pending_req' allocated to it. Each buffer_head that completes decrements + * the pendcnt towards zero. When it hits zero, the specified domain has a + * response queued for it, with the saved 'id' passed back. + */ +struct pending_req { + struct blkif_st *blkif; + u64 id; + int nr_pages; + atomic_t pendcnt; + unsigned short operation; + int status; + struct list_head free_list; +}; + +#define BLKBACK_INVALID_HANDLE (~0) + +struct xen_blkbk { + struct pending_req *pending_reqs; + /* List of all 'pending_req' available */ + struct list_head pending_free; + /* And its spinlock. */ + spinlock_t pending_free_lock; + wait_queue_head_t pending_free_wq; + /* The list of all pages that are available. */ + struct page **pending_pages; + /* And the grant handles that are available. */ + grant_handle_t *pending_grant_handles; +}; + +static struct xen_blkbk *blkbk; + +/* + * Little helpful macro to figure out the index and virtual address of the + * pending_pages[..]. For each 'pending_req' we have have up to + * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through + * 10 and would index in the pending_pages[..]. */ +static inline int vaddr_pagenr(struct pending_req *req, int seg) +{ + return (req - blkbk->pending_reqs) * + BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; +} + +#define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)] + +static inline unsigned long vaddr(struct pending_req *req, int seg) +{ + unsigned long pfn = page_to_pfn(blkbk->pending_page(req, seg)); + return (unsigned long)pfn_to_kaddr(pfn); +} + +#define pending_handle(_req, _seg) \ + (blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)]) + + +static int do_block_io_op(struct blkif_st *blkif); +static void dispatch_rw_block_io(struct blkif_st *blkif, + struct blkif_request *req, + struct pending_req *pending_req); +static void make_response(struct blkif_st *blkif, u64 id, + unsigned short op, int st); + +/* + * Retrieve from the 'pending_reqs' a free pending_req structure to be used. + */ +static struct pending_req *alloc_req(void) +{ + struct pending_req *req = NULL; + unsigned long flags; + + spin_lock_irqsave(&blkbk->pending_free_lock, flags); + if (!list_empty(&blkbk->pending_free)) { + req = list_entry(blkbk->pending_free.next, struct pending_req, + free_list); + list_del(&req->free_list); + } + spin_unlock_irqrestore(&blkbk->pending_free_lock, flags); + return req; +} + +/* + * Return the 'pending_req' structure back to the freepool. We also + * wake up the thread if it was waiting for a free page. 
+ */ +static void free_req(struct pending_req *req) +{ + unsigned long flags; + int was_empty; + + spin_lock_irqsave(&blkbk->pending_free_lock, flags); + was_empty = list_empty(&blkbk->pending_free); + list_add(&req->free_list, &blkbk->pending_free); + spin_unlock_irqrestore(&blkbk->pending_free_lock, flags); + if (was_empty) + wake_up(&blkbk->pending_free_wq); +} + +/* + * Notification from the guest OS. + */ +static void blkif_notify_work(struct blkif_st *blkif) +{ + blkif->waiting_reqs = 1; + wake_up(&blkif->wq); +} + +irqreturn_t blkif_be_int(int irq, void *dev_id) +{ + blkif_notify_work(dev_id); + return IRQ_HANDLED; +} + +/* + * SCHEDULER FUNCTIONS + */ + +static void print_stats(struct blkif_st *blkif) +{ + printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | br %4d\n", + current->comm, blkif->st_oo_req, + blkif->st_rd_req, blkif->st_wr_req, blkif->st_br_req); + blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); + blkif->st_rd_req = 0; + blkif->st_wr_req = 0; + blkif->st_oo_req = 0; +} + +int blkif_schedule(void *arg) +{ + struct blkif_st *blkif = arg; + struct vbd *vbd = &blkif->vbd; + + blkif_get(blkif); + + if (debug_lvl) + printk(KERN_DEBUG "%s: started\n", current->comm); + + while (!kthread_should_stop()) { + if (try_to_freeze()) + continue; + if (unlikely(vbd->size != vbd_size(vbd))) + vbd_resize(blkif); + + wait_event_interruptible( + blkif->wq, + blkif->waiting_reqs || kthread_should_stop()); + wait_event_interruptible( + blkbk->pending_free_wq, + !list_empty(&blkbk->pending_free) || + kthread_should_stop()); + + blkif->waiting_reqs = 0; + smp_mb(); /* clear flag *before* checking for work */ + + if (do_block_io_op(blkif)) + blkif->waiting_reqs = 1; + + if (log_stats && time_after(jiffies, blkif->st_print)) + print_stats(blkif); + } + + if (log_stats) + print_stats(blkif); + if (debug_lvl) + printk(KERN_DEBUG "%s: exiting\n", current->comm); + + blkif->xenblkd = NULL; + blkif_put(blkif); + + return 0; +} + +struct seg_buf { + unsigned long buf; + unsigned int nsec; +}; +/* + * Unmap the grant references, and also remove the M2P over-rides + * used in the 'pending_req'. +*/ +static void xen_blkbk_unmap(struct pending_req *req) +{ + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + unsigned int i, invcount = 0; + grant_handle_t handle; + int ret; + + for (i = 0; i < req->nr_pages; i++) { + handle = pending_handle(req, i); + if (handle == BLKBACK_INVALID_HANDLE) + continue; + gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i), + GNTMAP_host_map, handle); + pending_handle(req, i) = BLKBACK_INVALID_HANDLE; + invcount++; + } + + ret = HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, unmap, invcount); + BUG_ON(ret); + /* Note, we use invcount, so nr->pages, so we can't index + * using vaddr(req, i). + */ + for (i = 0; i < invcount; i++) { + ret = m2p_remove_override( + virt_to_page(unmap[i].host_addr), false); + if (ret) { + printk(KERN_ALERT "Failed to remove M2P override for " \ + "%lx\n", (unsigned long)unmap[i].host_addr); + continue; + } + } +} +static int xen_blkbk_map(struct blkif_request *req, struct pending_req *pending_req, + struct seg_buf seg[]) +{ + struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + int i; + int nseg = req->nr_segments; + int ret = 0; + /* Fill out preq.nr_sects with proper amount of sectors, and setup + * assign map[..] with the PFN of the page in our domain with the + * corresponding grant reference for each page. 
+ */ + for (i = 0; i < nseg; i++) { + uint32_t flags; + + flags = GNTMAP_host_map; + if (pending_req->operation != BLKIF_OP_READ) + flags |= GNTMAP_readonly; + gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, + req->u.rw.seg[i].gref, pending_req->blkif->domid); + } + + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); + BUG_ON(ret); + + /* Now swizzel the MFN in our domain with the MFN from the other domain + * so that when we access vaddr(pending_req,i) it has the contents of + * the page from the other domain. + */ + for (i = 0; i < nseg; i++) { + if (unlikely(map[i].status != 0)) { + DPRINTK("invalid buffer -- could not remap it\n"); + map[i].handle = BLKBACK_INVALID_HANDLE; + ret |= 1; + } + + pending_handle(pending_req, i) = map[i].handle; + + if (ret) + continue; + + ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr), + blkbk->pending_page(pending_req, i), false); + if (ret) { + printk(KERN_ALERT "Failed to install M2P override for"\ + " %lx (ret: %d)\n", (unsigned long) + map[i].dev_bus_addr, ret); + /* We could switch over to GNTTABOP_copy */ + continue; + } + + seg[i].buf = map[i].dev_bus_addr | + (req->u.rw.seg[i].first_sect << 9); + } + return ret; +} + +/* + * Completion callback on the bio's. Called as bh->b_end_io() + */ + +static void __end_block_io_op(struct pending_req *pending_req, int error) +{ + /* An error fails the entire request. */ + if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) && + (error == -EOPNOTSUPP)) { + DPRINTK("blkback: write barrier op failed, not supported\n"); + blkback_barrier(XBT_NIL, pending_req->blkif->be, 0); + pending_req->status = BLKIF_RSP_EOPNOTSUPP; + } else if (error) { + DPRINTK("Buffer not up-to-date at end of operation, " + "error=%d\n", error); + pending_req->status = BLKIF_RSP_ERROR; + } + + /* If all of the bio's have completed it is time to unmap + * the grant references associated with 'request' and provide + * the proper response on the ring. + */ + if (atomic_dec_and_test(&pending_req->pendcnt)) { + xen_blkbk_unmap(pending_req); + make_response(pending_req->blkif, pending_req->id, + pending_req->operation, pending_req->status); + blkif_put(pending_req->blkif); + free_req(pending_req); + } +} + +/* + * bio callback. + */ +static void end_block_io_op(struct bio *bio, int error) +{ + __end_block_io_op(bio->bi_private, error); + bio_put(bio); +} + + + +/* + * Function to copy the from the ring buffer the 'struct blkif_request' + * (which has the sectors we want, number of them, grant references, etc), + * and transmute it to the block API to hand it over to the proper block disk. + */ +static int do_block_io_op(struct blkif_st *blkif) +{ + union blkif_back_rings *blk_rings = &blkif->blk_rings; + struct blkif_request req; + struct pending_req *pending_req; + RING_IDX rc, rp; + int more_to_do = 0; + + rc = blk_rings->common.req_cons; + rp = blk_rings->common.sring->req_prod; + rmb(); /* Ensure we see queued requests up to 'rp'. 
*/ + + while (rc != rp) { + + if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) + break; + + if (kthread_should_stop()) { + more_to_do = 1; + break; + } + + pending_req = alloc_req(); + if (NULL == pending_req) { + blkif->st_oo_req++; + more_to_do = 1; + break; + } + + switch (blkif->blk_protocol) { + case BLKIF_PROTOCOL_NATIVE: + memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req)); + break; + case BLKIF_PROTOCOL_X86_32: + blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc)); + break; + case BLKIF_PROTOCOL_X86_64: + blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc)); + break; + default: + BUG(); + } + blk_rings->common.req_cons = ++rc; /* before make_response() */ + + /* Apply all sanity checks to /private copy/ of request. */ + barrier(); + + switch (req.operation) { + case BLKIF_OP_READ: + blkif->st_rd_req++; + dispatch_rw_block_io(blkif, &req, pending_req); + break; + case BLKIF_OP_WRITE_BARRIER: + blkif->st_br_req++; + /* fall through */ + case BLKIF_OP_WRITE: + blkif->st_wr_req++; + dispatch_rw_block_io(blkif, &req, pending_req); + break; + default: + /* A good sign something is wrong: sleep for a while to + * avoid excessive CPU consumption by a bad guest. */ + msleep(1); + DPRINTK("error: unknown block io operation [%d]\n", + req.operation); + make_response(blkif, req.id, req.operation, + BLKIF_RSP_ERROR); + free_req(pending_req); + break; + } + + /* Yield point for this unbounded loop. */ + cond_resched(); + } + + return more_to_do; +} + +/* + * Transumation of the 'struct blkif_request' to a proper 'struct bio' + * and call the 'submit_bio' to pass it to the underlaying storage. + */ +static void dispatch_rw_block_io(struct blkif_st *blkif, + struct blkif_request *req, + struct pending_req *pending_req) +{ + struct phys_req preq; + struct seg_buf seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + unsigned int nseg; + struct bio *bio = NULL; + struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + int i, nbio = 0; + int operation; + struct blk_plug plug; + + switch (req->operation) { + case BLKIF_OP_READ: + operation = READ; + break; + case BLKIF_OP_WRITE: + operation = WRITE; + break; + case BLKIF_OP_WRITE_BARRIER: + operation = WRITE_BARRIER; + break; + default: + operation = 0; /* make gcc happy */ + BUG(); + } + + /* Check that the number of segments is sane. */ + nseg = req->nr_segments; + if (unlikely(nseg == 0 && operation != WRITE_BARRIER) || + unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { + DPRINTK("Bad number of segments in request (%d)\n", nseg); + /* Haven't submitted any bio's yet. */ + goto fail_response; + } + + preq.dev = req->handle; + preq.sector_number = req->u.rw.sector_number; + preq.nr_sects = 0; + + pending_req->blkif = blkif; + pending_req->id = req->id; + pending_req->operation = req->operation; + pending_req->status = BLKIF_RSP_OKAY; + pending_req->nr_pages = nseg; + + for (i = 0; i < nseg; i++) { + seg[i].nsec = req->u.rw.seg[i].last_sect - + req->u.rw.seg[i].first_sect + 1; + if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) || + (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect)) + goto fail_response; + preq.nr_sects += seg[i].nsec; + + } + + if (vbd_translate(&preq, blkif, operation) != 0) { + DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", + operation == READ ? "read" : "write", + preq.sector_number, + preq.sector_number + preq.nr_sects, preq.dev); + goto fail_response; + } + /* This check _MUST_ be done after vbd_translate as the preq.bdev + * is set there. 
*/ + for (i = 0; i < nseg; i++) { + if (((int)preq.sector_number|(int)seg[i].nsec) & + ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { + DPRINTK("Misaligned I/O request from domain %d", + blkif->domid); + goto fail_response; + } + } + /* If we have failed at this point, we need to undo the M2P override, + * set gnttab_set_unmap_op on all of the grant references and perform + * the hypercall to unmap the grants - that is all done in + * xen_blkbk_unmap. + */ + if (xen_blkbk_map(req, pending_req, seg)) + goto fail_flush; + + /* This corresponding blkif_put is done in __end_block_io_op */ + blkif_get(blkif); + + for (i = 0; i < nseg; i++) { + while ((bio == NULL) || + (bio_add_page(bio, + blkbk->pending_page(pending_req, i), + seg[i].nsec << 9, + seg[i].buf & ~PAGE_MASK) == 0)) { + + bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, nseg-i); + if (unlikely(bio == NULL)) + goto fail_put_bio; + + bio->bi_bdev = preq.bdev; + bio->bi_private = pending_req; + bio->bi_end_io = end_block_io_op; + bio->bi_sector = preq.sector_number; + } + + preq.sector_number += seg[i].nsec; + } + + /* This will be hit if the operation was a barrier. */ + if (!bio) { + BUG_ON(operation != WRITE_BARRIER); + bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, 0); + if (unlikely(bio == NULL)) + goto fail_put_bio; + + bio->bi_bdev = preq.bdev; + bio->bi_private = pending_req; + bio->bi_end_io = end_block_io_op; + bio->bi_sector = -1; + } + + + /* We set it one so that the last submit_bio does not have to call + * atomic_inc. + */ + atomic_set(&pending_req->pendcnt, nbio); + + /* Get a reference count for the disk queue and start sending I/O */ + blk_start_plug(&plug); + + for (i = 0; i < nbio; i++) + submit_bio(operation, biolist[i]); + + blk_finish_plug(&plug); + /* Let the I/Os go.. */ + + if (operation == READ) + blkif->st_rd_sect += preq.nr_sects; + else if (operation == WRITE || operation == WRITE_BARRIER) + blkif->st_wr_sect += preq.nr_sects; + + return; + + fail_flush: + xen_blkbk_unmap(pending_req); + fail_response: + /* Haven't submitted any bio's yet. */ + make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); + free_req(pending_req); + msleep(1); /* back off a bit */ + return; + + fail_put_bio: + for (i = 0; i < (nbio-1); i++) + bio_put(biolist[i]); + __end_block_io_op(pending_req, -EINVAL); + msleep(1); /* back off a bit */ + return; +} + + + +/* + * Put a response on the ring on how the operation fared. + */ +static void make_response(struct blkif_st *blkif, u64 id, + unsigned short op, int st) +{ + struct blkif_response resp; + unsigned long flags; + union blkif_back_rings *blk_rings = &blkif->blk_rings; + int more_to_do = 0; + int notify; + + resp.id = id; + resp.operation = op; + resp.status = st; + + spin_lock_irqsave(&blkif->blk_ring_lock, flags); + /* Place on the response ring for the relevant domain. */ + switch (blkif->blk_protocol) { + case BLKIF_PROTOCOL_NATIVE: + memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt), + &resp, sizeof(resp)); + break; + case BLKIF_PROTOCOL_X86_32: + memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt), + &resp, sizeof(resp)); + break; + case BLKIF_PROTOCOL_X86_64: + memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt), + &resp, sizeof(resp)); + break; + default: + BUG(); + } + blk_rings->common.rsp_prod_pvt++; + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify); + if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) { + /* + * Tail check for pending requests. 
Allows frontend to avoid + * notifications if requests are already in flight (lower + * overheads and promotes batching). + */ + RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do); + + } else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) { + more_to_do = 1; + } + + spin_unlock_irqrestore(&blkif->blk_ring_lock, flags); + + if (more_to_do) + blkif_notify_work(blkif); + if (notify) + notify_remote_via_irq(blkif->irq); +} + +static int __init blkif_init(void) +{ + int i, mmap_pages; + int rc = 0; + + if (!xen_pv_domain()) + return -ENODEV; + + blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL); + if (!blkbk) { + printk(KERN_ALERT "%s: out of memory!\n", __func__); + return -ENOMEM; + } + + mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; + + blkbk->pending_reqs = kmalloc(sizeof(blkbk->pending_reqs[0]) * + blkif_reqs, GFP_KERNEL); + blkbk->pending_grant_handles = kzalloc(sizeof(blkbk->pending_grant_handles[0]) * + mmap_pages, GFP_KERNEL); + blkbk->pending_pages = kzalloc(sizeof(blkbk->pending_pages[0]) * + mmap_pages, GFP_KERNEL); + + if (!blkbk->pending_reqs || !blkbk->pending_grant_handles || + !blkbk->pending_pages) { + rc = -ENOMEM; + goto out_of_memory; + } + + for (i = 0; i < mmap_pages; i++) { + blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; + blkbk->pending_pages[i] = alloc_page(GFP_KERNEL); + if (blkbk->pending_pages[i] == NULL) { + rc = -ENOMEM; + goto out_of_memory; + } + } + rc = blkif_interface_init(); + if (rc) + goto failed_init; + + memset(blkbk->pending_reqs, 0, sizeof(blkbk->pending_reqs)); + + INIT_LIST_HEAD(&blkbk->pending_free); + spin_lock_init(&blkbk->pending_free_lock); + init_waitqueue_head(&blkbk->pending_free_wq); + + for (i = 0; i < blkif_reqs; i++) + list_add_tail(&blkbk->pending_reqs[i].free_list, + &blkbk->pending_free); + + rc = blkif_xenbus_init(); + if (rc) + goto failed_init; + + return 0; + + out_of_memory: + printk(KERN_ERR "%s: out of memory\n", __func__); + failed_init: + kfree(blkbk->pending_reqs); + kfree(blkbk->pending_grant_handles); + for (i = 0; i < mmap_pages; i++) { + if (blkbk->pending_pages[i]) + __free_page(blkbk->pending_pages[i]); + } + kfree(blkbk->pending_pages); + kfree(blkbk); + blkbk = NULL; + return rc; +} + +module_init(blkif_init); + +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h new file mode 100644 index 0000000..6257c11 --- /dev/null +++ b/drivers/block/xen-blkback/common.h @@ -0,0 +1,142 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __BLKIF__BACKEND__COMMON_H__ +#define __BLKIF__BACKEND__COMMON_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DPRINTK(_f, _a...) \ + pr_debug("(file=%s, line=%d) " _f, \ + __FILE__ , __LINE__ , ## _a) + +struct vbd { + blkif_vdev_t handle; /* what the domain refers to this vbd as */ + unsigned char readonly; /* Non-zero -> read-only */ + unsigned char type; /* VDISK_xxx */ + u32 pdevice; /* phys device that this vbd maps to */ + struct block_device *bdev; + sector_t size; /* Cached size parameter */ +}; + +struct backend_info; + +struct blkif_st { + /* Unique identifier for this interface. */ + domid_t domid; + unsigned int handle; + /* Physical parameters of the comms window. */ + unsigned int irq; + /* Comms information. */ + enum blkif_protocol blk_protocol; + union blkif_back_rings blk_rings; + struct vm_struct *blk_ring_area; + /* The VBD attached to this interface. */ + struct vbd vbd; + /* Back pointer to the backend_info. */ + struct backend_info *be; + /* Private fields. */ + spinlock_t blk_ring_lock; + atomic_t refcnt; + + wait_queue_head_t wq; + /* One thread per one blkif. */ + struct task_struct *xenblkd; + unsigned int waiting_reqs; + + /* statistics */ + unsigned long st_print; + int st_rd_req; + int st_wr_req; + int st_oo_req; + int st_br_req; + int st_rd_sect; + int st_wr_sect; + + wait_queue_head_t waiting_to_free; + + grant_handle_t shmem_handle; + grant_ref_t shmem_ref; +}; + +struct blkif_st *blkif_alloc(domid_t domid); +void blkif_disconnect(struct blkif_st *blkif); +void blkif_free(struct blkif_st *blkif); +int blkif_map(struct blkif_st *blkif, unsigned long shared_page, + unsigned int evtchn); +void vbd_resize(struct blkif_st *blkif); + +#define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) +#define blkif_put(_b) \ + do { \ + if (atomic_dec_and_test(&(_b)->refcnt)) \ + wake_up(&(_b)->waiting_to_free);\ + } while (0) + +/* Create a vbd. 
*/ +int vbd_create(struct blkif_st *blkif, blkif_vdev_t vdevice, unsigned major, + unsigned minor, int readonly, int cdrom); +void vbd_free(struct vbd *vbd); + +unsigned long long vbd_size(struct vbd *vbd); +unsigned int vbd_info(struct vbd *vbd); +unsigned long vbd_secsize(struct vbd *vbd); + +struct phys_req { + unsigned short dev; + unsigned short nr_sects; + struct block_device *bdev; + blkif_sector_t sector_number; +}; + +int vbd_translate(struct phys_req *req, struct blkif_st *blkif, int operation); + +int blkif_interface_init(void); + +int blkif_xenbus_init(void); + +irqreturn_t blkif_be_int(int irq, void *dev_id); +int blkif_schedule(void *arg); + +int blkback_barrier(struct xenbus_transaction xbt, + struct backend_info *be, int state); + +struct xenbus_device *blkback_xenbus(struct backend_info *be); + +#endif /* __BLKIF__BACKEND__COMMON_H__ */ diff --git a/drivers/block/xen-blkback/interface.c b/drivers/block/xen-blkback/interface.c new file mode 100644 index 0000000..163aed4 --- /dev/null +++ b/drivers/block/xen-blkback/interface.c @@ -0,0 +1,185 @@ +/****************************************************************************** + * Block-device interface management. + * + * Copyright (c) 2004, Keir Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "common.h" +#include +#include +#include + +static struct kmem_cache *blkif_cachep; + +struct blkif_st *blkif_alloc(domid_t domid) +{ + struct blkif_st *blkif; + + blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL); + if (!blkif) + return ERR_PTR(-ENOMEM); + + memset(blkif, 0, sizeof(*blkif)); + blkif->domid = domid; + spin_lock_init(&blkif->blk_ring_lock); + atomic_set(&blkif->refcnt, 1); + init_waitqueue_head(&blkif->wq); + blkif->st_print = jiffies; + init_waitqueue_head(&blkif->waiting_to_free); + + return blkif; +} + +static int map_frontend_page(struct blkif_st *blkif, unsigned long shared_page) +{ + struct gnttab_map_grant_ref op; + + gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr, + GNTMAP_host_map, shared_page, blkif->domid); + + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) + BUG(); + + if (op.status) { + DPRINTK(" Grant table operation failure !\n"); + return op.status; + } + + blkif->shmem_ref = shared_page; + blkif->shmem_handle = op.handle; + + return 0; +} + +static void unmap_frontend_page(struct blkif_st *blkif) +{ + struct gnttab_unmap_grant_ref op; + + gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr, + GNTMAP_host_map, blkif->shmem_handle); + + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) + BUG(); +} + +int blkif_map(struct blkif_st *blkif, unsigned long shared_page, + unsigned int evtchn) +{ + int err; + + /* Already connected through? */ + if (blkif->irq) + return 0; + + blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE); + if (!blkif->blk_ring_area) + return -ENOMEM; + + err = map_frontend_page(blkif, shared_page); + if (err) { + free_vm_area(blkif->blk_ring_area); + return err; + } + + switch (blkif->blk_protocol) { + case BLKIF_PROTOCOL_NATIVE: + { + struct blkif_sring *sring; + sring = (struct blkif_sring *)blkif->blk_ring_area->addr; + BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); + break; + } + case BLKIF_PROTOCOL_X86_32: + { + struct blkif_x86_32_sring *sring_x86_32; + sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring_area->addr; + BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); + break; + } + case BLKIF_PROTOCOL_X86_64: + { + struct blkif_x86_64_sring *sring_x86_64; + sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring_area->addr; + BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); + break; + } + default: + BUG(); + } + + err = bind_interdomain_evtchn_to_irqhandler( + blkif->domid, evtchn, blkif_be_int, 0, "blkif-backend", blkif); + if (err < 0) { + unmap_frontend_page(blkif); + free_vm_area(blkif->blk_ring_area); + blkif->blk_rings.common.sring = NULL; + return err; + } + blkif->irq = err; + + return 0; +} + +void blkif_disconnect(struct blkif_st *blkif) +{ + if (blkif->xenblkd) { + kthread_stop(blkif->xenblkd); + blkif->xenblkd = NULL; + } + + atomic_dec(&blkif->refcnt); + wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0); + atomic_inc(&blkif->refcnt); + + if (blkif->irq) { + unbind_from_irqhandler(blkif->irq, blkif); + blkif->irq = 0; + } + + if (blkif->blk_rings.common.sring) { + unmap_frontend_page(blkif); + free_vm_area(blkif->blk_ring_area); + blkif->blk_rings.common.sring = NULL; + } +} + +void blkif_free(struct blkif_st *blkif) +{ + if (!atomic_dec_and_test(&blkif->refcnt)) + BUG(); + kmem_cache_free(blkif_cachep, blkif); +} + +int __init blkif_interface_init(void) +{ + blkif_cachep = kmem_cache_create("blkif_cache", sizeof(struct blkif_st), + 0, 0, NULL); + if (!blkif_cachep) + 
return -ENOMEM; + + return 0; +} diff --git a/drivers/block/xen-blkback/vbd.c b/drivers/block/xen-blkback/vbd.c new file mode 100644 index 0000000..d0ff4cf --- /dev/null +++ b/drivers/block/xen-blkback/vbd.c @@ -0,0 +1,162 @@ +/****************************************************************************** + * Routines for managing virtual block devices (VBDs). + * + * Copyright (c) 2003-2005, Keir Fraser & Steve Hand + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "common.h" + +#define vbd_sz(_v) ((_v)->bdev->bd_part ? \ + (_v)->bdev->bd_part->nr_sects : \ + get_capacity((_v)->bdev->bd_disk)) + +unsigned long long vbd_size(struct vbd *vbd) +{ + return vbd_sz(vbd); +} + +unsigned int vbd_info(struct vbd *vbd) +{ + return vbd->type | (vbd->readonly ? VDISK_READONLY : 0); +} + +unsigned long vbd_secsize(struct vbd *vbd) +{ + return bdev_logical_block_size(vbd->bdev); +} + +int vbd_create(struct blkif_st *blkif, blkif_vdev_t handle, unsigned major, + unsigned minor, int readonly, int cdrom) +{ + struct vbd *vbd; + struct block_device *bdev; + + vbd = &blkif->vbd; + vbd->handle = handle; + vbd->readonly = readonly; + vbd->type = 0; + + vbd->pdevice = MKDEV(major, minor); + + bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ? + FMODE_READ : FMODE_WRITE, NULL); + + if (IS_ERR(bdev)) { + DPRINTK("vbd_creat: device %08x could not be opened.\n", + vbd->pdevice); + return -ENOENT; + } + + vbd->bdev = bdev; + vbd->size = vbd_size(vbd); + + if (vbd->bdev->bd_disk == NULL) { + DPRINTK("vbd_creat: device %08x doesn't exist.\n", + vbd->pdevice); + vbd_free(vbd); + return -ENOENT; + } + + if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom) + vbd->type |= VDISK_CDROM; + if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE) + vbd->type |= VDISK_REMOVABLE; + + DPRINTK("Successful creation of handle=%04x (dom=%u)\n", + handle, blkif->domid); + return 0; +} + +void vbd_free(struct vbd *vbd) +{ + if (vbd->bdev) + blkdev_put(vbd->bdev, vbd->readonly ? 
FMODE_READ : FMODE_WRITE); + vbd->bdev = NULL; +} + +int vbd_translate(struct phys_req *req, struct blkif_st *blkif, int operation) +{ + struct vbd *vbd = &blkif->vbd; + int rc = -EACCES; + + if ((operation != READ) && vbd->readonly) + goto out; + + if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd))) + goto out; + + req->dev = vbd->pdevice; + req->bdev = vbd->bdev; + rc = 0; + + out: + return rc; +} + +void vbd_resize(struct blkif_st *blkif) +{ + struct vbd *vbd = &blkif->vbd; + struct xenbus_transaction xbt; + int err; + struct xenbus_device *dev = blkback_xenbus(blkif->be); + unsigned long long new_size = vbd_size(vbd); + + printk(KERN_INFO "VBD Resize: Domid: %d, Device: (%d, %d)\n", + blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice)); + printk(KERN_INFO "VBD Resize: new size %llu\n", new_size); + vbd->size = new_size; +again: + err = xenbus_transaction_start(&xbt); + if (err) { + printk(KERN_WARNING "Error starting transaction"); + return; + } + err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", + vbd_size(vbd)); + if (err) { + printk(KERN_WARNING "Error writing new size"); + goto abort; + } + /* + * Write the current state; we will use this to synchronize + * the front-end. If the current state is "connected" the + * front-end will get the new size information online. + */ + err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state); + if (err) { + printk(KERN_WARNING "Error writing the state"); + goto abort; + } + + err = xenbus_transaction_end(xbt, 0); + if (err == -EAGAIN) + goto again; + if (err) + printk(KERN_WARNING "Error ending transaction"); +abort: + xenbus_transaction_end(xbt, 1); +} diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c new file mode 100644 index 0000000..b41ed65d --- /dev/null +++ b/drivers/block/xen-blkback/xenbus.c @@ -0,0 +1,562 @@ +/* Xenbus code for blkif backend + Copyright (C) 2005 Rusty Russell + Copyright (C) 2005 XenSource Ltd + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include +#include +#include "common.h" + +#undef DPRINTK +#define DPRINTK(fmt, args...) 
\ + pr_debug("blkback/xenbus (%s:%d) " fmt ".\n", \ + __func__, __LINE__, ##args) + +struct backend_info { + struct xenbus_device *dev; + struct blkif_st *blkif; + struct xenbus_watch backend_watch; + unsigned major; + unsigned minor; + char *mode; +}; + +static void connect(struct backend_info *); +static int connect_ring(struct backend_info *); +static void backend_changed(struct xenbus_watch *, const char **, + unsigned int); + +struct xenbus_device *blkback_xenbus(struct backend_info *be) +{ + return be->dev; +} + +static int blkback_name(struct blkif_st *blkif, char *buf) +{ + char *devpath, *devname; + struct xenbus_device *dev = blkif->be->dev; + + devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL); + if (IS_ERR(devpath)) + return PTR_ERR(devpath); + + devname = strstr(devpath, "/dev/"); + if (devname != NULL) + devname += strlen("/dev/"); + else + devname = devpath; + + snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, devname); + kfree(devpath); + + return 0; +} + +static void update_blkif_status(struct blkif_st *blkif) +{ + int err; + char name[TASK_COMM_LEN]; + + /* Not ready to connect? */ + if (!blkif->irq || !blkif->vbd.bdev) + return; + + /* Already connected? */ + if (blkif->be->dev->state == XenbusStateConnected) + return; + + /* Attempt to connect: exit if we fail to. */ + connect(blkif->be); + if (blkif->be->dev->state != XenbusStateConnected) + return; + + err = blkback_name(blkif, name); + if (err) { + xenbus_dev_error(blkif->be->dev, err, "get blkback dev name"); + return; + } + + err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping); + if (err) { + xenbus_dev_error(blkif->be->dev, err, "block flush"); + return; + } + invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping); + + blkif->xenblkd = kthread_run(blkif_schedule, blkif, name); + if (IS_ERR(blkif->xenblkd)) { + err = PTR_ERR(blkif->xenblkd); + blkif->xenblkd = NULL; + xenbus_dev_error(blkif->be->dev, err, "start xenblkd"); + } +} + + +/* + * sysfs interface for VBD I/O requests + */ + +#define VBD_SHOW(name, format, args...) 
\ + static ssize_t show_##name(struct device *_dev, \ + struct device_attribute *attr, \ + char *buf) \ + { \ + struct xenbus_device *dev = to_xenbus_device(_dev); \ + struct backend_info *be = dev_get_drvdata(&dev->dev); \ + \ + return sprintf(buf, format, ##args); \ + } \ + static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) + +VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req); +VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req); +VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req); +VBD_SHOW(br_req, "%d\n", be->blkif->st_br_req); +VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect); +VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect); + +static struct attribute *vbdstat_attrs[] = { + &dev_attr_oo_req.attr, + &dev_attr_rd_req.attr, + &dev_attr_wr_req.attr, + &dev_attr_br_req.attr, + &dev_attr_rd_sect.attr, + &dev_attr_wr_sect.attr, + NULL +}; + +static struct attribute_group vbdstat_group = { + .name = "statistics", + .attrs = vbdstat_attrs, +}; + +VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor); +VBD_SHOW(mode, "%s\n", be->mode); + +int xenvbd_sysfs_addif(struct xenbus_device *dev) +{ + int error; + + error = device_create_file(&dev->dev, &dev_attr_physical_device); + if (error) + goto fail1; + + error = device_create_file(&dev->dev, &dev_attr_mode); + if (error) + goto fail2; + + error = sysfs_create_group(&dev->dev.kobj, &vbdstat_group); + if (error) + goto fail3; + + return 0; + +fail3: sysfs_remove_group(&dev->dev.kobj, &vbdstat_group); +fail2: device_remove_file(&dev->dev, &dev_attr_mode); +fail1: device_remove_file(&dev->dev, &dev_attr_physical_device); + return error; +} + +void xenvbd_sysfs_delif(struct xenbus_device *dev) +{ + sysfs_remove_group(&dev->dev.kobj, &vbdstat_group); + device_remove_file(&dev->dev, &dev_attr_mode); + device_remove_file(&dev->dev, &dev_attr_physical_device); +} + +static int blkback_remove(struct xenbus_device *dev) +{ + struct backend_info *be = dev_get_drvdata(&dev->dev); + + DPRINTK(""); + + if (be->major || be->minor) + xenvbd_sysfs_delif(dev); + + if (be->backend_watch.node) { + unregister_xenbus_watch(&be->backend_watch); + kfree(be->backend_watch.node); + be->backend_watch.node = NULL; + } + + if (be->blkif) { + blkif_disconnect(be->blkif); + vbd_free(&be->blkif->vbd); + blkif_free(be->blkif); + be->blkif = NULL; + } + + kfree(be); + dev_set_drvdata(&dev->dev, NULL); + return 0; +} + +int blkback_barrier(struct xenbus_transaction xbt, + struct backend_info *be, int state) +{ + struct xenbus_device *dev = be->dev; + int err; + + err = xenbus_printf(xbt, dev->nodename, "feature-barrier", + "%d", state); + if (err) + xenbus_dev_fatal(dev, err, "writing feature-barrier"); + + return err; +} + +/** + * Entry point to this code when a new device is created. Allocate the basic + * structures, and watch the store waiting for the hotplug scripts to tell us + * the device's physical major and minor numbers. Switch to InitWait. 
+ */ +static int blkback_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + int err; + struct backend_info *be = kzalloc(sizeof(struct backend_info), + GFP_KERNEL); + if (!be) { + xenbus_dev_fatal(dev, -ENOMEM, + "allocating backend structure"); + return -ENOMEM; + } + be->dev = dev; + dev_set_drvdata(&dev->dev, be); + + be->blkif = blkif_alloc(dev->otherend_id); + if (IS_ERR(be->blkif)) { + err = PTR_ERR(be->blkif); + be->blkif = NULL; + xenbus_dev_fatal(dev, err, "creating block interface"); + goto fail; + } + + /* setup back pointer */ + be->blkif->be = be; + + err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed, + "%s/%s", dev->nodename, "physical-device"); + if (err) + goto fail; + + err = xenbus_switch_state(dev, XenbusStateInitWait); + if (err) + goto fail; + + return 0; + +fail: + DPRINTK("failed"); + blkback_remove(dev); + return err; +} + + +/** + * Callback received when the hotplug scripts have placed the physical-device + * node. Read it and the mode node, and create a vbd. If the frontend is + * ready, connect. + */ +static void backend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + int err; + unsigned major; + unsigned minor; + struct backend_info *be + = container_of(watch, struct backend_info, backend_watch); + struct xenbus_device *dev = be->dev; + int cdrom = 0; + char *device_type; + + DPRINTK(""); + + err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x", + &major, &minor); + if (XENBUS_EXIST_ERR(err)) { + /* Since this watch will fire once immediately after it is + registered, we expect this. Ignore it, and wait for the + hotplug scripts. */ + return; + } + if (err != 2) { + xenbus_dev_fatal(dev, err, "reading physical-device"); + return; + } + + if ((be->major || be->minor) && + ((be->major != major) || (be->minor != minor))) { + printk(KERN_WARNING + "blkback: changing physical device (from %x:%x to " + "%x:%x) not supported.\n", be->major, be->minor, + major, minor); + return; + } + + be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL); + if (IS_ERR(be->mode)) { + err = PTR_ERR(be->mode); + be->mode = NULL; + xenbus_dev_fatal(dev, err, "reading mode"); + return; + } + + device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL); + if (!IS_ERR(device_type)) { + cdrom = strcmp(device_type, "cdrom") == 0; + kfree(device_type); + } + + if (be->major == 0 && be->minor == 0) { + /* Front end dir is a number, which is used as the handle. */ + + char *p = strrchr(dev->otherend, '/') + 1; + long handle; + err = strict_strtoul(p, 0, &handle); + if (err) + return; + + be->major = major; + be->minor = minor; + + err = vbd_create(be->blkif, handle, major, minor, + (NULL == strchr(be->mode, 'w')), cdrom); + if (err) { + be->major = be->minor = 0; + xenbus_dev_fatal(dev, err, "creating vbd structure"); + return; + } + + err = xenvbd_sysfs_addif(dev); + if (err) { + vbd_free(&be->blkif->vbd); + be->major = be->minor = 0; + xenbus_dev_fatal(dev, err, "creating sysfs entries"); + return; + } + + /* We're potentially connected now */ + update_blkif_status(be->blkif); + } +} + + +/** + * Callback received when the frontend's state changes. 
+ */ +static void frontend_changed(struct xenbus_device *dev, + enum xenbus_state frontend_state) +{ + struct backend_info *be = dev_get_drvdata(&dev->dev); + int err; + + DPRINTK("%s", xenbus_strstate(frontend_state)); + + switch (frontend_state) { + case XenbusStateInitialising: + if (dev->state == XenbusStateClosed) { + printk(KERN_INFO "%s: %s: prepare for reconnect\n", + __func__, dev->nodename); + xenbus_switch_state(dev, XenbusStateInitWait); + } + break; + + case XenbusStateInitialised: + case XenbusStateConnected: + /* Ensure we connect even when two watches fire in + close successsion and we miss the intermediate value + of frontend_state. */ + if (dev->state == XenbusStateConnected) + break; + + /* Enforce precondition before potential leak point. + * blkif_disconnect() is idempotent. + */ + blkif_disconnect(be->blkif); + + err = connect_ring(be); + if (err) + break; + update_blkif_status(be->blkif); + break; + + case XenbusStateClosing: + blkif_disconnect(be->blkif); + xenbus_switch_state(dev, XenbusStateClosing); + break; + + case XenbusStateClosed: + xenbus_switch_state(dev, XenbusStateClosed); + if (xenbus_dev_is_online(dev)) + break; + /* fall through if not online */ + case XenbusStateUnknown: + /* implies blkif_disconnect() via blkback_remove() */ + device_unregister(&dev->dev); + break; + + default: + xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend", + frontend_state); + break; + } +} + + +/* ** Connection ** */ + + +/** + * Write the physical details regarding the block device to the store, and + * switch to Connected state. + */ +static void connect(struct backend_info *be) +{ + struct xenbus_transaction xbt; + int err; + struct xenbus_device *dev = be->dev; + + DPRINTK("%s", dev->otherend); + + /* Supply the information about the device the frontend needs */ +again: + err = xenbus_transaction_start(&xbt); + if (err) { + xenbus_dev_fatal(dev, err, "starting transaction"); + return; + } + + err = blkback_barrier(xbt, be, 1); + if (err) + goto abort; + + err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", + vbd_size(&be->blkif->vbd)); + if (err) { + xenbus_dev_fatal(dev, err, "writing %s/sectors", + dev->nodename); + goto abort; + } + + /* FIXME: use a typename instead */ + err = xenbus_printf(xbt, dev->nodename, "info", "%u", + vbd_info(&be->blkif->vbd)); + if (err) { + xenbus_dev_fatal(dev, err, "writing %s/info", + dev->nodename); + goto abort; + } + err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu", + vbd_secsize(&be->blkif->vbd)); + if (err) { + xenbus_dev_fatal(dev, err, "writing %s/sector-size", + dev->nodename); + goto abort; + } + + err = xenbus_transaction_end(xbt, 0); + if (err == -EAGAIN) + goto again; + if (err) + xenbus_dev_fatal(dev, err, "ending transaction"); + + err = xenbus_switch_state(dev, XenbusStateConnected); + if (err) + xenbus_dev_fatal(dev, err, "switching to Connected state", + dev->nodename); + + return; + abort: + xenbus_transaction_end(xbt, 1); +} + + +static int connect_ring(struct backend_info *be) +{ + struct xenbus_device *dev = be->dev; + unsigned long ring_ref; + unsigned int evtchn; + char protocol[64] = ""; + int err; + + DPRINTK("%s", dev->otherend); + + err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", + &ring_ref, "event-channel", "%u", &evtchn, NULL); + if (err) { + xenbus_dev_fatal(dev, err, + "reading %s/ring-ref and event-channel", + dev->otherend); + return err; + } + + be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; + err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", + "%63s", 
protocol, NULL); + if (err) + strcpy(protocol, "unspecified, assuming native"); + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE)) + be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32)) + be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32; + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64)) + be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64; + else { + xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); + return -1; + } + printk(KERN_INFO + "blkback: ring-ref %ld, event-channel %d, protocol %d (%s)\n", + ring_ref, evtchn, be->blkif->blk_protocol, protocol); + + /* Map the shared frame, irq etc. */ + err = blkif_map(be->blkif, ring_ref, evtchn); + if (err) { + xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u", + ring_ref, evtchn); + return err; + } + + return 0; +} + + +/* ** Driver Registration ** */ + + +static const struct xenbus_device_id blkback_ids[] = { + { "vbd" }, + { "" } +}; + + +static struct xenbus_driver blkback = { + .name = "vbd", + .owner = THIS_MODULE, + .ids = blkback_ids, + .probe = blkback_probe, + .remove = blkback_remove, + .otherend_changed = frontend_changed +}; + + +int blkif_xenbus_init(void) +{ + return xenbus_register_backend(&blkback); +} diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index fb1af62..a59638b 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -37,14 +37,6 @@ config XEN_BACKEND Support for backend device drivers that provide I/O services to other virtual machines. -config XEN_BLKDEV_BACKEND - tristate "Block-device backend driver" - depends on XEN_BACKEND && BLOCK - help - The block-device backend driver allows the kernel to export its - block devices to other guests via a high-performance shared-memory - interface. - config XENFS tristate "Xen filesystem" default y diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 29c0a41..f420f1f 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -11,7 +11,6 @@ obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o -obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/ obj-$(CONFIG_XENFS) += xenfs/ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o diff --git a/drivers/xen/blkback/Makefile b/drivers/xen/blkback/Makefile deleted file mode 100644 index f1ae1ff..0000000 --- a/drivers/xen/blkback/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -obj-$(CONFIG_XEN_BLKDEV_BACKEND) := xen-blkback.o - -xen-blkback-y := blkback.o xenbus.o interface.o vbd.o diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c deleted file mode 100644 index 59a2bae..0000000 --- a/drivers/xen/blkback/blkback.c +++ /dev/null @@ -1,759 +0,0 @@ -/****************************************************************************** - * - * Back-end of the driver for virtual block devices. This portion of the - * driver exports a 'unified' block-device interface that can be accessed - * by any operating system that implements a compatible front end. 
A - * reference front-end implementation can be found in: - * drivers/block/xen-blkfront.c - * - * Copyright (c) 2003-2004, Keir Fraser & Steve Hand - * Copyright (c) 2005, Christopher Clark - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation; or, when distributed - * separately from the Linux kernel or incorporated into other - * software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include "common.h" - -#define WRITE_BARRIER (REQ_WRITE | REQ_FLUSH | REQ_FUA) - -/* - * These are rather arbitrary. They are fairly large because adjacent requests - * pulled from a communication ring are quite likely to end up being part of - * the same scatter/gather request at the disc. - * - * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW ** - * - * This will increase the chances of being able to write whole tracks. - * 64 should be enough to keep us competitive with Linux. - */ -static int blkif_reqs = 64; -module_param_named(reqs, blkif_reqs, int, 0); -MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate"); - -/* Run-time switchable: /sys/module/blkback/parameters/ */ -static unsigned int log_stats; -static unsigned int debug_lvl; -module_param(log_stats, int, 0644); -module_param(debug_lvl, int, 0644); - -/* - * Each outstanding request that we've passed to the lower device layers has a - * 'pending_req' allocated to it. Each buffer_head that completes decrements - * the pendcnt towards zero. When it hits zero, the specified domain has a - * response queued for it, with the saved 'id' passed back. - */ -struct pending_req { - struct blkif_st *blkif; - u64 id; - int nr_pages; - atomic_t pendcnt; - unsigned short operation; - int status; - struct list_head free_list; -}; - -#define BLKBACK_INVALID_HANDLE (~0) - -struct xen_blkbk { - struct pending_req *pending_reqs; - /* List of all 'pending_req' available */ - struct list_head pending_free; - /* And its spinlock. */ - spinlock_t pending_free_lock; - wait_queue_head_t pending_free_wq; - /* The list of all pages that are available. */ - struct page **pending_pages; - /* And the grant handles that are available. 
*/ - grant_handle_t *pending_grant_handles; -}; - -static struct xen_blkbk *blkbk; - -/* - * Little helpful macro to figure out the index and virtual address of the - * pending_pages[..]. For each 'pending_req' we have have up to - * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through - * 10 and would index in the pending_pages[..]. */ -static inline int vaddr_pagenr(struct pending_req *req, int seg) -{ - return (req - blkbk->pending_reqs) * - BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; -} - -#define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)] - -static inline unsigned long vaddr(struct pending_req *req, int seg) -{ - unsigned long pfn = page_to_pfn(blkbk->pending_page(req, seg)); - return (unsigned long)pfn_to_kaddr(pfn); -} - -#define pending_handle(_req, _seg) \ - (blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)]) - - -static int do_block_io_op(struct blkif_st *blkif); -static void dispatch_rw_block_io(struct blkif_st *blkif, - struct blkif_request *req, - struct pending_req *pending_req); -static void make_response(struct blkif_st *blkif, u64 id, - unsigned short op, int st); - -/* - * Retrieve from the 'pending_reqs' a free pending_req structure to be used. - */ -static struct pending_req *alloc_req(void) -{ - struct pending_req *req = NULL; - unsigned long flags; - - spin_lock_irqsave(&blkbk->pending_free_lock, flags); - if (!list_empty(&blkbk->pending_free)) { - req = list_entry(blkbk->pending_free.next, struct pending_req, - free_list); - list_del(&req->free_list); - } - spin_unlock_irqrestore(&blkbk->pending_free_lock, flags); - return req; -} - -/* - * Return the 'pending_req' structure back to the freepool. We also - * wake up the thread if it was waiting for a free page. - */ -static void free_req(struct pending_req *req) -{ - unsigned long flags; - int was_empty; - - spin_lock_irqsave(&blkbk->pending_free_lock, flags); - was_empty = list_empty(&blkbk->pending_free); - list_add(&req->free_list, &blkbk->pending_free); - spin_unlock_irqrestore(&blkbk->pending_free_lock, flags); - if (was_empty) - wake_up(&blkbk->pending_free_wq); -} - -/* - * Notification from the guest OS. 
- */ -static void blkif_notify_work(struct blkif_st *blkif) -{ - blkif->waiting_reqs = 1; - wake_up(&blkif->wq); -} - -irqreturn_t blkif_be_int(int irq, void *dev_id) -{ - blkif_notify_work(dev_id); - return IRQ_HANDLED; -} - -/* - * SCHEDULER FUNCTIONS - */ - -static void print_stats(struct blkif_st *blkif) -{ - printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | br %4d\n", - current->comm, blkif->st_oo_req, - blkif->st_rd_req, blkif->st_wr_req, blkif->st_br_req); - blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); - blkif->st_rd_req = 0; - blkif->st_wr_req = 0; - blkif->st_oo_req = 0; -} - -int blkif_schedule(void *arg) -{ - struct blkif_st *blkif = arg; - struct vbd *vbd = &blkif->vbd; - - blkif_get(blkif); - - if (debug_lvl) - printk(KERN_DEBUG "%s: started\n", current->comm); - - while (!kthread_should_stop()) { - if (try_to_freeze()) - continue; - if (unlikely(vbd->size != vbd_size(vbd))) - vbd_resize(blkif); - - wait_event_interruptible( - blkif->wq, - blkif->waiting_reqs || kthread_should_stop()); - wait_event_interruptible( - blkbk->pending_free_wq, - !list_empty(&blkbk->pending_free) || - kthread_should_stop()); - - blkif->waiting_reqs = 0; - smp_mb(); /* clear flag *before* checking for work */ - - if (do_block_io_op(blkif)) - blkif->waiting_reqs = 1; - - if (log_stats && time_after(jiffies, blkif->st_print)) - print_stats(blkif); - } - - if (log_stats) - print_stats(blkif); - if (debug_lvl) - printk(KERN_DEBUG "%s: exiting\n", current->comm); - - blkif->xenblkd = NULL; - blkif_put(blkif); - - return 0; -} - -struct seg_buf { - unsigned long buf; - unsigned int nsec; -}; -/* - * Unmap the grant references, and also remove the M2P over-rides - * used in the 'pending_req'. -*/ -static void xen_blkbk_unmap(struct pending_req *req) -{ - struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - unsigned int i, invcount = 0; - grant_handle_t handle; - int ret; - - for (i = 0; i < req->nr_pages; i++) { - handle = pending_handle(req, i); - if (handle == BLKBACK_INVALID_HANDLE) - continue; - gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i), - GNTMAP_host_map, handle); - pending_handle(req, i) = BLKBACK_INVALID_HANDLE; - invcount++; - } - - ret = HYPERVISOR_grant_table_op( - GNTTABOP_unmap_grant_ref, unmap, invcount); - BUG_ON(ret); - /* Note, we use invcount, so nr->pages, so we can't index - * using vaddr(req, i). - */ - for (i = 0; i < invcount; i++) { - ret = m2p_remove_override( - virt_to_page(unmap[i].host_addr), false); - if (ret) { - printk(KERN_ALERT "Failed to remove M2P override for " \ - "%lx\n", (unsigned long)unmap[i].host_addr); - continue; - } - } -} -static int xen_blkbk_map(struct blkif_request *req, struct pending_req *pending_req, - struct seg_buf seg[]) -{ - struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - int i; - int nseg = req->nr_segments; - int ret = 0; - /* Fill out preq.nr_sects with proper amount of sectors, and setup - * assign map[..] with the PFN of the page in our domain with the - * corresponding grant reference for each page. 
- */ - for (i = 0; i < nseg; i++) { - uint32_t flags; - - flags = GNTMAP_host_map; - if (pending_req->operation != BLKIF_OP_READ) - flags |= GNTMAP_readonly; - gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, - req->u.rw.seg[i].gref, pending_req->blkif->domid); - } - - ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); - BUG_ON(ret); - - /* Now swizzel the MFN in our domain with the MFN from the other domain - * so that when we access vaddr(pending_req,i) it has the contents of - * the page from the other domain. - */ - for (i = 0; i < nseg; i++) { - if (unlikely(map[i].status != 0)) { - DPRINTK("invalid buffer -- could not remap it\n"); - map[i].handle = BLKBACK_INVALID_HANDLE; - ret |= 1; - } - - pending_handle(pending_req, i) = map[i].handle; - - if (ret) - continue; - - ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr), - blkbk->pending_page(pending_req, i), false); - if (ret) { - printk(KERN_ALERT "Failed to install M2P override for"\ - " %lx (ret: %d)\n", (unsigned long) - map[i].dev_bus_addr, ret); - /* We could switch over to GNTTABOP_copy */ - continue; - } - - seg[i].buf = map[i].dev_bus_addr | - (req->u.rw.seg[i].first_sect << 9); - } - return ret; -} - -/* - * Completion callback on the bio's. Called as bh->b_end_io() - */ - -static void __end_block_io_op(struct pending_req *pending_req, int error) -{ - /* An error fails the entire request. */ - if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) && - (error == -EOPNOTSUPP)) { - DPRINTK("blkback: write barrier op failed, not supported\n"); - blkback_barrier(XBT_NIL, pending_req->blkif->be, 0); - pending_req->status = BLKIF_RSP_EOPNOTSUPP; - } else if (error) { - DPRINTK("Buffer not up-to-date at end of operation, " - "error=%d\n", error); - pending_req->status = BLKIF_RSP_ERROR; - } - - /* If all of the bio's have completed it is time to unmap - * the grant references associated with 'request' and provide - * the proper response on the ring. - */ - if (atomic_dec_and_test(&pending_req->pendcnt)) { - xen_blkbk_unmap(pending_req); - make_response(pending_req->blkif, pending_req->id, - pending_req->operation, pending_req->status); - blkif_put(pending_req->blkif); - free_req(pending_req); - } -} - -/* - * bio callback. - */ -static void end_block_io_op(struct bio *bio, int error) -{ - __end_block_io_op(bio->bi_private, error); - bio_put(bio); -} - - - -/* - * Function to copy the from the ring buffer the 'struct blkif_request' - * (which has the sectors we want, number of them, grant references, etc), - * and transmute it to the block API to hand it over to the proper block disk. - */ -static int do_block_io_op(struct blkif_st *blkif) -{ - union blkif_back_rings *blk_rings = &blkif->blk_rings; - struct blkif_request req; - struct pending_req *pending_req; - RING_IDX rc, rp; - int more_to_do = 0; - - rc = blk_rings->common.req_cons; - rp = blk_rings->common.sring->req_prod; - rmb(); /* Ensure we see queued requests up to 'rp'. 
*/ - - while (rc != rp) { - - if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) - break; - - if (kthread_should_stop()) { - more_to_do = 1; - break; - } - - pending_req = alloc_req(); - if (NULL == pending_req) { - blkif->st_oo_req++; - more_to_do = 1; - break; - } - - switch (blkif->blk_protocol) { - case BLKIF_PROTOCOL_NATIVE: - memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req)); - break; - case BLKIF_PROTOCOL_X86_32: - blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc)); - break; - case BLKIF_PROTOCOL_X86_64: - blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc)); - break; - default: - BUG(); - } - blk_rings->common.req_cons = ++rc; /* before make_response() */ - - /* Apply all sanity checks to /private copy/ of request. */ - barrier(); - - switch (req.operation) { - case BLKIF_OP_READ: - blkif->st_rd_req++; - dispatch_rw_block_io(blkif, &req, pending_req); - break; - case BLKIF_OP_WRITE_BARRIER: - blkif->st_br_req++; - /* fall through */ - case BLKIF_OP_WRITE: - blkif->st_wr_req++; - dispatch_rw_block_io(blkif, &req, pending_req); - break; - default: - /* A good sign something is wrong: sleep for a while to - * avoid excessive CPU consumption by a bad guest. */ - msleep(1); - DPRINTK("error: unknown block io operation [%d]\n", - req.operation); - make_response(blkif, req.id, req.operation, - BLKIF_RSP_ERROR); - free_req(pending_req); - break; - } - - /* Yield point for this unbounded loop. */ - cond_resched(); - } - - return more_to_do; -} - -/* - * Transumation of the 'struct blkif_request' to a proper 'struct bio' - * and call the 'submit_bio' to pass it to the underlaying storage. - */ -static void dispatch_rw_block_io(struct blkif_st *blkif, - struct blkif_request *req, - struct pending_req *pending_req) -{ - struct phys_req preq; - struct seg_buf seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - unsigned int nseg; - struct bio *bio = NULL; - struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - int i, nbio = 0; - int operation; - struct blk_plug plug; - - switch (req->operation) { - case BLKIF_OP_READ: - operation = READ; - break; - case BLKIF_OP_WRITE: - operation = WRITE; - break; - case BLKIF_OP_WRITE_BARRIER: - operation = WRITE_BARRIER; - break; - default: - operation = 0; /* make gcc happy */ - BUG(); - } - - /* Check that the number of segments is sane. */ - nseg = req->nr_segments; - if (unlikely(nseg == 0 && operation != WRITE_BARRIER) || - unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { - DPRINTK("Bad number of segments in request (%d)\n", nseg); - /* Haven't submitted any bio's yet. */ - goto fail_response; - } - - preq.dev = req->handle; - preq.sector_number = req->u.rw.sector_number; - preq.nr_sects = 0; - - pending_req->blkif = blkif; - pending_req->id = req->id; - pending_req->operation = req->operation; - pending_req->status = BLKIF_RSP_OKAY; - pending_req->nr_pages = nseg; - - for (i = 0; i < nseg; i++) { - seg[i].nsec = req->u.rw.seg[i].last_sect - - req->u.rw.seg[i].first_sect + 1; - if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) || - (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect)) - goto fail_response; - preq.nr_sects += seg[i].nsec; - - } - - if (vbd_translate(&preq, blkif, operation) != 0) { - DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", - operation == READ ? "read" : "write", - preq.sector_number, - preq.sector_number + preq.nr_sects, preq.dev); - goto fail_response; - } - /* This check _MUST_ be done after vbd_translate as the preq.bdev - * is set there. 
*/ - for (i = 0; i < nseg; i++) { - if (((int)preq.sector_number|(int)seg[i].nsec) & - ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { - DPRINTK("Misaligned I/O request from domain %d", - blkif->domid); - goto fail_response; - } - } - /* If we have failed at this point, we need to undo the M2P override, - * set gnttab_set_unmap_op on all of the grant references and perform - * the hypercall to unmap the grants - that is all done in - * xen_blkbk_unmap. - */ - if (xen_blkbk_map(req, pending_req, seg)) - goto fail_flush; - - /* This corresponding blkif_put is done in __end_block_io_op */ - blkif_get(blkif); - - for (i = 0; i < nseg; i++) { - while ((bio == NULL) || - (bio_add_page(bio, - blkbk->pending_page(pending_req, i), - seg[i].nsec << 9, - seg[i].buf & ~PAGE_MASK) == 0)) { - - bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, nseg-i); - if (unlikely(bio == NULL)) - goto fail_put_bio; - - bio->bi_bdev = preq.bdev; - bio->bi_private = pending_req; - bio->bi_end_io = end_block_io_op; - bio->bi_sector = preq.sector_number; - } - - preq.sector_number += seg[i].nsec; - } - - /* This will be hit if the operation was a barrier. */ - if (!bio) { - BUG_ON(operation != WRITE_BARRIER); - bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, 0); - if (unlikely(bio == NULL)) - goto fail_put_bio; - - bio->bi_bdev = preq.bdev; - bio->bi_private = pending_req; - bio->bi_end_io = end_block_io_op; - bio->bi_sector = -1; - } - - - /* We set it one so that the last submit_bio does not have to call - * atomic_inc. - */ - atomic_set(&pending_req->pendcnt, nbio); - - /* Get a reference count for the disk queue and start sending I/O */ - blk_start_plug(&plug); - - for (i = 0; i < nbio; i++) - submit_bio(operation, biolist[i]); - - blk_finish_plug(&plug); - /* Let the I/Os go.. */ - - if (operation == READ) - blkif->st_rd_sect += preq.nr_sects; - else if (operation == WRITE || operation == WRITE_BARRIER) - blkif->st_wr_sect += preq.nr_sects; - - return; - - fail_flush: - xen_blkbk_unmap(pending_req); - fail_response: - /* Haven't submitted any bio's yet. */ - make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); - free_req(pending_req); - msleep(1); /* back off a bit */ - return; - - fail_put_bio: - for (i = 0; i < (nbio-1); i++) - bio_put(biolist[i]); - __end_block_io_op(pending_req, -EINVAL); - msleep(1); /* back off a bit */ - return; -} - - - -/* - * Put a response on the ring on how the operation fared. - */ -static void make_response(struct blkif_st *blkif, u64 id, - unsigned short op, int st) -{ - struct blkif_response resp; - unsigned long flags; - union blkif_back_rings *blk_rings = &blkif->blk_rings; - int more_to_do = 0; - int notify; - - resp.id = id; - resp.operation = op; - resp.status = st; - - spin_lock_irqsave(&blkif->blk_ring_lock, flags); - /* Place on the response ring for the relevant domain. */ - switch (blkif->blk_protocol) { - case BLKIF_PROTOCOL_NATIVE: - memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt), - &resp, sizeof(resp)); - break; - case BLKIF_PROTOCOL_X86_32: - memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt), - &resp, sizeof(resp)); - break; - case BLKIF_PROTOCOL_X86_64: - memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt), - &resp, sizeof(resp)); - break; - default: - BUG(); - } - blk_rings->common.rsp_prod_pvt++; - RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify); - if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) { - /* - * Tail check for pending requests. 
Allows frontend to avoid - * notifications if requests are already in flight (lower - * overheads and promotes batching). - */ - RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do); - - } else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) { - more_to_do = 1; - } - - spin_unlock_irqrestore(&blkif->blk_ring_lock, flags); - - if (more_to_do) - blkif_notify_work(blkif); - if (notify) - notify_remote_via_irq(blkif->irq); -} - -static int __init blkif_init(void) -{ - int i, mmap_pages; - int rc = 0; - - if (!xen_pv_domain()) - return -ENODEV; - - blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL); - if (!blkbk) { - printk(KERN_ALERT "%s: out of memory!\n", __func__); - return -ENOMEM; - } - - mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; - - blkbk->pending_reqs = kmalloc(sizeof(blkbk->pending_reqs[0]) * - blkif_reqs, GFP_KERNEL); - blkbk->pending_grant_handles = kzalloc(sizeof(blkbk->pending_grant_handles[0]) * - mmap_pages, GFP_KERNEL); - blkbk->pending_pages = kzalloc(sizeof(blkbk->pending_pages[0]) * - mmap_pages, GFP_KERNEL); - - if (!blkbk->pending_reqs || !blkbk->pending_grant_handles || - !blkbk->pending_pages) { - rc = -ENOMEM; - goto out_of_memory; - } - - for (i = 0; i < mmap_pages; i++) { - blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; - blkbk->pending_pages[i] = alloc_page(GFP_KERNEL); - if (blkbk->pending_pages[i] == NULL) { - rc = -ENOMEM; - goto out_of_memory; - } - } - rc = blkif_interface_init(); - if (rc) - goto failed_init; - - memset(blkbk->pending_reqs, 0, sizeof(blkbk->pending_reqs)); - - INIT_LIST_HEAD(&blkbk->pending_free); - spin_lock_init(&blkbk->pending_free_lock); - init_waitqueue_head(&blkbk->pending_free_wq); - - for (i = 0; i < blkif_reqs; i++) - list_add_tail(&blkbk->pending_reqs[i].free_list, - &blkbk->pending_free); - - rc = blkif_xenbus_init(); - if (rc) - goto failed_init; - - return 0; - - out_of_memory: - printk(KERN_ERR "%s: out of memory\n", __func__); - failed_init: - kfree(blkbk->pending_reqs); - kfree(blkbk->pending_grant_handles); - for (i = 0; i < mmap_pages; i++) { - if (blkbk->pending_pages[i]) - __free_page(blkbk->pending_pages[i]); - } - kfree(blkbk->pending_pages); - kfree(blkbk); - blkbk = NULL; - return rc; -} - -module_init(blkif_init); - -MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h deleted file mode 100644 index 6257c11..0000000 --- a/drivers/xen/blkback/common.h +++ /dev/null @@ -1,142 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation; or, when distributed - * separately from the Linux kernel or incorporated into other - * software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef __BLKIF__BACKEND__COMMON_H__ -#define __BLKIF__BACKEND__COMMON_H__ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define DPRINTK(_f, _a...) \ - pr_debug("(file=%s, line=%d) " _f, \ - __FILE__ , __LINE__ , ## _a) - -struct vbd { - blkif_vdev_t handle; /* what the domain refers to this vbd as */ - unsigned char readonly; /* Non-zero -> read-only */ - unsigned char type; /* VDISK_xxx */ - u32 pdevice; /* phys device that this vbd maps to */ - struct block_device *bdev; - sector_t size; /* Cached size parameter */ -}; - -struct backend_info; - -struct blkif_st { - /* Unique identifier for this interface. */ - domid_t domid; - unsigned int handle; - /* Physical parameters of the comms window. */ - unsigned int irq; - /* Comms information. */ - enum blkif_protocol blk_protocol; - union blkif_back_rings blk_rings; - struct vm_struct *blk_ring_area; - /* The VBD attached to this interface. */ - struct vbd vbd; - /* Back pointer to the backend_info. */ - struct backend_info *be; - /* Private fields. */ - spinlock_t blk_ring_lock; - atomic_t refcnt; - - wait_queue_head_t wq; - /* One thread per one blkif. */ - struct task_struct *xenblkd; - unsigned int waiting_reqs; - - /* statistics */ - unsigned long st_print; - int st_rd_req; - int st_wr_req; - int st_oo_req; - int st_br_req; - int st_rd_sect; - int st_wr_sect; - - wait_queue_head_t waiting_to_free; - - grant_handle_t shmem_handle; - grant_ref_t shmem_ref; -}; - -struct blkif_st *blkif_alloc(domid_t domid); -void blkif_disconnect(struct blkif_st *blkif); -void blkif_free(struct blkif_st *blkif); -int blkif_map(struct blkif_st *blkif, unsigned long shared_page, - unsigned int evtchn); -void vbd_resize(struct blkif_st *blkif); - -#define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) -#define blkif_put(_b) \ - do { \ - if (atomic_dec_and_test(&(_b)->refcnt)) \ - wake_up(&(_b)->waiting_to_free);\ - } while (0) - -/* Create a vbd. 
*/ -int vbd_create(struct blkif_st *blkif, blkif_vdev_t vdevice, unsigned major, - unsigned minor, int readonly, int cdrom); -void vbd_free(struct vbd *vbd); - -unsigned long long vbd_size(struct vbd *vbd); -unsigned int vbd_info(struct vbd *vbd); -unsigned long vbd_secsize(struct vbd *vbd); - -struct phys_req { - unsigned short dev; - unsigned short nr_sects; - struct block_device *bdev; - blkif_sector_t sector_number; -}; - -int vbd_translate(struct phys_req *req, struct blkif_st *blkif, int operation); - -int blkif_interface_init(void); - -int blkif_xenbus_init(void); - -irqreturn_t blkif_be_int(int irq, void *dev_id); -int blkif_schedule(void *arg); - -int blkback_barrier(struct xenbus_transaction xbt, - struct backend_info *be, int state); - -struct xenbus_device *blkback_xenbus(struct backend_info *be); - -#endif /* __BLKIF__BACKEND__COMMON_H__ */ diff --git a/drivers/xen/blkback/interface.c b/drivers/xen/blkback/interface.c deleted file mode 100644 index 163aed4..0000000 --- a/drivers/xen/blkback/interface.c +++ /dev/null @@ -1,185 +0,0 @@ -/****************************************************************************** - * Block-device interface management. - * - * Copyright (c) 2004, Keir Fraser - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation; or, when distributed - * separately from the Linux kernel or incorporated into other - * software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "common.h" -#include -#include -#include - -static struct kmem_cache *blkif_cachep; - -struct blkif_st *blkif_alloc(domid_t domid) -{ - struct blkif_st *blkif; - - blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL); - if (!blkif) - return ERR_PTR(-ENOMEM); - - memset(blkif, 0, sizeof(*blkif)); - blkif->domid = domid; - spin_lock_init(&blkif->blk_ring_lock); - atomic_set(&blkif->refcnt, 1); - init_waitqueue_head(&blkif->wq); - blkif->st_print = jiffies; - init_waitqueue_head(&blkif->waiting_to_free); - - return blkif; -} - -static int map_frontend_page(struct blkif_st *blkif, unsigned long shared_page) -{ - struct gnttab_map_grant_ref op; - - gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr, - GNTMAP_host_map, shared_page, blkif->domid); - - if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) - BUG(); - - if (op.status) { - DPRINTK(" Grant table operation failure !\n"); - return op.status; - } - - blkif->shmem_ref = shared_page; - blkif->shmem_handle = op.handle; - - return 0; -} - -static void unmap_frontend_page(struct blkif_st *blkif) -{ - struct gnttab_unmap_grant_ref op; - - gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr, - GNTMAP_host_map, blkif->shmem_handle); - - if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) - BUG(); -} - -int blkif_map(struct blkif_st *blkif, unsigned long shared_page, - unsigned int evtchn) -{ - int err; - - /* Already connected through? */ - if (blkif->irq) - return 0; - - blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE); - if (!blkif->blk_ring_area) - return -ENOMEM; - - err = map_frontend_page(blkif, shared_page); - if (err) { - free_vm_area(blkif->blk_ring_area); - return err; - } - - switch (blkif->blk_protocol) { - case BLKIF_PROTOCOL_NATIVE: - { - struct blkif_sring *sring; - sring = (struct blkif_sring *)blkif->blk_ring_area->addr; - BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); - break; - } - case BLKIF_PROTOCOL_X86_32: - { - struct blkif_x86_32_sring *sring_x86_32; - sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring_area->addr; - BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); - break; - } - case BLKIF_PROTOCOL_X86_64: - { - struct blkif_x86_64_sring *sring_x86_64; - sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring_area->addr; - BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); - break; - } - default: - BUG(); - } - - err = bind_interdomain_evtchn_to_irqhandler( - blkif->domid, evtchn, blkif_be_int, 0, "blkif-backend", blkif); - if (err < 0) { - unmap_frontend_page(blkif); - free_vm_area(blkif->blk_ring_area); - blkif->blk_rings.common.sring = NULL; - return err; - } - blkif->irq = err; - - return 0; -} - -void blkif_disconnect(struct blkif_st *blkif) -{ - if (blkif->xenblkd) { - kthread_stop(blkif->xenblkd); - blkif->xenblkd = NULL; - } - - atomic_dec(&blkif->refcnt); - wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0); - atomic_inc(&blkif->refcnt); - - if (blkif->irq) { - unbind_from_irqhandler(blkif->irq, blkif); - blkif->irq = 0; - } - - if (blkif->blk_rings.common.sring) { - unmap_frontend_page(blkif); - free_vm_area(blkif->blk_ring_area); - blkif->blk_rings.common.sring = NULL; - } -} - -void blkif_free(struct blkif_st *blkif) -{ - if (!atomic_dec_and_test(&blkif->refcnt)) - BUG(); - kmem_cache_free(blkif_cachep, blkif); -} - -int __init blkif_interface_init(void) -{ - blkif_cachep = kmem_cache_create("blkif_cache", sizeof(struct blkif_st), - 0, 0, NULL); - if (!blkif_cachep) - 
return -ENOMEM; - - return 0; -} diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c deleted file mode 100644 index d0ff4cf..0000000 --- a/drivers/xen/blkback/vbd.c +++ /dev/null @@ -1,162 +0,0 @@ -/****************************************************************************** - * Routines for managing virtual block devices (VBDs). - * - * Copyright (c) 2003-2005, Keir Fraser & Steve Hand - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation; or, when distributed - * separately from the Linux kernel or incorporated into other - * software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "common.h" - -#define vbd_sz(_v) ((_v)->bdev->bd_part ? \ - (_v)->bdev->bd_part->nr_sects : \ - get_capacity((_v)->bdev->bd_disk)) - -unsigned long long vbd_size(struct vbd *vbd) -{ - return vbd_sz(vbd); -} - -unsigned int vbd_info(struct vbd *vbd) -{ - return vbd->type | (vbd->readonly ? VDISK_READONLY : 0); -} - -unsigned long vbd_secsize(struct vbd *vbd) -{ - return bdev_logical_block_size(vbd->bdev); -} - -int vbd_create(struct blkif_st *blkif, blkif_vdev_t handle, unsigned major, - unsigned minor, int readonly, int cdrom) -{ - struct vbd *vbd; - struct block_device *bdev; - - vbd = &blkif->vbd; - vbd->handle = handle; - vbd->readonly = readonly; - vbd->type = 0; - - vbd->pdevice = MKDEV(major, minor); - - bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ? - FMODE_READ : FMODE_WRITE, NULL); - - if (IS_ERR(bdev)) { - DPRINTK("vbd_creat: device %08x could not be opened.\n", - vbd->pdevice); - return -ENOENT; - } - - vbd->bdev = bdev; - vbd->size = vbd_size(vbd); - - if (vbd->bdev->bd_disk == NULL) { - DPRINTK("vbd_creat: device %08x doesn't exist.\n", - vbd->pdevice); - vbd_free(vbd); - return -ENOENT; - } - - if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom) - vbd->type |= VDISK_CDROM; - if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE) - vbd->type |= VDISK_REMOVABLE; - - DPRINTK("Successful creation of handle=%04x (dom=%u)\n", - handle, blkif->domid); - return 0; -} - -void vbd_free(struct vbd *vbd) -{ - if (vbd->bdev) - blkdev_put(vbd->bdev, vbd->readonly ? 
FMODE_READ : FMODE_WRITE); - vbd->bdev = NULL; -} - -int vbd_translate(struct phys_req *req, struct blkif_st *blkif, int operation) -{ - struct vbd *vbd = &blkif->vbd; - int rc = -EACCES; - - if ((operation != READ) && vbd->readonly) - goto out; - - if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd))) - goto out; - - req->dev = vbd->pdevice; - req->bdev = vbd->bdev; - rc = 0; - - out: - return rc; -} - -void vbd_resize(struct blkif_st *blkif) -{ - struct vbd *vbd = &blkif->vbd; - struct xenbus_transaction xbt; - int err; - struct xenbus_device *dev = blkback_xenbus(blkif->be); - unsigned long long new_size = vbd_size(vbd); - - printk(KERN_INFO "VBD Resize: Domid: %d, Device: (%d, %d)\n", - blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice)); - printk(KERN_INFO "VBD Resize: new size %llu\n", new_size); - vbd->size = new_size; -again: - err = xenbus_transaction_start(&xbt); - if (err) { - printk(KERN_WARNING "Error starting transaction"); - return; - } - err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", - vbd_size(vbd)); - if (err) { - printk(KERN_WARNING "Error writing new size"); - goto abort; - } - /* - * Write the current state; we will use this to synchronize - * the front-end. If the current state is "connected" the - * front-end will get the new size information online. - */ - err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state); - if (err) { - printk(KERN_WARNING "Error writing the state"); - goto abort; - } - - err = xenbus_transaction_end(xbt, 0); - if (err == -EAGAIN) - goto again; - if (err) - printk(KERN_WARNING "Error ending transaction"); -abort: - xenbus_transaction_end(xbt, 1); -} diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c deleted file mode 100644 index b41ed65d..0000000 --- a/drivers/xen/blkback/xenbus.c +++ /dev/null @@ -1,562 +0,0 @@ -/* Xenbus code for blkif backend - Copyright (C) 2005 Rusty Russell - Copyright (C) 2005 XenSource Ltd - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ - -#include -#include -#include -#include "common.h" - -#undef DPRINTK -#define DPRINTK(fmt, args...) 
\ - pr_debug("blkback/xenbus (%s:%d) " fmt ".\n", \ - __func__, __LINE__, ##args) - -struct backend_info { - struct xenbus_device *dev; - struct blkif_st *blkif; - struct xenbus_watch backend_watch; - unsigned major; - unsigned minor; - char *mode; -}; - -static void connect(struct backend_info *); -static int connect_ring(struct backend_info *); -static void backend_changed(struct xenbus_watch *, const char **, - unsigned int); - -struct xenbus_device *blkback_xenbus(struct backend_info *be) -{ - return be->dev; -} - -static int blkback_name(struct blkif_st *blkif, char *buf) -{ - char *devpath, *devname; - struct xenbus_device *dev = blkif->be->dev; - - devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL); - if (IS_ERR(devpath)) - return PTR_ERR(devpath); - - devname = strstr(devpath, "/dev/"); - if (devname != NULL) - devname += strlen("/dev/"); - else - devname = devpath; - - snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, devname); - kfree(devpath); - - return 0; -} - -static void update_blkif_status(struct blkif_st *blkif) -{ - int err; - char name[TASK_COMM_LEN]; - - /* Not ready to connect? */ - if (!blkif->irq || !blkif->vbd.bdev) - return; - - /* Already connected? */ - if (blkif->be->dev->state == XenbusStateConnected) - return; - - /* Attempt to connect: exit if we fail to. */ - connect(blkif->be); - if (blkif->be->dev->state != XenbusStateConnected) - return; - - err = blkback_name(blkif, name); - if (err) { - xenbus_dev_error(blkif->be->dev, err, "get blkback dev name"); - return; - } - - err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping); - if (err) { - xenbus_dev_error(blkif->be->dev, err, "block flush"); - return; - } - invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping); - - blkif->xenblkd = kthread_run(blkif_schedule, blkif, name); - if (IS_ERR(blkif->xenblkd)) { - err = PTR_ERR(blkif->xenblkd); - blkif->xenblkd = NULL; - xenbus_dev_error(blkif->be->dev, err, "start xenblkd"); - } -} - - -/* - * sysfs interface for VBD I/O requests - */ - -#define VBD_SHOW(name, format, args...) 
\ - static ssize_t show_##name(struct device *_dev, \ - struct device_attribute *attr, \ - char *buf) \ - { \ - struct xenbus_device *dev = to_xenbus_device(_dev); \ - struct backend_info *be = dev_get_drvdata(&dev->dev); \ - \ - return sprintf(buf, format, ##args); \ - } \ - static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) - -VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req); -VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req); -VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req); -VBD_SHOW(br_req, "%d\n", be->blkif->st_br_req); -VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect); -VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect); - -static struct attribute *vbdstat_attrs[] = { - &dev_attr_oo_req.attr, - &dev_attr_rd_req.attr, - &dev_attr_wr_req.attr, - &dev_attr_br_req.attr, - &dev_attr_rd_sect.attr, - &dev_attr_wr_sect.attr, - NULL -}; - -static struct attribute_group vbdstat_group = { - .name = "statistics", - .attrs = vbdstat_attrs, -}; - -VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor); -VBD_SHOW(mode, "%s\n", be->mode); - -int xenvbd_sysfs_addif(struct xenbus_device *dev) -{ - int error; - - error = device_create_file(&dev->dev, &dev_attr_physical_device); - if (error) - goto fail1; - - error = device_create_file(&dev->dev, &dev_attr_mode); - if (error) - goto fail2; - - error = sysfs_create_group(&dev->dev.kobj, &vbdstat_group); - if (error) - goto fail3; - - return 0; - -fail3: sysfs_remove_group(&dev->dev.kobj, &vbdstat_group); -fail2: device_remove_file(&dev->dev, &dev_attr_mode); -fail1: device_remove_file(&dev->dev, &dev_attr_physical_device); - return error; -} - -void xenvbd_sysfs_delif(struct xenbus_device *dev) -{ - sysfs_remove_group(&dev->dev.kobj, &vbdstat_group); - device_remove_file(&dev->dev, &dev_attr_mode); - device_remove_file(&dev->dev, &dev_attr_physical_device); -} - -static int blkback_remove(struct xenbus_device *dev) -{ - struct backend_info *be = dev_get_drvdata(&dev->dev); - - DPRINTK(""); - - if (be->major || be->minor) - xenvbd_sysfs_delif(dev); - - if (be->backend_watch.node) { - unregister_xenbus_watch(&be->backend_watch); - kfree(be->backend_watch.node); - be->backend_watch.node = NULL; - } - - if (be->blkif) { - blkif_disconnect(be->blkif); - vbd_free(&be->blkif->vbd); - blkif_free(be->blkif); - be->blkif = NULL; - } - - kfree(be); - dev_set_drvdata(&dev->dev, NULL); - return 0; -} - -int blkback_barrier(struct xenbus_transaction xbt, - struct backend_info *be, int state) -{ - struct xenbus_device *dev = be->dev; - int err; - - err = xenbus_printf(xbt, dev->nodename, "feature-barrier", - "%d", state); - if (err) - xenbus_dev_fatal(dev, err, "writing feature-barrier"); - - return err; -} - -/** - * Entry point to this code when a new device is created. Allocate the basic - * structures, and watch the store waiting for the hotplug scripts to tell us - * the device's physical major and minor numbers. Switch to InitWait. 
- */ -static int blkback_probe(struct xenbus_device *dev, - const struct xenbus_device_id *id) -{ - int err; - struct backend_info *be = kzalloc(sizeof(struct backend_info), - GFP_KERNEL); - if (!be) { - xenbus_dev_fatal(dev, -ENOMEM, - "allocating backend structure"); - return -ENOMEM; - } - be->dev = dev; - dev_set_drvdata(&dev->dev, be); - - be->blkif = blkif_alloc(dev->otherend_id); - if (IS_ERR(be->blkif)) { - err = PTR_ERR(be->blkif); - be->blkif = NULL; - xenbus_dev_fatal(dev, err, "creating block interface"); - goto fail; - } - - /* setup back pointer */ - be->blkif->be = be; - - err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed, - "%s/%s", dev->nodename, "physical-device"); - if (err) - goto fail; - - err = xenbus_switch_state(dev, XenbusStateInitWait); - if (err) - goto fail; - - return 0; - -fail: - DPRINTK("failed"); - blkback_remove(dev); - return err; -} - - -/** - * Callback received when the hotplug scripts have placed the physical-device - * node. Read it and the mode node, and create a vbd. If the frontend is - * ready, connect. - */ -static void backend_changed(struct xenbus_watch *watch, - const char **vec, unsigned int len) -{ - int err; - unsigned major; - unsigned minor; - struct backend_info *be - = container_of(watch, struct backend_info, backend_watch); - struct xenbus_device *dev = be->dev; - int cdrom = 0; - char *device_type; - - DPRINTK(""); - - err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x", - &major, &minor); - if (XENBUS_EXIST_ERR(err)) { - /* Since this watch will fire once immediately after it is - registered, we expect this. Ignore it, and wait for the - hotplug scripts. */ - return; - } - if (err != 2) { - xenbus_dev_fatal(dev, err, "reading physical-device"); - return; - } - - if ((be->major || be->minor) && - ((be->major != major) || (be->minor != minor))) { - printk(KERN_WARNING - "blkback: changing physical device (from %x:%x to " - "%x:%x) not supported.\n", be->major, be->minor, - major, minor); - return; - } - - be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL); - if (IS_ERR(be->mode)) { - err = PTR_ERR(be->mode); - be->mode = NULL; - xenbus_dev_fatal(dev, err, "reading mode"); - return; - } - - device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL); - if (!IS_ERR(device_type)) { - cdrom = strcmp(device_type, "cdrom") == 0; - kfree(device_type); - } - - if (be->major == 0 && be->minor == 0) { - /* Front end dir is a number, which is used as the handle. */ - - char *p = strrchr(dev->otherend, '/') + 1; - long handle; - err = strict_strtoul(p, 0, &handle); - if (err) - return; - - be->major = major; - be->minor = minor; - - err = vbd_create(be->blkif, handle, major, minor, - (NULL == strchr(be->mode, 'w')), cdrom); - if (err) { - be->major = be->minor = 0; - xenbus_dev_fatal(dev, err, "creating vbd structure"); - return; - } - - err = xenvbd_sysfs_addif(dev); - if (err) { - vbd_free(&be->blkif->vbd); - be->major = be->minor = 0; - xenbus_dev_fatal(dev, err, "creating sysfs entries"); - return; - } - - /* We're potentially connected now */ - update_blkif_status(be->blkif); - } -} - - -/** - * Callback received when the frontend's state changes. 
- */ -static void frontend_changed(struct xenbus_device *dev, - enum xenbus_state frontend_state) -{ - struct backend_info *be = dev_get_drvdata(&dev->dev); - int err; - - DPRINTK("%s", xenbus_strstate(frontend_state)); - - switch (frontend_state) { - case XenbusStateInitialising: - if (dev->state == XenbusStateClosed) { - printk(KERN_INFO "%s: %s: prepare for reconnect\n", - __func__, dev->nodename); - xenbus_switch_state(dev, XenbusStateInitWait); - } - break; - - case XenbusStateInitialised: - case XenbusStateConnected: - /* Ensure we connect even when two watches fire in - close successsion and we miss the intermediate value - of frontend_state. */ - if (dev->state == XenbusStateConnected) - break; - - /* Enforce precondition before potential leak point. - * blkif_disconnect() is idempotent. - */ - blkif_disconnect(be->blkif); - - err = connect_ring(be); - if (err) - break; - update_blkif_status(be->blkif); - break; - - case XenbusStateClosing: - blkif_disconnect(be->blkif); - xenbus_switch_state(dev, XenbusStateClosing); - break; - - case XenbusStateClosed: - xenbus_switch_state(dev, XenbusStateClosed); - if (xenbus_dev_is_online(dev)) - break; - /* fall through if not online */ - case XenbusStateUnknown: - /* implies blkif_disconnect() via blkback_remove() */ - device_unregister(&dev->dev); - break; - - default: - xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend", - frontend_state); - break; - } -} - - -/* ** Connection ** */ - - -/** - * Write the physical details regarding the block device to the store, and - * switch to Connected state. - */ -static void connect(struct backend_info *be) -{ - struct xenbus_transaction xbt; - int err; - struct xenbus_device *dev = be->dev; - - DPRINTK("%s", dev->otherend); - - /* Supply the information about the device the frontend needs */ -again: - err = xenbus_transaction_start(&xbt); - if (err) { - xenbus_dev_fatal(dev, err, "starting transaction"); - return; - } - - err = blkback_barrier(xbt, be, 1); - if (err) - goto abort; - - err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", - vbd_size(&be->blkif->vbd)); - if (err) { - xenbus_dev_fatal(dev, err, "writing %s/sectors", - dev->nodename); - goto abort; - } - - /* FIXME: use a typename instead */ - err = xenbus_printf(xbt, dev->nodename, "info", "%u", - vbd_info(&be->blkif->vbd)); - if (err) { - xenbus_dev_fatal(dev, err, "writing %s/info", - dev->nodename); - goto abort; - } - err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu", - vbd_secsize(&be->blkif->vbd)); - if (err) { - xenbus_dev_fatal(dev, err, "writing %s/sector-size", - dev->nodename); - goto abort; - } - - err = xenbus_transaction_end(xbt, 0); - if (err == -EAGAIN) - goto again; - if (err) - xenbus_dev_fatal(dev, err, "ending transaction"); - - err = xenbus_switch_state(dev, XenbusStateConnected); - if (err) - xenbus_dev_fatal(dev, err, "switching to Connected state", - dev->nodename); - - return; - abort: - xenbus_transaction_end(xbt, 1); -} - - -static int connect_ring(struct backend_info *be) -{ - struct xenbus_device *dev = be->dev; - unsigned long ring_ref; - unsigned int evtchn; - char protocol[64] = ""; - int err; - - DPRINTK("%s", dev->otherend); - - err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", - &ring_ref, "event-channel", "%u", &evtchn, NULL); - if (err) { - xenbus_dev_fatal(dev, err, - "reading %s/ring-ref and event-channel", - dev->otherend); - return err; - } - - be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; - err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", - "%63s", 
protocol, NULL); - if (err) - strcpy(protocol, "unspecified, assuming native"); - else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE)) - be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; - else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32)) - be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32; - else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64)) - be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64; - else { - xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); - return -1; - } - printk(KERN_INFO - "blkback: ring-ref %ld, event-channel %d, protocol %d (%s)\n", - ring_ref, evtchn, be->blkif->blk_protocol, protocol); - - /* Map the shared frame, irq etc. */ - err = blkif_map(be->blkif, ring_ref, evtchn); - if (err) { - xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u", - ring_ref, evtchn); - return err; - } - - return 0; -} - - -/* ** Driver Registration ** */ - - -static const struct xenbus_device_id blkback_ids[] = { - { "vbd" }, - { "" } -}; - - -static struct xenbus_driver blkback = { - .name = "vbd", - .owner = THIS_MODULE, - .ids = blkback_ids, - .probe = blkback_probe, - .remove = blkback_remove, - .otherend_changed = frontend_changed -}; - - -int blkif_xenbus_init(void) -{ - return xenbus_register_backend(&blkback); -} -- cgit v0.10.2 From 2035e776050aea57fb5255557216473e82793f2c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 18 Apr 2011 17:29:14 -0400 Subject: ext4: check for ext[23] file system features when mounting as ext[23] Provide better emulation for ext[23] mode by enforcing that the file system does not have any unsupported file system features as defined by ext[23] when emulating the ext[23] file system driver when CONFIG_EXT4_USE_FOR_EXT23 is defined. This causes the file system type information in /proc/mounts to be correct for the automatically mounted root file system. This also means that "mount -t ext2 /dev/sda /mnt" will fail if /dev/sda contains an ext3 or ext4 file system, just as one would expect if the original ext2 file system driver were in use. 
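In essence the new check reduces to a mask test against the ext2-era feature sets. The following is a condensed sketch of the ext2 variant of the helper that the patch adds further down (the comments are added here for illustration; the ext3 variant additionally requires EXT4_FEATURE_COMPAT_HAS_JOURNAL):

static inline int ext2_feature_set_ok(struct super_block *sb)
{
	/* Any incompat feature outside the ext2 set rules out an ext2 mount. */
	if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP))
		return 0;
	/* A read-only mount does not need to understand ro_compat features. */
	if (sb->s_flags & MS_RDONLY)
		return 1;
	/* A read-write mount must also stay within the ext2 ro_compat set. */
	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))
		return 0;
	return 1;
}

ext4_fill_super() then refuses the mount (goto failed_mount) when IS_EXT2_SB(sb) is true but this check fails, as shown in the hunk below.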
Signed-off-by: "Theodore Ts'o" diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 4daaf2b..076c5d2 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1351,6 +1351,21 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) #define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */ #define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */ +#define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR +#define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ + EXT4_FEATURE_INCOMPAT_META_BG) +#define EXT2_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ + EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT4_FEATURE_RO_COMPAT_BTREE_DIR) + +#define EXT3_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR +#define EXT3_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ + EXT4_FEATURE_INCOMPAT_RECOVER| \ + EXT4_FEATURE_INCOMPAT_META_BG) +#define EXT3_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ + EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT4_FEATURE_RO_COMPAT_BTREE_DIR) + #define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR #define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ EXT4_FEATURE_INCOMPAT_RECOVER| \ diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8553dfb..cb22783 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -75,11 +75,27 @@ static void ext4_write_super(struct super_block *sb); static int ext4_freeze(struct super_block *sb); static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data); +static inline int ext2_feature_set_ok(struct super_block *sb); +static inline int ext3_feature_set_ok(struct super_block *sb); static int ext4_feature_set_ok(struct super_block *sb, int readonly); static void ext4_destroy_lazyinit_thread(void); static void ext4_unregister_li_request(struct super_block *sb); static void ext4_clear_request_list(void); +#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) +static struct file_system_type ext2_fs_type = { + .owner = THIS_MODULE, + .name = "ext2", + .mount = ext4_mount, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; +#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type) +#else +#define IS_EXT2_SB(sb) (0) +#endif + + #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) static struct file_system_type ext3_fs_type = { .owner = THIS_MODULE, @@ -3187,6 +3203,28 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "feature flags set on rev 0 fs, " "running e2fsck is recommended"); + if (IS_EXT2_SB(sb)) { + if (ext2_feature_set_ok(sb)) + ext4_msg(sb, KERN_INFO, "mounting ext2 file system " + "using the ext4 subsystem"); + else { + ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due " + "to feature incompatibilities"); + goto failed_mount; + } + } + + if (IS_EXT3_SB(sb)) { + if (ext3_feature_set_ok(sb)) + ext4_msg(sb, KERN_INFO, "mounting ext3 file system " + "using the ext4 subsystem"); + else { + ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due " + "to feature incompatibilities"); + goto failed_mount; + } + } + /* * Check feature flags regardless of the revision level, since we * previously didn't change the revision level when setting the flags, @@ -4772,14 +4810,6 @@ static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, } #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) -static 
struct file_system_type ext2_fs_type = { - .owner = THIS_MODULE, - .name = "ext2", - .mount = ext4_mount, - .kill_sb = kill_block_super, - .fs_flags = FS_REQUIRES_DEV, -}; - static inline void register_as_ext2(void) { int err = register_filesystem(&ext2_fs_type); @@ -4792,10 +4822,22 @@ static inline void unregister_as_ext2(void) { unregister_filesystem(&ext2_fs_type); } + +static inline int ext2_feature_set_ok(struct super_block *sb) +{ + if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP)) + return 0; + if (sb->s_flags & MS_RDONLY) + return 1; + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP)) + return 0; + return 1; +} MODULE_ALIAS("ext2"); #else static inline void register_as_ext2(void) { } static inline void unregister_as_ext2(void) { } +static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; } #endif #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) @@ -4811,10 +4853,24 @@ static inline void unregister_as_ext3(void) { unregister_filesystem(&ext3_fs_type); } + +static inline int ext3_feature_set_ok(struct super_block *sb) +{ + if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP)) + return 0; + if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) + return 0; + if (sb->s_flags & MS_RDONLY) + return 1; + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP)) + return 0; + return 1; +} MODULE_ALIAS("ext3"); #else static inline void register_as_ext3(void) { } static inline void unregister_as_ext3(void) { } +static inline int ext3_feature_set_ok(struct super_block *sb) { return 0; } #endif static struct file_system_type ext4_fs_type = { @@ -4898,8 +4954,8 @@ static int __init ext4_init_fs(void) err = init_inodecache(); if (err) goto out1; - register_as_ext2(); register_as_ext3(); + register_as_ext2(); err = register_filesystem(&ext4_fs_type); if (err) goto out; -- cgit v0.10.2 From 64a14b51bed6427a2e6d68ed687027f065f5a156 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Fri, 1 Apr 2011 12:41:20 +0200 Subject: cyclades: Drop __TIME__ usage The kernel already prints its build timestamp during boot, no need to repeat it in random drivers and produce different object files each time. Acked-by: Greg Kroah-Hartman Signed-off-by: Michal Marek diff --git a/drivers/tty/cyclades.c b/drivers/tty/cyclades.c index c99728f..d6f6dd2 100644 --- a/drivers/tty/cyclades.c +++ b/drivers/tty/cyclades.c @@ -4099,8 +4099,7 @@ static int __init cy_init(void) if (!cy_serial_driver) goto err; - printk(KERN_INFO "Cyclades driver " CY_VERSION " (built %s %s)\n", - __DATE__, __TIME__); + printk(KERN_INFO "Cyclades driver " CY_VERSION "\n"); /* Initialize the tty_driver structure */ -- cgit v0.10.2 From 2cae8de7b0464cc4c246517fca10f04593f46a3b Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Fri, 1 Apr 2011 12:41:20 +0200 Subject: nozomi: Drop __TIME__ usage The kernel already prints its build timestamp during boot, no need to repeat it in random drivers and produce different object files each time. 
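The change is mechanical across this series of drivers. As a hedged illustration of the before/after pattern (hypothetical MYDRV_* macro names, not taken from any of the patched drivers):

/* Before: __DATE__/__TIME__ expand differently on every rebuild, so two
 * builds of identical source produce different object files. */
#define MYDRV_VERSION_OLD  "mydrv 2.1d (built " __DATE__ " " __TIME__ ")"

/* After: only the stable version string remains; the kernel's own boot
 * banner already records when the running kernel was built. */
#define MYDRV_VERSION_NEW  "mydrv 2.1d"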
Acked-by: Greg Kroah-Hartman Signed-off-by: Michal Marek diff --git a/drivers/tty/nozomi.c b/drivers/tty/nozomi.c index f4f1116..db16048 100644 --- a/drivers/tty/nozomi.c +++ b/drivers/tty/nozomi.c @@ -61,8 +61,7 @@ #include -#define VERSION_STRING DRIVER_DESC " 2.1d (build date: " \ - __DATE__ " " __TIME__ ")" +#define VERSION_STRING DRIVER_DESC " 2.1d" /* Macros definitions */ -- cgit v0.10.2 From 372c05c4959aaccdd671f75e0fd332629cf15964 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Fri, 1 Apr 2011 12:41:20 +0200 Subject: media/radio-maxiradio: Drop __TIME__ usage The kernel already prints its build timestamp during boot, no need to repeat it in random drivers and produce different object files each time. Acked-by: Mauro Carvalho Chehab Cc: linux-media@vger.kernel.org Signed-off-by: Michal Marek diff --git a/drivers/media/radio/radio-maxiradio.c b/drivers/media/radio/radio-maxiradio.c index 5c2a905..e83e840 100644 --- a/drivers/media/radio/radio-maxiradio.c +++ b/drivers/media/radio/radio-maxiradio.c @@ -412,8 +412,7 @@ static int __devinit maxiradio_init_one(struct pci_dev *pdev, const struct pci_d goto err_out_free_region; } - v4l2_info(v4l2_dev, "version " DRIVER_VERSION - " time " __TIME__ " " __DATE__ "\n"); + v4l2_info(v4l2_dev, "version " DRIVER_VERSION "\n"); v4l2_info(v4l2_dev, "found Guillemot MAXI Radio device (io = 0x%x)\n", dev->io); -- cgit v0.10.2 From 7cf444639101ad8671cb210addbffbdebd07c068 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Fri, 1 Apr 2011 12:41:20 +0200 Subject: media/cx231xx: Drop __TIME__ usage The kernel already prints its build timestamp during boot, no need to repeat it in random drivers and produce different object files each time. Acked-by: Mauro Carvalho Chehab Cc: linux-media@vger.kernel.org Signed-off-by: Michal Marek diff --git a/drivers/media/video/cx231xx/cx231xx-avcore.c b/drivers/media/video/cx231xx/cx231xx-avcore.c index 62843d3..98e8765 100644 --- a/drivers/media/video/cx231xx/cx231xx-avcore.c +++ b/drivers/media/video/cx231xx/cx231xx-avcore.c @@ -1354,7 +1354,7 @@ void cx231xx_dump_SC_reg(struct cx231xx *dev) { u8 value[4] = { 0, 0, 0, 0 }; int status = 0; - cx231xx_info("cx231xx_dump_SC_reg %s!\n", __TIME__); + cx231xx_info("cx231xx_dump_SC_reg!\n"); status = cx231xx_read_ctrl_reg(dev, VRT_GET_REGISTER, BOARD_CFG_STAT, value, 4); -- cgit v0.10.2 From 571b16da39922cf71db41c10852d798a44686c15 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Fri, 1 Apr 2011 12:41:20 +0200 Subject: aacraid: Drop __TIME__ usage The kernel already prints its build timestamp during boot, no need to repeat it in random drivers and produce different object files each time. 
Cc: Adaptec OEM Raid Solutions Cc: linux-scsi@vger.kernel.org Signed-off-by: Michal Marek diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 4ff2652..3382475 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -59,7 +59,6 @@ #ifndef AAC_DRIVER_BRANCH #define AAC_DRIVER_BRANCH "" #endif -#define AAC_DRIVER_BUILD_DATE __DATE__ " " __TIME__ #define AAC_DRIVERNAME "aacraid" #ifdef AAC_DRIVER_BUILD @@ -67,7 +66,7 @@ #define str(x) _str(x) #define AAC_DRIVER_FULL_VERSION AAC_DRIVER_VERSION "[" str(AAC_DRIVER_BUILD) "]" AAC_DRIVER_BRANCH #else -#define AAC_DRIVER_FULL_VERSION AAC_DRIVER_VERSION AAC_DRIVER_BRANCH " " AAC_DRIVER_BUILD_DATE +#define AAC_DRIVER_FULL_VERSION AAC_DRIVER_VERSION AAC_DRIVER_BRANCH #endif MODULE_AUTHOR("Red Hat Inc and Adaptec"); -- cgit v0.10.2 From 4c315a5d57300db83d2eb7b8d5b6019e1fba99fc Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Fri, 1 Apr 2011 12:41:20 +0200 Subject: scsi/in2000: Drop __TIME__ usage The kernel already prints its build timestamp during boot, no need to repeat it in random drivers and produce different object files each time. Cc: "James E.J. Bottomley" Cc: linux-scsi@vger.kernel.org Signed-off-by: Michal Marek diff --git a/drivers/scsi/in2000.c b/drivers/scsi/in2000.c index 6568aab..6096d93 100644 --- a/drivers/scsi/in2000.c +++ b/drivers/scsi/in2000.c @@ -2228,7 +2228,7 @@ static int in2000_proc_info(struct Scsi_Host *instance, char *buf, char **start, bp = buf; *bp = '\0'; if (hd->proc & PR_VERSION) { - sprintf(tbuf, "\nVersion %s - %s. Compiled %s %s", IN2000_VERSION, IN2000_DATE, __DATE__, __TIME__); + sprintf(tbuf, "\nVersion %s - %s.", IN2000_VERSION, IN2000_DATE); strcat(bp, tbuf); } if (hd->proc & PR_INFO) { -- cgit v0.10.2 From 565502f8894739ce7b5fd131f39c930dc4351710 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Fri, 1 Apr 2011 12:41:20 +0200 Subject: scsi/wd33c93: Drop __TIME__ usage The kernel already prints its build timestamp during boot, no need to repeat it in random drivers and produce different object files each time. Cc: "James E.J. Bottomley" Cc: linux-scsi@vger.kernel.org Signed-off-by: Michal Marek diff --git a/drivers/scsi/wd33c93.c b/drivers/scsi/wd33c93.c index 5f697e0..6c983a2 100644 --- a/drivers/scsi/wd33c93.c +++ b/drivers/scsi/wd33c93.c @@ -2052,8 +2052,7 @@ wd33c93_init(struct Scsi_Host *instance, const wd33c93_regs regs, for (i = 0; i < MAX_SETUP_ARGS; i++) printk("%s,", setup_args[i]); printk("\n"); - printk(" Version %s - %s, Compiled %s at %s\n", - WD33C93_VERSION, WD33C93_DATE, __DATE__, __TIME__); + printk(" Version %s - %s\n", WD33C93_VERSION, WD33C93_DATE); } int @@ -2133,8 +2132,8 @@ wd33c93_proc_info(struct Scsi_Host *instance, char *buf, char **start, off_t off bp = buf; *bp = '\0'; if (hd->proc & PR_VERSION) { - sprintf(tbuf, "\nVersion %s - %s. Compiled %s %s", - WD33C93_VERSION, WD33C93_DATE, __DATE__, __TIME__); + sprintf(tbuf, "\nVersion %s - %s.", + WD33C93_VERSION, WD33C93_DATE); strcat(bp, tbuf); } if (hd->proc & PR_INFO) { -- cgit v0.10.2 From 9f3ad1cab2a0357d5866d45413fa2ee3e88e496f Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Fri, 1 Apr 2011 12:41:20 +0200 Subject: rio: Drop __DATE__ usage The kernel already prints its build timestamp during boot, no need to repeat it in random drivers and produce different object files each time. As the buildDate field is part of the userspace API, I replaced it with the date of the last code change. 
Acked-by: Greg Kroah-Hartman Signed-off-by: Michal Marek diff --git a/drivers/staging/generic_serial/rio/rioinit.c b/drivers/staging/generic_serial/rio/rioinit.c index 24a282b..fb62b38 100644 --- a/drivers/staging/generic_serial/rio/rioinit.c +++ b/drivers/staging/generic_serial/rio/rioinit.c @@ -381,7 +381,7 @@ struct rioVersion *RIOVersid(void) { strlcpy(stVersion.version, "RIO driver for linux V1.0", sizeof(stVersion.version)); - strlcpy(stVersion.buildDate, __DATE__, + strlcpy(stVersion.buildDate, "Aug 15 2010", sizeof(stVersion.buildDate)); return &stVersion; -- cgit v0.10.2 From 152ba3942276c2a240703669ae4a3099e0a79451 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Fri, 1 Apr 2011 12:41:20 +0200 Subject: edac: Drop __DATE__ usage The kernel already prints its build timestamp during boot, no need to repeat it in random drivers and produce different object files each time. Cc: Doug Thompson Cc: bluesmoke-devel@lists.sourceforge.net Cc: linux-edac@vger.kernel.org Acked-by: Mauro Carvalho Chehab Signed-off-by: Michal Marek diff --git a/drivers/edac/amd76x_edac.c b/drivers/edac/amd76x_edac.c index cace0a7..e47e73b 100644 --- a/drivers/edac/amd76x_edac.c +++ b/drivers/edac/amd76x_edac.c @@ -19,7 +19,7 @@ #include #include "edac_core.h" -#define AMD76X_REVISION " Ver: 2.0.2 " __DATE__ +#define AMD76X_REVISION " Ver: 2.0.2" #define EDAC_MOD_STR "amd76x_edac" #define amd76x_printk(level, fmt, arg...) \ diff --git a/drivers/edac/amd8111_edac.c b/drivers/edac/amd8111_edac.c index 35b78d0..ddd8900 100644 --- a/drivers/edac/amd8111_edac.c +++ b/drivers/edac/amd8111_edac.c @@ -33,7 +33,7 @@ #include "edac_module.h" #include "amd8111_edac.h" -#define AMD8111_EDAC_REVISION " Ver: 1.0.0 " __DATE__ +#define AMD8111_EDAC_REVISION " Ver: 1.0.0" #define AMD8111_EDAC_MOD_STR "amd8111_edac" #define PCI_DEVICE_ID_AMD_8111_PCI 0x7460 diff --git a/drivers/edac/amd8131_edac.c b/drivers/edac/amd8131_edac.c index b432d60..a5c6805 100644 --- a/drivers/edac/amd8131_edac.c +++ b/drivers/edac/amd8131_edac.c @@ -33,7 +33,7 @@ #include "edac_module.h" #include "amd8131_edac.h" -#define AMD8131_EDAC_REVISION " Ver: 1.0.0 " __DATE__ +#define AMD8131_EDAC_REVISION " Ver: 1.0.0" #define AMD8131_EDAC_MOD_STR "amd8131_edac" /* Wrapper functions for accessing PCI configuration space */ diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c index b9a781c..3400ae3 100644 --- a/drivers/edac/cpc925_edac.c +++ b/drivers/edac/cpc925_edac.c @@ -30,7 +30,7 @@ #include "edac_core.h" #include "edac_module.h" -#define CPC925_EDAC_REVISION " Ver: 1.0.0 " __DATE__ +#define CPC925_EDAC_REVISION " Ver: 1.0.0" #define CPC925_EDAC_MOD_STR "cpc925_edac" #define cpc925_printk(level, fmt, arg...) \ diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c index ec302d4..1af531a 100644 --- a/drivers/edac/e752x_edac.c +++ b/drivers/edac/e752x_edac.c @@ -24,7 +24,7 @@ #include #include "edac_core.h" -#define E752X_REVISION " Ver: 2.0.2 " __DATE__ +#define E752X_REVISION " Ver: 2.0.2" #define EDAC_MOD_STR "e752x_edac" static int report_non_memory_errors; diff --git a/drivers/edac/e7xxx_edac.c b/drivers/edac/e7xxx_edac.c index 1731d72..6ffb6d2 100644 --- a/drivers/edac/e7xxx_edac.c +++ b/drivers/edac/e7xxx_edac.c @@ -29,7 +29,7 @@ #include #include "edac_core.h" -#define E7XXX_REVISION " Ver: 2.0.2 " __DATE__ +#define E7XXX_REVISION " Ver: 2.0.2" #define EDAC_MOD_STR "e7xxx_edac" #define e7xxx_printk(level, fmt, arg...) 
\ diff --git a/drivers/edac/edac_module.c b/drivers/edac/edac_module.c index be4b075..5ddaa86 100644 --- a/drivers/edac/edac_module.c +++ b/drivers/edac/edac_module.c @@ -15,7 +15,7 @@ #include "edac_core.h" #include "edac_module.h" -#define EDAC_VERSION "Ver: 2.1.0 " __DATE__ +#define EDAC_VERSION "Ver: 2.1.0" #ifdef CONFIG_EDAC_DEBUG /* Values of 0 to 4 will generate output */ diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c index a5cefab..3d0b726 100644 --- a/drivers/edac/i5000_edac.c +++ b/drivers/edac/i5000_edac.c @@ -27,7 +27,7 @@ /* * Alter this version for the I5000 module when modifications are made */ -#define I5000_REVISION " Ver: 2.0.12 " __DATE__ +#define I5000_REVISION " Ver: 2.0.12" #define EDAC_MOD_STR "i5000_edac" #define i5000_printk(level, fmt, arg...) \ diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c index 38a9be9..fd362b4 100644 --- a/drivers/edac/i5400_edac.c +++ b/drivers/edac/i5400_edac.c @@ -33,7 +33,7 @@ /* * Alter this version for the I5400 module when modifications are made */ -#define I5400_REVISION " Ver: 1.0.0 " __DATE__ +#define I5400_REVISION " Ver: 1.0.0" #define EDAC_MOD_STR "i5400_edac" diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index 76d1f57..ff320c0 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -31,7 +31,7 @@ /* * Alter this version for the I7300 module when modifications are made */ -#define I7300_REVISION " Ver: 1.0.0 " __DATE__ +#define I7300_REVISION " Ver: 1.0.0" #define EDAC_MOD_STR "i7300_edac" diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 81154ab..3f320ba 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -59,7 +59,7 @@ MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices"); /* * Alter this version for the module when modifications are made */ -#define I7CORE_REVISION " Ver: 1.0.0 " __DATE__ +#define I7CORE_REVISION " Ver: 1.0.0" #define EDAC_MOD_STR "i7core_edac" /* diff --git a/drivers/edac/i82860_edac.c b/drivers/edac/i82860_edac.c index b8a95cf..931a057 100644 --- a/drivers/edac/i82860_edac.c +++ b/drivers/edac/i82860_edac.c @@ -16,7 +16,7 @@ #include #include "edac_core.h" -#define I82860_REVISION " Ver: 2.0.2 " __DATE__ +#define I82860_REVISION " Ver: 2.0.2" #define EDAC_MOD_STR "i82860_edac" #define i82860_printk(level, fmt, arg...) \ diff --git a/drivers/edac/i82875p_edac.c b/drivers/edac/i82875p_edac.c index b2fd1e8..33864c6 100644 --- a/drivers/edac/i82875p_edac.c +++ b/drivers/edac/i82875p_edac.c @@ -20,7 +20,7 @@ #include #include "edac_core.h" -#define I82875P_REVISION " Ver: 2.0.2 " __DATE__ +#define I82875P_REVISION " Ver: 2.0.2" #define EDAC_MOD_STR "i82875p_edac" #define i82875p_printk(level, fmt, arg...) \ diff --git a/drivers/edac/i82975x_edac.c b/drivers/edac/i82975x_edac.c index 92e65e7..a5da732 100644 --- a/drivers/edac/i82975x_edac.c +++ b/drivers/edac/i82975x_edac.c @@ -16,7 +16,7 @@ #include #include "edac_core.h" -#define I82975X_REVISION " Ver: 1.0.0 " __DATE__ +#define I82975X_REVISION " Ver: 1.0.0" #define EDAC_MOD_STR "i82975x_edac" #define i82975x_printk(level, fmt, arg...) 
\ diff --git a/drivers/edac/mpc85xx_edac.h b/drivers/edac/mpc85xx_edac.h index cb24df8..932016f 100644 --- a/drivers/edac/mpc85xx_edac.h +++ b/drivers/edac/mpc85xx_edac.h @@ -11,7 +11,7 @@ #ifndef _MPC85XX_EDAC_H_ #define _MPC85XX_EDAC_H_ -#define MPC85XX_REVISION " Ver: 2.0.0 " __DATE__ +#define MPC85XX_REVISION " Ver: 2.0.0" #define EDAC_MOD_STR "MPC85xx_edac" #define mpc85xx_printk(level, fmt, arg...) \ diff --git a/drivers/edac/mv64x60_edac.h b/drivers/edac/mv64x60_edac.h index e042e2d..c7f209c 100644 --- a/drivers/edac/mv64x60_edac.h +++ b/drivers/edac/mv64x60_edac.h @@ -12,7 +12,7 @@ #ifndef _MV64X60_EDAC_H_ #define _MV64X60_EDAC_H_ -#define MV64x60_REVISION " Ver: 2.0.0 " __DATE__ +#define MV64x60_REVISION " Ver: 2.0.0" #define EDAC_MOD_STR "MV64x60_edac" #define mv64x60_printk(level, fmt, arg...) \ diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c index c1f0045..208244e 100644 --- a/drivers/edac/ppc4xx_edac.c +++ b/drivers/edac/ppc4xx_edac.c @@ -113,7 +113,7 @@ #define EDAC_OPSTATE_UNKNOWN_STR "unknown" #define PPC4XX_EDAC_MODULE_NAME "ppc4xx_edac" -#define PPC4XX_EDAC_MODULE_REVISION "v1.0.0 " __DATE__ +#define PPC4XX_EDAC_MODULE_REVISION "v1.0.0" #define PPC4XX_EDAC_MESSAGE_SIZE 256 diff --git a/drivers/edac/r82600_edac.c b/drivers/edac/r82600_edac.c index 6a822c6..387997a 100644 --- a/drivers/edac/r82600_edac.c +++ b/drivers/edac/r82600_edac.c @@ -22,7 +22,7 @@ #include #include "edac_core.h" -#define R82600_REVISION " Ver: 2.0.2 " __DATE__ +#define R82600_REVISION " Ver: 2.0.2" #define EDAC_MOD_STR "r82600_edac" #define r82600_printk(level, fmt, arg...) \ -- cgit v0.10.2 From a1b666657c3a691c4f8a0025905e88e8b4baa360 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Fri, 1 Apr 2011 12:41:20 +0200 Subject: pmcraid: Drop __DATE__ usage The kernel already prints its build timestamp during boot, no need to repeat it in random drivers and produce different object files each time. Cc: Anil Ravindranath Cc: linux-scsi@vger.kernel.org Signed-off-by: Michal Marek diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index bcf858e..c83bc5e 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -4249,8 +4249,8 @@ static ssize_t pmcraid_show_drv_version( char *buf ) { - return snprintf(buf, PAGE_SIZE, "version: %s, build date: %s\n", - PMCRAID_DRIVER_VERSION, PMCRAID_DRIVER_DATE); + return snprintf(buf, PAGE_SIZE, "version: %s\n", + PMCRAID_DRIVER_VERSION); } static struct device_attribute pmcraid_driver_version_attr = { @@ -6093,9 +6093,8 @@ static int __init pmcraid_init(void) dev_t dev; int error; - pmcraid_info("%s Device Driver version: %s %s\n", - PMCRAID_DRIVER_NAME, - PMCRAID_DRIVER_VERSION, PMCRAID_DRIVER_DATE); + pmcraid_info("%s Device Driver version: %s\n", + PMCRAID_DRIVER_NAME, PMCRAID_DRIVER_VERSION); error = alloc_chrdev_region(&dev, 0, PMCRAID_MAX_ADAPTERS, diff --git a/drivers/scsi/pmcraid.h b/drivers/scsi/pmcraid.h index 4db210d..d302737 100644 --- a/drivers/scsi/pmcraid.h +++ b/drivers/scsi/pmcraid.h @@ -43,7 +43,6 @@ #define PMCRAID_DRIVER_NAME "PMC MaxRAID" #define PMCRAID_DEVFILE "pmcsas" #define PMCRAID_DRIVER_VERSION "1.0.3" -#define PMCRAID_DRIVER_DATE __DATE__ #define PMCRAID_FW_VERSION_1 0x002 -- cgit v0.10.2 From 2a086e5d3a23570735f75b784d29b93068070833 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 3 Apr 2011 00:09:26 +0900 Subject: TOMOYO: Fix race on updating profile's comment line. In tomoyo_write_profile() since 2.6.34, a lock was by error missing when replacing profile's comment line. 
If multiple threads attempted echo '0-COMMENT=comment' > /sys/kernel/security/tomoyo/profile in parallel, garbage collector will fail to kfree() the old value. Protect the replacement using a lock. Also, keep the old value rather than replace with empty string when out of memory error has occurred. Signed-off-by: Xiaochen Wang Signed-off-by: Tetsuo Handa Signed-off-by: James Morris diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c index 7556315..2b7b1a1 100644 --- a/security/tomoyo/common.c +++ b/security/tomoyo/common.c @@ -459,8 +459,16 @@ static int tomoyo_write_profile(struct tomoyo_io_buffer *head) if (profile == &tomoyo_default_profile) return -EINVAL; if (!strcmp(data, "COMMENT")) { - const struct tomoyo_path_info *old_comment = profile->comment; - profile->comment = tomoyo_get_name(cp); + static DEFINE_SPINLOCK(lock); + const struct tomoyo_path_info *new_comment + = tomoyo_get_name(cp); + const struct tomoyo_path_info *old_comment; + if (!new_comment) + return -ENOMEM; + spin_lock(&lock); + old_comment = profile->comment; + profile->comment = new_comment; + spin_unlock(&lock); tomoyo_put_name(old_comment); return 0; } -- cgit v0.10.2 From e4f5f26d8336318a5aa0858223c81cf29fcf5f68 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 3 Apr 2011 00:11:50 +0900 Subject: TOMOYO: Don't add / for allow_unmount permission check. "mount --bind /path/to/file1 /path/to/file2" is legal. Therefore, "umount /path/to/file2" is also legal. Do not automatically append trailing '/' if pathname to be unmounted does not end with '/'. Signed-off-by: Tetsuo Handa Signed-off-by: James Morris diff --git a/security/tomoyo/file.c b/security/tomoyo/file.c index cb09f1f..d64e8ec 100644 --- a/security/tomoyo/file.c +++ b/security/tomoyo/file.c @@ -1011,7 +1011,6 @@ int tomoyo_path_perm(const u8 operation, struct path *path) break; case TOMOYO_TYPE_RMDIR: case TOMOYO_TYPE_CHROOT: - case TOMOYO_TYPE_UMOUNT: tomoyo_add_slash(&buf); break; } -- cgit v0.10.2 From c0fa797ae6cd02ff87c0bfe0d509368a3b45640e Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 3 Apr 2011 00:12:54 +0900 Subject: TOMOYO: Fix infinite loop bug when reading /sys/kernel/security/tomoyo/audit In tomoyo_flush(), head->r.w[0] holds pointer to string data to be printed. But head->r.w[0] was updated only when the string data was partially printed (because head->r.w[0] will be updated by head->r.w[1] later if completely printed). However, regarding /sys/kernel/security/tomoyo/query , an additional '\0' is printed after the string data was completely printed. But if free space for read buffer became 0 before printing the additional '\0', tomoyo_flush() was returning without updating head->r.w[0]. As a result, tomoyo_flush() forever reprints already printed string data. Signed-off-by: Tetsuo Handa Signed-off-by: James Morris diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c index 2b7b1a1..a0d09e5 100644 --- a/security/tomoyo/common.c +++ b/security/tomoyo/common.c @@ -108,10 +108,9 @@ static bool tomoyo_flush(struct tomoyo_io_buffer *head) head->read_user_buf += len; w += len; } - if (*w) { - head->r.w[0] = w; + head->r.w[0] = w; + if (*w) return false; - } /* Add '\0' for query. */ if (head->poll) { if (!head->read_user_buf_avail || -- cgit v0.10.2 From 86c0f043a737dadf034a4e6f29aefb074f4a1146 Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Mon, 18 Apr 2011 10:14:57 +0000 Subject: s3fb: add DDC support Add I2C support for the DDC bus and also default mode initialization by reading monitor EDID to the s3fb driver. 
Tested on Trio64V+ (2 cards), Trio64V2/DX, Virge (3 cards), Virge/DX (3 cards), Virge/GX2, Trio3D/2X (4 cards), Trio3D. Will probably not work on Trio32 - my 2 cards have DDC support in BIOS that looks different from the other cards but the DDC pins on the VGA connector are not connected. Signed-off-by: Ondrej Zary Signed-off-by: Paul Mundt diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index e6a8d8c..e2126b5 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -1463,6 +1463,14 @@ config FB_S3 ---help--- Driver for graphics boards with S3 Trio / S3 Virge chip. +config FB_S3_DDC + bool "DDC for S3 support" + depends on FB_S3 + select FB_DDC + default y + help + Say Y here if you want DDC support for your S3 graphics card. + config FB_SAVAGE tristate "S3 Savage support" depends on FB && PCI && EXPERIMENTAL diff --git a/drivers/video/s3fb.c b/drivers/video/s3fb.c index c4482f2..9a34520 100644 --- a/drivers/video/s3fb.c +++ b/drivers/video/s3fb.c @@ -25,6 +25,9 @@ #include /* Why should fb driver call console functions? because console_lock() */ #include