From 63310467a3d1ed6a0460ec1f4268126cd1ceec2e Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 20 Jan 2011 11:12:26 -0600 Subject: mm: Remove support for kmem_cache_name() The last user was ext4 and Eric Sandeen removed the call in a recent patch. See the following URL for the discussion: http://marc.info/?l=linux-ext4&m=129546975702198&w=2 Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg diff --git a/include/linux/slab.h b/include/linux/slab.h index fa90866..ad4dd1c 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -105,7 +105,6 @@ void kmem_cache_destroy(struct kmem_cache *); int kmem_cache_shrink(struct kmem_cache *); void kmem_cache_free(struct kmem_cache *, void *); unsigned int kmem_cache_size(struct kmem_cache *); -const char *kmem_cache_name(struct kmem_cache *); /* * Please use this macro to create slab caches. Simply specify the diff --git a/mm/slab.c b/mm/slab.c index 37961d1..4bab2d1 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2147,8 +2147,6 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) * * @name must be valid until the cache is destroyed. This implies that * the module calling this has to destroy the cache before getting unloaded. - * Note that kmem_cache_name() is not guaranteed to return the same pointer, - * therefore applications must manage it themselves. * * The flags are * @@ -3840,12 +3838,6 @@ unsigned int kmem_cache_size(struct kmem_cache *cachep) } EXPORT_SYMBOL(kmem_cache_size); -const char *kmem_cache_name(struct kmem_cache *cachep) -{ - return cachep->name; -} -EXPORT_SYMBOL_GPL(kmem_cache_name); - /* * This initializes kmem_list3 or resizes various caches for all nodes. */ diff --git a/mm/slob.c b/mm/slob.c index 3588eaa..46e0aee 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -666,12 +666,6 @@ unsigned int kmem_cache_size(struct kmem_cache *c) } EXPORT_SYMBOL(kmem_cache_size); -const char *kmem_cache_name(struct kmem_cache *c) -{ - return c->name; -} -EXPORT_SYMBOL(kmem_cache_name); - int kmem_cache_shrink(struct kmem_cache *d) { return 0; diff --git a/mm/slub.c b/mm/slub.c index e15aa7f..d2f343a 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2399,12 +2399,6 @@ unsigned int kmem_cache_size(struct kmem_cache *s) } EXPORT_SYMBOL(kmem_cache_size); -const char *kmem_cache_name(struct kmem_cache *s) -{ - return s->name; -} -EXPORT_SYMBOL(kmem_cache_name); - static void list_slab_objects(struct kmem_cache *s, struct page *page, const char *text) { -- cgit v0.10.2 From 335d7c58fcc1b71387a2c38b610b08bb9d3a6fcc Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 26 Jan 2011 11:45:49 -0800 Subject: i2c/busses: Add support for Diolan U2C-12 USB-I2C adapter This patch adds support for the Diolan U2C-12 USB-I2C adapter. It also updates MAINTAINERS to list the author as maintainer. Signed-off-by: Guenter Roeck Signed-off-by: Ben Dooks diff --git a/Documentation/i2c/busses/i2c-diolan-u2c b/Documentation/i2c/busses/i2c-diolan-u2c new file mode 100644 index 0000000..30fe4bb --- /dev/null +++ b/Documentation/i2c/busses/i2c-diolan-u2c @@ -0,0 +1,26 @@ +Kernel driver i2c-diolan-u2c + +Supported adapters: + * Diolan U2C-12 I2C-USB adapter + Documentation: + http://www.diolan.com/i2c/u2c12.html + +Author: Guenter Roeck + +Description +----------- + +This is the driver for the Diolan U2C-12 USB-I2C adapter. + +The Diolan U2C-12 I2C-USB Adapter provides a low cost solution to connect +a computer to I2C slave devices using a USB interface. It also supports +connectivity to SPI devices. 
+ +This driver only supports the I2C interface of U2C-12. The driver does not use +interrupts. + + +Module parameters +----------------- + +* frequency: I2C bus frequency diff --git a/MAINTAINERS b/MAINTAINERS index cf0f3a5..74cad4f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2093,6 +2093,12 @@ F: Documentation/serial/digiepca.txt F: drivers/char/epca* F: drivers/char/digi* +DIOLAN U2C-12 I2C DRIVER +M: Guenter Roeck +L: linux-i2c@vger.kernel.org +S: Maintained +F: drivers/i2c/busses/i2c-diolan-u2c.c + DIRECTORY NOTIFICATION (DNOTIFY) M: Eric Paris S: Maintained diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 113505a..30f8dbd 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -648,6 +648,16 @@ config I2C_EG20T comment "External I2C/SMBus adapter drivers" +config I2C_DIOLAN_U2C + tristate "Diolan U2C-12 USB adapter" + depends on USB + help + If you say yes to this option, support will be included for Diolan + U2C-12, a USB to I2C interface. + + This driver can also be built as a module. If so, the module + will be called i2c-diolan-u2c. + config I2C_PARPORT tristate "Parallel port adapter" depends on PARPORT diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile index 9d2d0ec..3c630b7 100644 --- a/drivers/i2c/busses/Makefile +++ b/drivers/i2c/busses/Makefile @@ -64,6 +64,7 @@ obj-$(CONFIG_I2C_XILINX) += i2c-xiic.o obj-$(CONFIG_I2C_EG20T) += i2c-eg20t.o # External I2C/SMBus adapter drivers +obj-$(CONFIG_I2C_DIOLAN_U2C) += i2c-diolan-u2c.o obj-$(CONFIG_I2C_PARPORT) += i2c-parport.o obj-$(CONFIG_I2C_PARPORT_LIGHT) += i2c-parport-light.o obj-$(CONFIG_I2C_TAOS_EVM) += i2c-taos-evm.o diff --git a/drivers/i2c/busses/i2c-diolan-u2c.c b/drivers/i2c/busses/i2c-diolan-u2c.c new file mode 100644 index 0000000..7636671 --- /dev/null +++ b/drivers/i2c/busses/i2c-diolan-u2c.c @@ -0,0 +1,535 @@ +/* + * Driver for the Diolan u2c-12 USB-I2C adapter + * + * Copyright (c) 2010-2011 Ericsson AB + * + * Derived from: + * i2c-tiny-usb.c + * Copyright (C) 2006-2007 Till Harbaum (Till@Harbaum.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#define DRIVER_NAME "i2c-diolan-u2c" + +#define USB_VENDOR_ID_DIOLAN 0x0abf +#define USB_DEVICE_ID_DIOLAN_U2C 0x3370 + +#define DIOLAN_OUT_EP 0x02 +#define DIOLAN_IN_EP 0x84 + +/* commands via USB, must match command ids in the firmware */ +#define CMD_I2C_READ 0x01 +#define CMD_I2C_WRITE 0x02 +#define CMD_I2C_SCAN 0x03 /* Returns list of detected devices */ +#define CMD_I2C_RELEASE_SDA 0x04 +#define CMD_I2C_RELEASE_SCL 0x05 +#define CMD_I2C_DROP_SDA 0x06 +#define CMD_I2C_DROP_SCL 0x07 +#define CMD_I2C_READ_SDA 0x08 +#define CMD_I2C_READ_SCL 0x09 +#define CMD_GET_FW_VERSION 0x0a +#define CMD_GET_SERIAL 0x0b +#define CMD_I2C_START 0x0c +#define CMD_I2C_STOP 0x0d +#define CMD_I2C_REPEATED_START 0x0e +#define CMD_I2C_PUT_BYTE 0x0f +#define CMD_I2C_GET_BYTE 0x10 +#define CMD_I2C_PUT_ACK 0x11 +#define CMD_I2C_GET_ACK 0x12 +#define CMD_I2C_PUT_BYTE_ACK 0x13 +#define CMD_I2C_GET_BYTE_ACK 0x14 +#define CMD_I2C_SET_SPEED 0x1b +#define CMD_I2C_GET_SPEED 0x1c +#define CMD_I2C_SET_CLK_SYNC 0x24 +#define CMD_I2C_GET_CLK_SYNC 0x25 +#define CMD_I2C_SET_CLK_SYNC_TO 0x26 +#define CMD_I2C_GET_CLK_SYNC_TO 0x27 + +#define RESP_OK 0x00 +#define RESP_FAILED 0x01 +#define RESP_BAD_MEMADDR 0x04 +#define RESP_DATA_ERR 0x05 +#define RESP_NOT_IMPLEMENTED 0x06 +#define RESP_NACK 0x07 +#define RESP_TIMEOUT 0x09 + +#define U2C_I2C_SPEED_FAST 0 /* 400 kHz */ +#define U2C_I2C_SPEED_STD 1 /* 100 kHz */ +#define U2C_I2C_SPEED_2KHZ 242 /* 2 kHz, minimum speed */ +#define U2C_I2C_SPEED(f) ((DIV_ROUND_UP(1000000, (f)) - 10) / 2 + 1) + +#define U2C_I2C_FREQ_FAST 400000 +#define U2C_I2C_FREQ_STD 100000 +#define U2C_I2C_FREQ(s) (1000000 / (2 * (s - 1) + 10)) + +#define DIOLAN_USB_TIMEOUT 100 /* in ms */ +#define DIOLAN_SYNC_TIMEOUT 20 /* in ms */ + +#define DIOLAN_OUTBUF_LEN 128 +#define DIOLAN_FLUSH_LEN (DIOLAN_OUTBUF_LEN - 4) +#define DIOLAN_INBUF_LEN 256 /* Maximum supported receive length */ + +/* Structure to hold all of our device specific stuff */ +struct i2c_diolan_u2c { + u8 obuffer[DIOLAN_OUTBUF_LEN]; /* output buffer */ + u8 ibuffer[DIOLAN_INBUF_LEN]; /* input buffer */ + struct usb_device *usb_dev; /* the usb device for this device */ + struct usb_interface *interface;/* the interface for this device */ + struct i2c_adapter adapter; /* i2c related things */ + int olen; /* Output buffer length */ + int ocount; /* Number of enqueued messages */ +}; + +static uint frequency = U2C_I2C_FREQ_STD; /* I2C clock frequency in Hz */ + +module_param(frequency, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(frequency, "I2C clock frequency in hertz"); + +/* usb layer */ + +/* Send command to device, and get response. */ +static int diolan_usb_transfer(struct i2c_diolan_u2c *dev) +{ + int ret = 0; + int actual; + int i; + + if (!dev->olen || !dev->ocount) + return -EINVAL; + + ret = usb_bulk_msg(dev->usb_dev, + usb_sndbulkpipe(dev->usb_dev, DIOLAN_OUT_EP), + dev->obuffer, dev->olen, &actual, + DIOLAN_USB_TIMEOUT); + if (!ret) { + for (i = 0; i < dev->ocount; i++) { + int tmpret; + + tmpret = usb_bulk_msg(dev->usb_dev, + usb_rcvbulkpipe(dev->usb_dev, + DIOLAN_IN_EP), + dev->ibuffer, + sizeof(dev->ibuffer), &actual, + DIOLAN_USB_TIMEOUT); + /* + * Stop command processing if a previous command + * returned an error. + * Note that we still need to retrieve all messages. 
+ */ + if (ret < 0) + continue; + ret = tmpret; + if (ret == 0 && actual > 0) { + switch (dev->ibuffer[actual - 1]) { + case RESP_NACK: + /* + * Return ENXIO if NACK was received as + * response to the address phase, + * EIO otherwise + */ + ret = i == 1 ? -ENXIO : -EIO; + break; + case RESP_TIMEOUT: + ret = -ETIMEDOUT; + break; + case RESP_OK: + /* strip off return code */ + ret = actual - 1; + break; + default: + ret = -EIO; + break; + } + } + } + } + dev->olen = 0; + dev->ocount = 0; + return ret; +} + +static int diolan_write_cmd(struct i2c_diolan_u2c *dev, bool flush) +{ + if (flush || dev->olen >= DIOLAN_FLUSH_LEN) + return diolan_usb_transfer(dev); + return 0; +} + +/* Send command (no data) */ +static int diolan_usb_cmd(struct i2c_diolan_u2c *dev, u8 command, bool flush) +{ + dev->obuffer[dev->olen++] = command; + dev->ocount++; + return diolan_write_cmd(dev, flush); +} + +/* Send command with one byte of data */ +static int diolan_usb_cmd_data(struct i2c_diolan_u2c *dev, u8 command, u8 data, + bool flush) +{ + dev->obuffer[dev->olen++] = command; + dev->obuffer[dev->olen++] = data; + dev->ocount++; + return diolan_write_cmd(dev, flush); +} + +/* Send command with two bytes of data */ +static int diolan_usb_cmd_data2(struct i2c_diolan_u2c *dev, u8 command, u8 d1, + u8 d2, bool flush) +{ + dev->obuffer[dev->olen++] = command; + dev->obuffer[dev->olen++] = d1; + dev->obuffer[dev->olen++] = d2; + dev->ocount++; + return diolan_write_cmd(dev, flush); +} + +/* + * Flush input queue. + * If we don't do this at startup and the controller has queued up + * messages which were not retrieved, it will stop responding + * at some point. + */ +static void diolan_flush_input(struct i2c_diolan_u2c *dev) +{ + int i; + + for (i = 0; i < 10; i++) { + int actual = 0; + int ret; + + ret = usb_bulk_msg(dev->usb_dev, + usb_rcvbulkpipe(dev->usb_dev, DIOLAN_IN_EP), + dev->ibuffer, sizeof(dev->ibuffer), &actual, + DIOLAN_USB_TIMEOUT); + if (ret < 0 || actual == 0) + break; + } + if (i == 10) + dev_err(&dev->interface->dev, "Failed to flush input buffer\n"); +} + +static int diolan_i2c_start(struct i2c_diolan_u2c *dev) +{ + return diolan_usb_cmd(dev, CMD_I2C_START, false); +} + +static int diolan_i2c_repeated_start(struct i2c_diolan_u2c *dev) +{ + return diolan_usb_cmd(dev, CMD_I2C_REPEATED_START, false); +} + +static int diolan_i2c_stop(struct i2c_diolan_u2c *dev) +{ + return diolan_usb_cmd(dev, CMD_I2C_STOP, true); +} + +static int diolan_i2c_get_byte_ack(struct i2c_diolan_u2c *dev, bool ack, + u8 *byte) +{ + int ret; + + ret = diolan_usb_cmd_data(dev, CMD_I2C_GET_BYTE_ACK, ack, true); + if (ret > 0) + *byte = dev->ibuffer[0]; + else if (ret == 0) + ret = -EIO; + + return ret; +} + +static int diolan_i2c_put_byte_ack(struct i2c_diolan_u2c *dev, u8 byte) +{ + return diolan_usb_cmd_data(dev, CMD_I2C_PUT_BYTE_ACK, byte, false); +} + +static int diolan_set_speed(struct i2c_diolan_u2c *dev, u8 speed) +{ + return diolan_usb_cmd_data(dev, CMD_I2C_SET_SPEED, speed, true); +} + +/* Enable or disable clock synchronization (stretching) */ +static int diolan_set_clock_synch(struct i2c_diolan_u2c *dev, bool enable) +{ + return diolan_usb_cmd_data(dev, CMD_I2C_SET_CLK_SYNC, enable, true); +} + +/* Set clock synchronization timeout in ms */ +static int diolan_set_clock_synch_timeout(struct i2c_diolan_u2c *dev, int ms) +{ + int to_val = ms * 10; + + return diolan_usb_cmd_data2(dev, CMD_I2C_SET_CLK_SYNC_TO, + to_val & 0xff, (to_val >> 8) & 0xff, true); +} + +static void diolan_fw_version(struct i2c_diolan_u2c *dev) +{ + 
int ret; + + ret = diolan_usb_cmd(dev, CMD_GET_FW_VERSION, true); + if (ret >= 2) + dev_info(&dev->interface->dev, + "Diolan U2C firmware version %u.%u\n", + (unsigned int)dev->ibuffer[0], + (unsigned int)dev->ibuffer[1]); +} + +static void diolan_get_serial(struct i2c_diolan_u2c *dev) +{ + int ret; + u32 serial; + + ret = diolan_usb_cmd(dev, CMD_GET_SERIAL, true); + if (ret >= 4) { + serial = le32_to_cpu(*(u32 *)dev->ibuffer); + dev_info(&dev->interface->dev, + "Diolan U2C serial number %u\n", serial); + } +} + +static int diolan_init(struct i2c_diolan_u2c *dev) +{ + int speed, ret; + + if (frequency >= 200000) { + speed = U2C_I2C_SPEED_FAST; + frequency = U2C_I2C_FREQ_FAST; + } else if (frequency >= 100000 || frequency == 0) { + speed = U2C_I2C_SPEED_STD; + frequency = U2C_I2C_FREQ_STD; + } else { + speed = U2C_I2C_SPEED(frequency); + if (speed > U2C_I2C_SPEED_2KHZ) + speed = U2C_I2C_SPEED_2KHZ; + frequency = U2C_I2C_FREQ(speed); + } + + dev_info(&dev->interface->dev, + "Diolan U2C at USB bus %03d address %03d speed %d Hz\n", + dev->usb_dev->bus->busnum, dev->usb_dev->devnum, frequency); + + diolan_flush_input(dev); + diolan_fw_version(dev); + diolan_get_serial(dev); + + /* Set I2C speed */ + ret = diolan_set_speed(dev, speed); + if (ret < 0) + return ret; + + /* Configure I2C clock synchronization */ + ret = diolan_set_clock_synch(dev, speed != U2C_I2C_SPEED_FAST); + if (ret < 0) + return ret; + + if (speed != U2C_I2C_SPEED_FAST) + ret = diolan_set_clock_synch_timeout(dev, DIOLAN_SYNC_TIMEOUT); + + return ret; +} + +/* i2c layer */ + +static int diolan_usb_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, + int num) +{ + struct i2c_diolan_u2c *dev = i2c_get_adapdata(adapter); + struct i2c_msg *pmsg; + int i, j; + int ret, sret; + + ret = diolan_i2c_start(dev); + if (ret < 0) + return ret; + + for (i = 0; i < num; i++) { + pmsg = &msgs[i]; + if (i) { + ret = diolan_i2c_repeated_start(dev); + if (ret < 0) + goto abort; + } + if (pmsg->flags & I2C_M_RD) { + ret = + diolan_i2c_put_byte_ack(dev, (pmsg->addr << 1) | 1); + if (ret < 0) + goto abort; + for (j = 0; j < pmsg->len; j++) { + u8 byte; + bool ack = j < pmsg->len - 1; + + /* + * Don't send NACK if this is the first byte + * of a SMBUS_BLOCK message. + */ + if (j == 0 && (pmsg->flags & I2C_M_RECV_LEN)) + ack = true; + + ret = diolan_i2c_get_byte_ack(dev, ack, &byte); + if (ret < 0) + goto abort; + /* + * Adjust count if first received byte is length + */ + if (j == 0 && (pmsg->flags & I2C_M_RECV_LEN)) { + if (byte == 0 + || byte > I2C_SMBUS_BLOCK_MAX) { + ret = -EPROTO; + goto abort; + } + pmsg->len += byte; + } + pmsg->buf[j] = byte; + } + } else { + ret = diolan_i2c_put_byte_ack(dev, pmsg->addr << 1); + if (ret < 0) + goto abort; + for (j = 0; j < pmsg->len; j++) { + ret = diolan_i2c_put_byte_ack(dev, + pmsg->buf[j]); + if (ret < 0) + goto abort; + } + } + } +abort: + sret = diolan_i2c_stop(dev); + if (sret < 0 && ret >= 0) + ret = sret; + return ret; +} + +/* + * Return list of supported functionality. 
+ */ +static u32 diolan_usb_func(struct i2c_adapter *a) +{ + return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL | + I2C_FUNC_SMBUS_READ_BLOCK_DATA | I2C_FUNC_SMBUS_BLOCK_PROC_CALL; +} + +static const struct i2c_algorithm diolan_usb_algorithm = { + .master_xfer = diolan_usb_xfer, + .functionality = diolan_usb_func, +}; + +/* device layer */ + +static const struct usb_device_id diolan_u2c_table[] = { + { USB_DEVICE(USB_VENDOR_ID_DIOLAN, USB_DEVICE_ID_DIOLAN_U2C) }, + { } +}; + +MODULE_DEVICE_TABLE(usb, diolan_u2c_table); + +static void diolan_u2c_free(struct i2c_diolan_u2c *dev) +{ + usb_put_dev(dev->usb_dev); + kfree(dev); +} + +static int diolan_u2c_probe(struct usb_interface *interface, + const struct usb_device_id *id) +{ + struct i2c_diolan_u2c *dev; + int ret; + + /* allocate memory for our device state and initialize it */ + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (dev == NULL) { + dev_err(&interface->dev, "no memory for device state\n"); + ret = -ENOMEM; + goto error; + } + + dev->usb_dev = usb_get_dev(interface_to_usbdev(interface)); + dev->interface = interface; + + /* save our data pointer in this interface device */ + usb_set_intfdata(interface, dev); + + /* setup i2c adapter description */ + dev->adapter.owner = THIS_MODULE; + dev->adapter.class = I2C_CLASS_HWMON; + dev->adapter.algo = &diolan_usb_algorithm; + i2c_set_adapdata(&dev->adapter, dev); + snprintf(dev->adapter.name, sizeof(dev->adapter.name), + DRIVER_NAME " at bus %03d device %03d", + dev->usb_dev->bus->busnum, dev->usb_dev->devnum); + + dev->adapter.dev.parent = &dev->interface->dev; + + /* initialize diolan i2c interface */ + ret = diolan_init(dev); + if (ret < 0) { + dev_err(&interface->dev, "failed to initialize adapter\n"); + goto error_free; + } + + /* and finally attach to i2c layer */ + ret = i2c_add_adapter(&dev->adapter); + if (ret < 0) { + dev_err(&interface->dev, "failed to add I2C adapter\n"); + goto error_free; + } + + dev_dbg(&interface->dev, "connected " DRIVER_NAME "\n"); + + return 0; + +error_free: + usb_set_intfdata(interface, NULL); + diolan_u2c_free(dev); +error: + return ret; +} + +static void diolan_u2c_disconnect(struct usb_interface *interface) +{ + struct i2c_diolan_u2c *dev = usb_get_intfdata(interface); + + i2c_del_adapter(&dev->adapter); + usb_set_intfdata(interface, NULL); + diolan_u2c_free(dev); + + dev_dbg(&interface->dev, "disconnected\n"); +} + +static struct usb_driver diolan_u2c_driver = { + .name = DRIVER_NAME, + .probe = diolan_u2c_probe, + .disconnect = diolan_u2c_disconnect, + .id_table = diolan_u2c_table, +}; + +static int __init diolan_u2c_init(void) +{ + /* register this driver with the USB subsystem */ + return usb_register(&diolan_u2c_driver); +} + +static void __exit diolan_u2c_exit(void) +{ + /* deregister this driver with the USB subsystem */ + usb_deregister(&diolan_u2c_driver); +} + +module_init(diolan_u2c_init); +module_exit(diolan_u2c_exit); + +MODULE_AUTHOR("Guenter Roeck "); +MODULE_DESCRIPTION(DRIVER_NAME " driver"); +MODULE_LICENSE("GPL"); -- cgit v0.10.2 From 087809fce28f50098d9c3ef1a6865c722f23afd2 Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Fri, 21 Jan 2011 14:11:52 +0000 Subject: dmaengine/dw_dmac: don't scan descriptors if no xfers in progress Some hardware (picoChip picoXCell in particular) sometimes has the block transfer complete bit being set for a channel after the whole transfer has completed. If we don't have any transfers in the active list then don't bother to scan the descriptors. 
This often happens in normal operation and doesn't require the channel to be reset. v2: cleanup whitespace Signed-off-by: Jamie Iles Signed-off-by: Dan Williams diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index a3991ab..db22754 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -291,6 +291,9 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) return; } + if (list_empty(&dwc->active_list)) + return; + dev_vdbg(chan2dev(&dwc->chan), "scan_descriptors: llp=0x%x\n", llp); list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) { -- cgit v0.10.2 From f301c062dcdd113bc977ae1ebc8c12232f8531a9 Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Fri, 21 Jan 2011 14:11:53 +0000 Subject: dmaengine/dw_dmac: allow src/dst masters to be configured at runtime Some platforms have flexible mastering capabilities and this needs to be selected at runtime. If the platform has specified private data in the form of the dw_dma_slave then fetch the source and destination masters from here. If this isn't present, default to the previous of 0 and 1. v2: cleanup whitespace Acked-by: Hans-Christian Egtvedt Signed-off-by: Jamie Iles Signed-off-by: Dan Williams diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index db22754..a4cf261 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -32,15 +32,18 @@ * which does not support descriptor writeback. */ -/* NOTE: DMS+SMS is system-specific. We should get this information - * from the platform code somehow. - */ -#define DWC_DEFAULT_CTLLO (DWC_CTLL_DST_MSIZE(0) \ - | DWC_CTLL_SRC_MSIZE(0) \ - | DWC_CTLL_DMS(0) \ - | DWC_CTLL_SMS(1) \ - | DWC_CTLL_LLP_D_EN \ - | DWC_CTLL_LLP_S_EN) +#define DWC_DEFAULT_CTLLO(private) ({ \ + struct dw_dma_slave *__slave = (private); \ + int dms = __slave ? __slave->dst_master : 0; \ + int sms = __slave ? __slave->src_master : 1; \ + \ + (DWC_CTLL_DST_MSIZE(0) \ + | DWC_CTLL_SRC_MSIZE(0) \ + | DWC_CTLL_LLP_D_EN \ + | DWC_CTLL_LLP_S_EN \ + | DWC_CTLL_DMS(dms) \ + | DWC_CTLL_SMS(sms)); \ + }) /* * This is configuration-dependent and usually a funny size like 4095. 
@@ -591,7 +594,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, else src_width = dst_width = 0; - ctllo = DWC_DEFAULT_CTLLO + ctllo = DWC_DEFAULT_CTLLO(chan->private) | DWC_CTLL_DST_WIDTH(dst_width) | DWC_CTLL_SRC_WIDTH(src_width) | DWC_CTLL_DST_INC @@ -672,7 +675,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, switch (direction) { case DMA_TO_DEVICE: - ctllo = (DWC_DEFAULT_CTLLO + ctllo = (DWC_DEFAULT_CTLLO(chan->private) | DWC_CTLL_DST_WIDTH(reg_width) | DWC_CTLL_DST_FIX | DWC_CTLL_SRC_INC @@ -717,7 +720,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, } break; case DMA_FROM_DEVICE: - ctllo = (DWC_DEFAULT_CTLLO + ctllo = (DWC_DEFAULT_CTLLO(chan->private) | DWC_CTLL_SRC_WIDTH(reg_width) | DWC_CTLL_DST_INC | DWC_CTLL_SRC_FIX @@ -1129,7 +1132,7 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan, case DMA_TO_DEVICE: desc->lli.dar = dws->tx_reg; desc->lli.sar = buf_addr + (period_len * i); - desc->lli.ctllo = (DWC_DEFAULT_CTLLO + desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan->private) | DWC_CTLL_DST_WIDTH(reg_width) | DWC_CTLL_SRC_WIDTH(reg_width) | DWC_CTLL_DST_FIX @@ -1140,7 +1143,7 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan, case DMA_FROM_DEVICE: desc->lli.dar = buf_addr + (period_len * i); desc->lli.sar = dws->rx_reg; - desc->lli.ctllo = (DWC_DEFAULT_CTLLO + desc->lli.ctllo = (DWC_DEFAULT_CTLLO(chan->private) | DWC_CTLL_SRC_WIDTH(reg_width) | DWC_CTLL_DST_WIDTH(reg_width) | DWC_CTLL_DST_INC diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h index c8aad71..8014eb8 100644 --- a/include/linux/dw_dmac.h +++ b/include/linux/dw_dmac.h @@ -52,6 +52,8 @@ struct dw_dma_slave { enum dw_dma_slave_width reg_width; u32 cfg_hi; u32 cfg_lo; + int src_master; + int dst_master; }; /* Platform-configurable bits in CFG_HI */ -- cgit v0.10.2 From 95ea759e9e116dade3e7386be2a3db76c90f4675 Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Fri, 21 Jan 2011 14:11:54 +0000 Subject: dmaengine/dw_dmac: provide a mechanism to indicate private devices Some platforms (e.g. Picochip PC3XX) have multiple DMA controllers where some may be used for slave transfers and others for general purpose memcpy type transfers. Add a .is_private boolean to the platform data structure so that controllers can be marked as private so that the DMA_PRIVATE capability will be set for that controller. Signed-off-by: Jamie Iles Signed-off-by: Dan Williams diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index a4cf261..08dab3b 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -1341,6 +1341,8 @@ static int __init dw_probe(struct platform_device *pdev) dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask); dma_cap_set(DMA_SLAVE, dw->dma.cap_mask); + if (pdata->is_private) + dma_cap_set(DMA_PRIVATE, dw->dma.cap_mask); dw->dma.dev = &pdev->dev; dw->dma.device_alloc_chan_resources = dwc_alloc_chan_resources; dw->dma.device_free_chan_resources = dwc_free_chan_resources; diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h index 8014eb8..deec66b 100644 --- a/include/linux/dw_dmac.h +++ b/include/linux/dw_dmac.h @@ -16,9 +16,12 @@ /** * struct dw_dma_platform_data - Controller configuration parameters * @nr_channels: Number of channels supported by hardware (max 8) + * @is_private: The device channels should be marked as private and not for + * by the general purpose DMA channel allocator. 
*/ struct dw_dma_platform_data { unsigned int nr_channels; + bool is_private; }; /** -- cgit v0.10.2 From 4aa5f366431fef0afca0df348ca9782c63ac9911 Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Fri, 21 Jan 2011 14:11:55 +0000 Subject: avr32: at32ap700x: specify DMA src and dst masters Now that the dw_dmac DMA driver supports configurable source and destination masters we need to specify which ones to use. This was previously hardcoded to 0 and 1 respectively in the driver. Acked-by: Hans-Christian Egtvedt Signed-off-by: Jamie Iles Signed-off-by: Dan Williams diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c index e67c999..2747cde 100644 --- a/arch/avr32/mach-at32ap/at32ap700x.c +++ b/arch/avr32/mach-at32ap/at32ap700x.c @@ -2048,6 +2048,8 @@ at32_add_device_ac97c(unsigned int id, struct ac97c_platform_data *data, rx_dws->reg_width = DW_DMA_SLAVE_WIDTH_16BIT; rx_dws->cfg_hi = DWC_CFGH_SRC_PER(3); rx_dws->cfg_lo &= ~(DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL); + rx_dws->src_master = 0; + rx_dws->dst_master = 1; } /* Check if DMA slave interface for playback should be configured. */ @@ -2056,6 +2058,8 @@ at32_add_device_ac97c(unsigned int id, struct ac97c_platform_data *data, tx_dws->reg_width = DW_DMA_SLAVE_WIDTH_16BIT; tx_dws->cfg_hi = DWC_CFGH_DST_PER(4); tx_dws->cfg_lo &= ~(DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL); + rx_dws->src_master = 0; + rx_dws->dst_master = 1; } if (platform_device_add_data(pdev, data, @@ -2128,6 +2132,8 @@ at32_add_device_abdac(unsigned int id, struct atmel_abdac_pdata *data) dws->reg_width = DW_DMA_SLAVE_WIDTH_32BIT; dws->cfg_hi = DWC_CFGH_DST_PER(2); dws->cfg_lo &= ~(DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL); + dws->src_master = 0; + dws->dst_master = 1; if (platform_device_add_data(pdev, data, sizeof(struct atmel_abdac_pdata))) -- cgit v0.10.2 From cb9ab2d8e4661c811d5e9a8e687b6f736690c90e Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 Jan 2011 11:18:04 +0100 Subject: dma40: make init function static Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 6e1d46a..52013ed 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -3060,7 +3060,7 @@ static struct platform_driver d40_driver = { }, }; -int __init stedma40_init(void) +static int __init stedma40_init(void) { return platform_driver_probe(&d40_driver, d40_probe); } -- cgit v0.10.2 From 262d2915d4f11e5e78e432ab68f0ee034ef3f75f Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 Jan 2011 11:18:05 +0100 Subject: dma40: ensure event lines get enabled The controller sometimes fails to register the enable of the event line when both src and dst event lines are used on the same logical channel. Implement the recommended software workaround, which is to retry the write until it works. 
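The workaround boils down to a bounded write-and-verify loop on the event-line
register. A minimal sketch, assuming the D40_* event-line macros from the
driver's private header (ste_dma40_ll.h) and a hypothetical helper name; the
actual implementation is __d40_config_set_event() in the diff below:

	/*
	 * Sketch only: re-issue the enable until the read-back confirms
	 * that the event line registered it, giving up after 100 tries.
	 */
	static void d40_enable_event_with_retry(void __iomem *addr, u32 event)
	{
		int tries = 100;

		while (--tries) {
			writel((D40_ACTIVATE_EVENTLINE << D40_EVENTLINE_POS(event))
			       | ~D40_EVENTLINE_MASK(event), addr);

			/* The enable took effect once the mask reads back set. */
			if (readl(addr) & D40_EVENTLINE_MASK(event))
				break;
		}

		WARN_ON(!tries);
	}

Usually a single retry is sufficient; the WARN_ON only fires if the event line
never comes up.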
Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 52013ed..42c88fc 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -303,6 +303,11 @@ struct d40_reg_val { unsigned int val; }; +static struct device *chan2dev(struct d40_chan *d40c) +{ + return &d40c->chan.dev->device; +} + static int d40_pool_lli_alloc(struct d40_desc *d40d, int lli_len, bool is_log) { @@ -701,17 +706,46 @@ static void d40_term_all(struct d40_chan *d40c) d40c->busy = false; } +static void __d40_config_set_event(struct d40_chan *d40c, bool enable, + u32 event, int reg) +{ + void __iomem *addr = d40c->base->virtbase + D40_DREG_PCBASE + + d40c->phy_chan->num * D40_DREG_PCDELTA + reg; + int tries; + + if (!enable) { + writel((D40_DEACTIVATE_EVENTLINE << D40_EVENTLINE_POS(event)) + | ~D40_EVENTLINE_MASK(event), addr); + return; + } + + /* + * The hardware sometimes doesn't register the enable when src and dst + * event lines are active on the same logical channel. Retry to ensure + * it does. Usually only one retry is sufficient. + */ + tries = 100; + while (--tries) { + writel((D40_ACTIVATE_EVENTLINE << D40_EVENTLINE_POS(event)) + | ~D40_EVENTLINE_MASK(event), addr); + + if (readl(addr) & D40_EVENTLINE_MASK(event)) + break; + } + + if (tries != 99) + dev_dbg(chan2dev(d40c), + "[%s] workaround enable S%cLNK (%d tries)\n", + __func__, reg == D40_CHAN_REG_SSLNK ? 'S' : 'D', + 100 - tries); + + WARN_ON(!tries); +} + static void d40_config_set_event(struct d40_chan *d40c, bool do_enable) { - u32 val; unsigned long flags; - /* Notice, that disable requires the physical channel to be stopped */ - if (do_enable) - val = D40_ACTIVATE_EVENTLINE; - else - val = D40_DEACTIVATE_EVENTLINE; - spin_lock_irqsave(&d40c->phy_chan->lock, flags); /* Enable event line connected to device (or memcpy) */ @@ -719,20 +753,15 @@ static void d40_config_set_event(struct d40_chan *d40c, bool do_enable) (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_PERIPH)) { u32 event = D40_TYPE_TO_EVENT(d40c->dma_cfg.src_dev_type); - writel((val << D40_EVENTLINE_POS(event)) | - ~D40_EVENTLINE_MASK(event), - d40c->base->virtbase + D40_DREG_PCBASE + - d40c->phy_chan->num * D40_DREG_PCDELTA + - D40_CHAN_REG_SSLNK); + __d40_config_set_event(d40c, do_enable, event, + D40_CHAN_REG_SSLNK); } + if (d40c->dma_cfg.dir != STEDMA40_PERIPH_TO_MEM) { u32 event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dst_dev_type); - writel((val << D40_EVENTLINE_POS(event)) | - ~D40_EVENTLINE_MASK(event), - d40c->base->virtbase + D40_DREG_PCBASE + - d40c->phy_chan->num * D40_DREG_PCDELTA + - D40_CHAN_REG_SDLNK); + __d40_config_set_event(d40c, do_enable, event, + D40_CHAN_REG_SDLNK); } spin_unlock_irqrestore(&d40c->phy_chan->lock, flags); -- cgit v0.10.2 From 7d83a854a1a44a8f6a699503441403a36c42f66c Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 Jan 2011 11:18:06 +0100 Subject: dma40: remove "hardware link with previous jobs" code This link in hardware with previous jobs code is: - unused, no clients using or requiring this feature - incomplete, being implemented only for physical channels - broken, only working to perform one link Remove it. This also allows us to get rid of the channel pause in the submit_tx() routine. 
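With the hardware-linking code gone, the submit path no longer needs to pause
and resume the channel; it simply assigns a cookie and queues the descriptor
under the channel lock. A condensed sketch of the resulting d40_tx_submit()
(cookie wraparound handling elided; see the full diff below):

	static dma_cookie_t d40_tx_submit(struct dma_async_tx_descriptor *tx)
	{
		struct d40_chan *d40c = container_of(tx->chan,
						     struct d40_chan, chan);
		struct d40_desc *d40d = container_of(tx, struct d40_desc, txd);
		unsigned long flags;

		spin_lock_irqsave(&d40c->lock, flags);

		/*
		 * Hand out the next cookie and queue the descriptor; no
		 * d40_pause()/d40_resume() bracket is required any more.
		 */
		d40c->chan.cookie++;
		d40d->txd.cookie = d40c->chan.cookie;
		d40_desc_queue(d40c, d40d);

		spin_unlock_irqrestore(&d40c->lock, flags);

		return tx->cookie;
	}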
Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 42c88fc..ed2a3eb 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -94,7 +94,6 @@ struct d40_lli_pool { * during a transfer. * @node: List entry. * @is_in_client_list: true if the client owns this descriptor. - * @is_hw_linked: true if this job will automatically be continued for * the previous one. * * This descriptor is used for both logical and physical transfers. @@ -114,7 +113,6 @@ struct d40_desc { struct list_head node; bool is_in_client_list; - bool is_hw_linked; }; /** @@ -548,18 +546,6 @@ static struct d40_desc *d40_first_queued(struct d40_chan *d40c) return d; } -static struct d40_desc *d40_last_queued(struct d40_chan *d40c) -{ - struct d40_desc *d; - - if (list_empty(&d40c->queue)) - return NULL; - list_for_each_entry(d, &d40c->queue, node) - if (list_is_last(&d->node, &d40c->queue)) - break; - return d; -} - static int d40_psize_2_burst_size(bool is_log, int psize) { if (is_log) { @@ -940,77 +926,6 @@ no_suspend: return res; } -static void d40_tx_submit_log(struct d40_chan *d40c, struct d40_desc *d40d) -{ - /* TODO: Write */ -} - -static void d40_tx_submit_phy(struct d40_chan *d40c, struct d40_desc *d40d) -{ - struct d40_desc *d40d_prev = NULL; - int i; - u32 val; - - if (!list_empty(&d40c->queue)) - d40d_prev = d40_last_queued(d40c); - else if (!list_empty(&d40c->active)) - d40d_prev = d40_first_active_get(d40c); - - if (!d40d_prev) - return; - - /* Here we try to join this job with previous jobs */ - val = readl(d40c->base->virtbase + D40_DREG_PCBASE + - d40c->phy_chan->num * D40_DREG_PCDELTA + - D40_CHAN_REG_SSLNK); - - /* Figure out which link we're currently transmitting */ - for (i = 0; i < d40d_prev->lli_len; i++) - if (val == d40d_prev->lli_phy.src[i].reg_lnk) - break; - - val = readl(d40c->base->virtbase + D40_DREG_PCBASE + - d40c->phy_chan->num * D40_DREG_PCDELTA + - D40_CHAN_REG_SSELT) >> D40_SREG_ELEM_LOG_ECNT_POS; - - if (i == (d40d_prev->lli_len - 1) && val > 0) { - /* Change the current one */ - writel(virt_to_phys(d40d->lli_phy.src), - d40c->base->virtbase + D40_DREG_PCBASE + - d40c->phy_chan->num * D40_DREG_PCDELTA + - D40_CHAN_REG_SSLNK); - writel(virt_to_phys(d40d->lli_phy.dst), - d40c->base->virtbase + D40_DREG_PCBASE + - d40c->phy_chan->num * D40_DREG_PCDELTA + - D40_CHAN_REG_SDLNK); - - d40d->is_hw_linked = true; - - } else if (i < d40d_prev->lli_len) { - (void) dma_unmap_single(d40c->base->dev, - virt_to_phys(d40d_prev->lli_phy.src), - d40d_prev->lli_pool.size, - DMA_TO_DEVICE); - - /* Keep the settings */ - val = d40d_prev->lli_phy.src[d40d_prev->lli_len - 1].reg_lnk & - ~D40_SREG_LNK_PHYS_LNK_MASK; - d40d_prev->lli_phy.src[d40d_prev->lli_len - 1].reg_lnk = - val | virt_to_phys(d40d->lli_phy.src); - - val = d40d_prev->lli_phy.dst[d40d_prev->lli_len - 1].reg_lnk & - ~D40_SREG_LNK_PHYS_LNK_MASK; - d40d_prev->lli_phy.dst[d40d_prev->lli_len - 1].reg_lnk = - val | virt_to_phys(d40d->lli_phy.dst); - - (void) dma_map_single(d40c->base->dev, - d40d_prev->lli_phy.src, - d40d_prev->lli_pool.size, - DMA_TO_DEVICE); - d40d->is_hw_linked = true; - } -} - static dma_cookie_t d40_tx_submit(struct dma_async_tx_descriptor *tx) { struct d40_chan *d40c = container_of(tx->chan, @@ -1019,8 +934,6 @@ static dma_cookie_t d40_tx_submit(struct dma_async_tx_descriptor *tx) struct d40_desc *d40d = container_of(tx, struct d40_desc, txd); unsigned long flags; - 
(void) d40_pause(&d40c->chan); - spin_lock_irqsave(&d40c->lock, flags); d40c->chan.cookie++; @@ -1030,17 +943,10 @@ static dma_cookie_t d40_tx_submit(struct dma_async_tx_descriptor *tx) d40d->txd.cookie = d40c->chan.cookie; - if (d40c->log_num == D40_PHY_CHAN) - d40_tx_submit_phy(d40c, d40d); - else - d40_tx_submit_log(d40c, d40d); - d40_desc_queue(d40c, d40d); spin_unlock_irqrestore(&d40c->lock, flags); - (void) d40_resume(&d40c->chan); - return tx->cookie; } @@ -1080,21 +986,14 @@ static struct d40_desc *d40_queue_start(struct d40_chan *d40c) /* Add to active queue */ d40_desc_submit(d40c, d40d); - /* - * If this job is already linked in hw, - * do not submit it. - */ - - if (!d40d->is_hw_linked) { - /* Initiate DMA job */ - d40_desc_load(d40c, d40d); + /* Initiate DMA job */ + d40_desc_load(d40c, d40d); - /* Start dma job */ - err = d40_start(d40c); + /* Start dma job */ + err = d40_start(d40c); - if (err) - return NULL; - } + if (err) + return NULL; } return d40d; -- cgit v0.10.2 From 8ca84687b91322b9eafeaf4da43a21684cd0316e Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 Jan 2011 11:18:07 +0100 Subject: dma40: use helper for channel registers base The register offset computation for accessing channel registers is copy/pasted in several places. Create a helper function to do it. Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index ed2a3eb..3d4cea3 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -306,6 +306,12 @@ static struct device *chan2dev(struct d40_chan *d40c) return &d40c->chan.dev->device; } +static void __iomem *chan_base(struct d40_chan *chan) +{ + return chan->base->virtbase + D40_DREG_PCBASE + + chan->phy_chan->num * D40_DREG_PCDELTA; +} + static int d40_pool_lli_alloc(struct d40_desc *d40d, int lli_len, bool is_log) { @@ -695,8 +701,7 @@ static void d40_term_all(struct d40_chan *d40c) static void __d40_config_set_event(struct d40_chan *d40c, bool enable, u32 event, int reg) { - void __iomem *addr = d40c->base->virtbase + D40_DREG_PCBASE - + d40c->phy_chan->num * D40_DREG_PCDELTA + reg; + void __iomem *addr = chan_base(d40c) + reg; int tries; if (!enable) { @@ -755,15 +760,12 @@ static void d40_config_set_event(struct d40_chan *d40c, bool do_enable) static u32 d40_chan_has_events(struct d40_chan *d40c) { + void __iomem *chanbase = chan_base(d40c); u32 val; - val = readl(d40c->base->virtbase + D40_DREG_PCBASE + - d40c->phy_chan->num * D40_DREG_PCDELTA + - D40_CHAN_REG_SSLNK); + val = readl(chanbase + D40_CHAN_REG_SSLNK); + val |= readl(chanbase + D40_CHAN_REG_SDLNK); - val |= readl(d40c->base->virtbase + D40_DREG_PCBASE + - d40c->phy_chan->num * D40_DREG_PCDELTA + - D40_CHAN_REG_SDLNK); return val; } @@ -810,29 +812,17 @@ static void d40_config_write(struct d40_chan *d40c) writel(var, d40c->base->virtbase + D40_DREG_PRMOE + addr_base); if (d40c->log_num != D40_PHY_CHAN) { + int lidx = (d40c->phy_chan->num << D40_SREG_ELEM_LOG_LIDX_POS) + & D40_SREG_ELEM_LOG_LIDX_MASK; + void __iomem *chanbase = chan_base(d40c); + /* Set default config for CFG reg */ - writel(d40c->src_def_cfg, - d40c->base->virtbase + D40_DREG_PCBASE + - d40c->phy_chan->num * D40_DREG_PCDELTA + - D40_CHAN_REG_SSCFG); - writel(d40c->dst_def_cfg, - d40c->base->virtbase + D40_DREG_PCBASE + - d40c->phy_chan->num * D40_DREG_PCDELTA + - D40_CHAN_REG_SDCFG); + writel(d40c->src_def_cfg, chanbase + D40_CHAN_REG_SSCFG); + 
writel(d40c->dst_def_cfg, chanbase + D40_CHAN_REG_SDCFG); /* Set LIDX for lcla */ - writel((d40c->phy_chan->num << D40_SREG_ELEM_LOG_LIDX_POS) & - D40_SREG_ELEM_LOG_LIDX_MASK, - d40c->base->virtbase + D40_DREG_PCBASE + - d40c->phy_chan->num * D40_DREG_PCDELTA + - D40_CHAN_REG_SDELT); - - writel((d40c->phy_chan->num << D40_SREG_ELEM_LOG_LIDX_POS) & - D40_SREG_ELEM_LOG_LIDX_MASK, - d40c->base->virtbase + D40_DREG_PCBASE + - d40c->phy_chan->num * D40_DREG_PCDELTA + - D40_CHAN_REG_SSELT); - + writel(lidx, chanbase + D40_CHAN_REG_SSELT); + writel(lidx, chanbase + D40_CHAN_REG_SDELT); } } @@ -843,12 +833,12 @@ static u32 d40_residue(struct d40_chan *d40c) if (d40c->log_num != D40_PHY_CHAN) num_elt = (readl(&d40c->lcpa->lcsp2) & D40_MEM_LCSP2_ECNT_MASK) >> D40_MEM_LCSP2_ECNT_POS; - else - num_elt = (readl(d40c->base->virtbase + D40_DREG_PCBASE + - d40c->phy_chan->num * D40_DREG_PCDELTA + - D40_CHAN_REG_SDELT) & - D40_SREG_ELEM_PHY_ECNT_MASK) >> - D40_SREG_ELEM_PHY_ECNT_POS; + else { + u32 val = readl(chan_base(d40c) + D40_CHAN_REG_SDELT); + num_elt = (val & D40_SREG_ELEM_PHY_ECNT_MASK) + >> D40_SREG_ELEM_PHY_ECNT_POS; + } + return num_elt * (1 << d40c->dma_cfg.dst_info.data_width); } @@ -859,10 +849,9 @@ static bool d40_tx_is_linked(struct d40_chan *d40c) if (d40c->log_num != D40_PHY_CHAN) is_link = readl(&d40c->lcpa->lcsp3) & D40_MEM_LCSP3_DLOS_MASK; else - is_link = readl(d40c->base->virtbase + D40_DREG_PCBASE + - d40c->phy_chan->num * D40_DREG_PCDELTA + - D40_CHAN_REG_SDLNK) & - D40_SREG_LNK_PHYS_LNK_MASK; + is_link = readl(chan_base(d40c) + D40_CHAN_REG_SDLNK) + & D40_SREG_LNK_PHYS_LNK_MASK; + return is_link; } @@ -1550,6 +1539,7 @@ static int d40_free_dma(struct d40_chan *d40c) static bool d40_is_paused(struct d40_chan *d40c) { + void __iomem *chanbase = chan_base(d40c); bool is_paused = false; unsigned long flags; void __iomem *active_reg; @@ -1576,14 +1566,10 @@ static bool d40_is_paused(struct d40_chan *d40c) if (d40c->dma_cfg.dir == STEDMA40_MEM_TO_PERIPH || d40c->dma_cfg.dir == STEDMA40_MEM_TO_MEM) { event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dst_dev_type); - status = readl(d40c->base->virtbase + D40_DREG_PCBASE + - d40c->phy_chan->num * D40_DREG_PCDELTA + - D40_CHAN_REG_SDLNK); + status = readl(chanbase + D40_CHAN_REG_SDLNK); } else if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) { event = D40_TYPE_TO_EVENT(d40c->dma_cfg.src_dev_type); - status = readl(d40c->base->virtbase + D40_DREG_PCBASE + - d40c->phy_chan->num * D40_DREG_PCDELTA + - D40_CHAN_REG_SSLNK); + status = readl(chanbase + D40_CHAN_REG_SSLNK); } else { dev_err(&d40c->chan.dev->device, "[%s] Unknown direction\n", __func__); -- cgit v0.10.2 From 724a8577d80c6f8e9ac680be1cf419eddbd6f2a1 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 Jan 2011 11:18:08 +0100 Subject: dma40: use helpers for channel type check The somewhat confusing check d40c->log_num == D40_PHY_CHAN and its variants are used in several places to check if a channel is logical or physical. Use appropriately named helpers to do this to make the code more readable. 
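At the call sites the change is mechanical; for example, from the d40_resume()
hunk in the diff below:

	/* before */
	if (d40c->log_num != D40_PHY_CHAN)
		d40_config_set_event(d40c, true);

	/* after */
	if (chan_is_logical(d40c))
		d40_config_set_event(d40c, true);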
Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 3d4cea3..0073988 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -306,6 +306,16 @@ static struct device *chan2dev(struct d40_chan *d40c) return &d40c->chan.dev->device; } +static bool chan_is_physical(struct d40_chan *chan) +{ + return chan->log_num == D40_PHY_CHAN; +} + +static bool chan_is_logical(struct d40_chan *chan) +{ + return !chan_is_physical(chan); +} + static void __iomem *chan_base(struct d40_chan *chan) { return chan->base->virtbase + D40_DREG_PCBASE + @@ -400,7 +410,7 @@ static int d40_lcla_free_all(struct d40_chan *d40c, int i; int ret = -EINVAL; - if (d40c->log_num == D40_PHY_CHAN) + if (chan_is_physical(d40c)) return 0; spin_lock_irqsave(&d40c->base->lcla_pool.lock, flags); @@ -472,7 +482,7 @@ static void d40_desc_load(struct d40_chan *d40c, struct d40_desc *d40d) { int curr_lcla = -EINVAL, next_lcla; - if (d40c->log_num == D40_PHY_CHAN) { + if (chan_is_physical(d40c)) { d40_phy_lli_write(d40c->base->virtbase, d40c->phy_chan->num, d40d->lli_phy.dst, @@ -788,7 +798,7 @@ static u32 d40_get_prmo(struct d40_chan *d40c) = D40_DREG_PRMO_LCHAN_SRC_LOG_DST_LOG, }; - if (d40c->log_num == D40_PHY_CHAN) + if (chan_is_physical(d40c)) return phy_map[d40c->dma_cfg.mode_opt]; else return log_map[d40c->dma_cfg.mode_opt]; @@ -802,7 +812,7 @@ static void d40_config_write(struct d40_chan *d40c) /* Odd addresses are even addresses + 4 */ addr_base = (d40c->phy_chan->num % 2) * 4; /* Setup channel mode to logical or physical */ - var = ((u32)(d40c->log_num != D40_PHY_CHAN) + 1) << + var = ((u32)(chan_is_logical(d40c)) + 1) << D40_CHAN_POS(d40c->phy_chan->num); writel(var, d40c->base->virtbase + D40_DREG_PRMSE + addr_base); @@ -811,7 +821,7 @@ static void d40_config_write(struct d40_chan *d40c) writel(var, d40c->base->virtbase + D40_DREG_PRMOE + addr_base); - if (d40c->log_num != D40_PHY_CHAN) { + if (chan_is_logical(d40c)) { int lidx = (d40c->phy_chan->num << D40_SREG_ELEM_LOG_LIDX_POS) & D40_SREG_ELEM_LOG_LIDX_MASK; void __iomem *chanbase = chan_base(d40c); @@ -830,7 +840,7 @@ static u32 d40_residue(struct d40_chan *d40c) { u32 num_elt; - if (d40c->log_num != D40_PHY_CHAN) + if (chan_is_logical(d40c)) num_elt = (readl(&d40c->lcpa->lcsp2) & D40_MEM_LCSP2_ECNT_MASK) >> D40_MEM_LCSP2_ECNT_POS; else { @@ -846,7 +856,7 @@ static bool d40_tx_is_linked(struct d40_chan *d40c) { bool is_link; - if (d40c->log_num != D40_PHY_CHAN) + if (chan_is_logical(d40c)) is_link = readl(&d40c->lcpa->lcsp3) & D40_MEM_LCSP3_DLOS_MASK; else is_link = readl(chan_base(d40c) + D40_CHAN_REG_SDLNK) @@ -869,7 +879,7 @@ static int d40_pause(struct dma_chan *chan) res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ); if (res == 0) { - if (d40c->log_num != D40_PHY_CHAN) { + if (chan_is_logical(d40c)) { d40_config_set_event(d40c, false); /* Resume the other logical channels if any */ if (d40_chan_has_events(d40c)) @@ -895,7 +905,7 @@ static int d40_resume(struct dma_chan *chan) spin_lock_irqsave(&d40c->lock, flags); if (d40c->base->rev == 0) - if (d40c->log_num != D40_PHY_CHAN) { + if (chan_is_logical(d40c)) { res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ); goto no_suspend; @@ -904,7 +914,7 @@ static int d40_resume(struct dma_chan *chan) /* If bytes left to transfer or linked tx resume job */ if (d40_residue(d40c) || d40_tx_is_linked(d40c)) { - if (d40c->log_num != D40_PHY_CHAN) + if 
(chan_is_logical(d40c)) d40_config_set_event(d40c, true); res = d40_channel_execute_command(d40c, D40_DMA_RUN); @@ -944,7 +954,7 @@ static int d40_start(struct d40_chan *d40c) if (d40c->base->rev == 0) { int err; - if (d40c->log_num != D40_PHY_CHAN) { + if (chan_is_logical(d40c)) { err = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ); if (err) @@ -952,7 +962,7 @@ static int d40_start(struct d40_chan *d40c) } } - if (d40c->log_num != D40_PHY_CHAN) + if (chan_is_logical(d40c)) d40_config_set_event(d40c, true); return d40_channel_execute_command(d40c, D40_DMA_RUN); @@ -1495,7 +1505,7 @@ static int d40_free_dma(struct d40_chan *d40c) return res; } - if (d40c->log_num != D40_PHY_CHAN) { + if (chan_is_logical(d40c)) { /* Release logical channel, deactivate the event line */ d40_config_set_event(d40c, false); @@ -1548,7 +1558,7 @@ static bool d40_is_paused(struct d40_chan *d40c) spin_lock_irqsave(&d40c->lock, flags); - if (d40c->log_num == D40_PHY_CHAN) { + if (chan_is_physical(d40c)) { if (d40c->phy_chan->num % 2 == 0) active_reg = d40c->base->virtbase + D40_DREG_ACTIVE; else @@ -1638,7 +1648,7 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, d40d->lli_current = 0; d40d->txd.flags = dma_flags; - if (d40c->log_num != D40_PHY_CHAN) { + if (chan_is_logical(d40c)) { if (d40_pool_lli_alloc(d40d, d40d->lli_len, true) < 0) { dev_err(&d40c->chan.dev->device, @@ -1765,9 +1775,9 @@ static int d40_alloc_chan_resources(struct dma_chan *chan) /* Fill in basic CFG register values */ d40_phy_cfg(&d40c->dma_cfg, &d40c->src_def_cfg, - &d40c->dst_def_cfg, d40c->log_num != D40_PHY_CHAN); + &d40c->dst_def_cfg, chan_is_logical(d40c)); - if (d40c->log_num != D40_PHY_CHAN) { + if (chan_is_logical(d40c)) { d40_log_cfg(&d40c->dma_cfg, &d40c->log_def.lcsp1, &d40c->log_def.lcsp3); @@ -1857,7 +1867,7 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan, d40d->txd.tx_submit = d40_tx_submit; - if (d40c->log_num != D40_PHY_CHAN) { + if (chan_is_logical(d40c)) { if (d40_pool_lli_alloc(d40d, d40d->lli_len, true) < 0) { dev_err(&d40c->chan.dev->device, @@ -2093,7 +2103,7 @@ static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan, if (d40d == NULL) goto err; - if (d40c->log_num != D40_PHY_CHAN) + if (chan_is_logical(d40c)) err = d40_prep_slave_sg_log(d40d, d40c, sgl, sg_len, direction, dma_flags); else @@ -2103,7 +2113,7 @@ static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan, dev_err(&d40c->chan.dev->device, "[%s] Failed to prepare %s slave sg job: %d\n", __func__, - d40c->log_num != D40_PHY_CHAN ? "log" : "phy", err); + chan_is_logical(d40c) ? "log" : "phy", err); goto err; } @@ -2253,7 +2263,7 @@ static void d40_set_runtime_config(struct dma_chan *chan, return; } - if (d40c->log_num != D40_PHY_CHAN) { + if (chan_is_logical(d40c)) { if (config_maxburst >= 16) psize = STEDMA40_PSIZE_LOG_16; else if (config_maxburst >= 8) @@ -2286,7 +2296,7 @@ static void d40_set_runtime_config(struct dma_chan *chan, cfg->dst_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL; /* Fill in register values */ - if (d40c->log_num != D40_PHY_CHAN) + if (chan_is_logical(d40c)) d40_log_cfg(cfg, &d40c->log_def.lcsp1, &d40c->log_def.lcsp3); else d40_phy_cfg(cfg, &d40c->src_def_cfg, -- cgit v0.10.2 From 6db5a8ba11bf23d1618e392518f1684cbf2fe031 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 Jan 2011 11:18:09 +0100 Subject: dma40: use helpers for error functions Almost every use of dev_err in this driver prints the function name. 
Abstract out wrappers to help with this and reduce code duplication. Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 0073988..ab3ca6af 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -322,6 +322,12 @@ static void __iomem *chan_base(struct d40_chan *chan) chan->phy_chan->num * D40_DREG_PCDELTA; } +#define d40_err(dev, format, arg...) \ + dev_err(dev, "[%s] " format, __func__, ## arg) + +#define chan_err(d40c, format, arg...) \ + d40_err(chan2dev(d40c), format, ## arg) + static int d40_pool_lli_alloc(struct d40_desc *d40d, int lli_len, bool is_log) { @@ -673,9 +679,9 @@ static int d40_channel_execute_command(struct d40_chan *d40c, } if (i == D40_SUSPEND_MAX_IT) { - dev_err(&d40c->chan.dev->device, - "[%s]: unable to suspend the chl %d (log: %d) status %x\n", - __func__, d40c->phy_chan->num, d40c->log_num, + chan_err(d40c, + "unable to suspend the chl %d (log: %d) status %x\n", + d40c->phy_chan->num, d40c->log_num, status); dump_stack(); ret = -EBUSY; @@ -1143,9 +1149,8 @@ static irqreturn_t d40_handle_interrupt(int irq, void *data) if (!il[row].is_error) dma_tc_handle(d40c); else - dev_err(base->dev, - "[%s] IRQ chan: %ld offset %d idx %d\n", - __func__, chan, il[row].offset, idx); + d40_err(base->dev, "IRQ chan: %ld offset %d idx %d\n", + chan, il[row].offset, idx); spin_unlock(&d40c->lock); } @@ -1164,8 +1169,7 @@ static int d40_validate_conf(struct d40_chan *d40c, bool is_log = conf->mode == STEDMA40_MODE_LOGICAL; if (!conf->dir) { - dev_err(&d40c->chan.dev->device, "[%s] Invalid direction.\n", - __func__); + chan_err(d40c, "Invalid direction.\n"); res = -EINVAL; } @@ -1173,46 +1177,40 @@ static int d40_validate_conf(struct d40_chan *d40c, d40c->base->plat_data->dev_tx[conf->dst_dev_type] == 0 && d40c->runtime_addr == 0) { - dev_err(&d40c->chan.dev->device, - "[%s] Invalid TX channel address (%d)\n", - __func__, conf->dst_dev_type); + chan_err(d40c, "Invalid TX channel address (%d)\n", + conf->dst_dev_type); res = -EINVAL; } if (conf->src_dev_type != STEDMA40_DEV_SRC_MEMORY && d40c->base->plat_data->dev_rx[conf->src_dev_type] == 0 && d40c->runtime_addr == 0) { - dev_err(&d40c->chan.dev->device, - "[%s] Invalid RX channel address (%d)\n", - __func__, conf->src_dev_type); + chan_err(d40c, "Invalid RX channel address (%d)\n", + conf->src_dev_type); res = -EINVAL; } if (conf->dir == STEDMA40_MEM_TO_PERIPH && dst_event_group == STEDMA40_DEV_DST_MEMORY) { - dev_err(&d40c->chan.dev->device, "[%s] Invalid dst\n", - __func__); + chan_err(d40c, "Invalid dst\n"); res = -EINVAL; } if (conf->dir == STEDMA40_PERIPH_TO_MEM && src_event_group == STEDMA40_DEV_SRC_MEMORY) { - dev_err(&d40c->chan.dev->device, "[%s] Invalid src\n", - __func__); + chan_err(d40c, "Invalid src\n"); res = -EINVAL; } if (src_event_group == STEDMA40_DEV_SRC_MEMORY && dst_event_group == STEDMA40_DEV_DST_MEMORY && is_log) { - dev_err(&d40c->chan.dev->device, - "[%s] No event line\n", __func__); + chan_err(d40c, "No event line\n"); res = -EINVAL; } if (conf->dir == STEDMA40_PERIPH_TO_PERIPH && (src_event_group != dst_event_group)) { - dev_err(&d40c->chan.dev->device, - "[%s] Invalid event group\n", __func__); + chan_err(d40c, "Invalid event group\n"); res = -EINVAL; } @@ -1221,9 +1219,7 @@ static int d40_validate_conf(struct d40_chan *d40c, * DMAC HW supports it. Will be added to this driver, * in case any dma client requires it. 
*/ - dev_err(&d40c->chan.dev->device, - "[%s] periph to periph not supported\n", - __func__); + chan_err(d40c, "periph to periph not supported\n"); res = -EINVAL; } @@ -1236,9 +1232,7 @@ static int d40_validate_conf(struct d40_chan *d40c, * src (burst x width) == dst (burst x width) */ - dev_err(&d40c->chan.dev->device, - "[%s] src (burst x width) != dst (burst x width)\n", - __func__); + chan_err(d40c, "src (burst x width) != dst (burst x width)\n"); res = -EINVAL; } @@ -1441,8 +1435,7 @@ static int d40_config_memcpy(struct d40_chan *d40c) dma_has_cap(DMA_SLAVE, cap)) { d40c->dma_cfg = *d40c->base->plat_data->memcpy_conf_phy; } else { - dev_err(&d40c->chan.dev->device, "[%s] No memcpy\n", - __func__); + chan_err(d40c, "No memcpy\n"); return -EINVAL; } @@ -1473,15 +1466,13 @@ static int d40_free_dma(struct d40_chan *d40c) } if (phy == NULL) { - dev_err(&d40c->chan.dev->device, "[%s] phy == null\n", - __func__); + chan_err(d40c, "phy == null\n"); return -EINVAL; } if (phy->allocated_src == D40_ALLOC_FREE && phy->allocated_dst == D40_ALLOC_FREE) { - dev_err(&d40c->chan.dev->device, "[%s] channel already free\n", - __func__); + chan_err(d40c, "channel already free\n"); return -EINVAL; } @@ -1493,15 +1484,13 @@ static int d40_free_dma(struct d40_chan *d40c) event = D40_TYPE_TO_EVENT(d40c->dma_cfg.src_dev_type); is_src = true; } else { - dev_err(&d40c->chan.dev->device, - "[%s] Unknown direction\n", __func__); + chan_err(d40c, "Unknown direction\n"); return -EINVAL; } res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ); if (res) { - dev_err(&d40c->chan.dev->device, "[%s] suspend failed\n", - __func__); + chan_err(d40c, "suspend failed\n"); return res; } @@ -1521,9 +1510,8 @@ static int d40_free_dma(struct d40_chan *d40c) res = d40_channel_execute_command(d40c, D40_DMA_RUN); if (res) { - dev_err(&d40c->chan.dev->device, - "[%s] Executing RUN command\n", - __func__); + chan_err(d40c, + "Executing RUN command\n"); return res; } } @@ -1536,8 +1524,7 @@ static int d40_free_dma(struct d40_chan *d40c) /* Release physical channel */ res = d40_channel_execute_command(d40c, D40_DMA_STOP); if (res) { - dev_err(&d40c->chan.dev->device, - "[%s] Failed to stop channel\n", __func__); + chan_err(d40c, "Failed to stop channel\n"); return res; } d40c->phy_chan = NULL; @@ -1581,8 +1568,7 @@ static bool d40_is_paused(struct d40_chan *d40c) event = D40_TYPE_TO_EVENT(d40c->dma_cfg.src_dev_type); status = readl(chanbase + D40_CHAN_REG_SSLNK); } else { - dev_err(&d40c->chan.dev->device, - "[%s] Unknown direction\n", __func__); + chan_err(d40c, "Unknown direction\n"); goto _exit; } @@ -1625,8 +1611,7 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, unsigned long flags; if (d40c->phy_chan == NULL) { - dev_err(&d40c->chan.dev->device, - "[%s] Unallocated channel.\n", __func__); + chan_err(d40c, "Unallocated channel.\n"); return ERR_PTR(-EINVAL); } @@ -1640,8 +1625,7 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, d40c->dma_cfg.src_info.data_width, d40c->dma_cfg.dst_info.data_width); if (d40d->lli_len < 0) { - dev_err(&d40c->chan.dev->device, - "[%s] Unaligned size\n", __func__); + chan_err(d40c, "Unaligned size\n"); goto err; } @@ -1651,8 +1635,7 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, if (chan_is_logical(d40c)) { if (d40_pool_lli_alloc(d40d, d40d->lli_len, true) < 0) { - dev_err(&d40c->chan.dev->device, - "[%s] Out of memory\n", __func__); + chan_err(d40c, "Out of memory\n"); goto err; } @@ -1671,8 +1654,7 @@ struct 
dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, d40c->dma_cfg.src_info.data_width); } else { if (d40_pool_lli_alloc(d40d, d40d->lli_len, false) < 0) { - dev_err(&d40c->chan.dev->device, - "[%s] Out of memory\n", __func__); + chan_err(d40c, "Out of memory\n"); goto err; } @@ -1758,9 +1740,7 @@ static int d40_alloc_chan_resources(struct dma_chan *chan) if (!d40c->configured) { err = d40_config_memcpy(d40c); if (err) { - dev_err(&d40c->chan.dev->device, - "[%s] Failed to configure memcpy channel\n", - __func__); + chan_err(d40c, "Failed to configure memcpy channel\n"); goto fail; } } @@ -1768,8 +1748,7 @@ static int d40_alloc_chan_resources(struct dma_chan *chan) err = d40_allocate_channel(d40c); if (err) { - dev_err(&d40c->chan.dev->device, - "[%s] Failed to allocate channel\n", __func__); + chan_err(d40c, "Failed to allocate channel\n"); goto fail; } @@ -1810,8 +1789,7 @@ static void d40_free_chan_resources(struct dma_chan *chan) unsigned long flags; if (d40c->phy_chan == NULL) { - dev_err(&d40c->chan.dev->device, - "[%s] Cannot free unallocated channel\n", __func__); + chan_err(d40c, "Cannot free unallocated channel\n"); return; } @@ -1821,8 +1799,7 @@ static void d40_free_chan_resources(struct dma_chan *chan) err = d40_free_dma(d40c); if (err) - dev_err(&d40c->chan.dev->device, - "[%s] Failed to free channel\n", __func__); + chan_err(d40c, "Failed to free channel\n"); spin_unlock_irqrestore(&d40c->lock, flags); } @@ -1838,8 +1815,7 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan, unsigned long flags; if (d40c->phy_chan == NULL) { - dev_err(&d40c->chan.dev->device, - "[%s] Channel is not allocated.\n", __func__); + chan_err(d40c, "Channel is not allocated.\n"); return ERR_PTR(-EINVAL); } @@ -1847,8 +1823,7 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan, d40d = d40_desc_get(d40c); if (d40d == NULL) { - dev_err(&d40c->chan.dev->device, - "[%s] Descriptor is NULL\n", __func__); + chan_err(d40c, "Descriptor is NULL\n"); goto err; } @@ -1857,8 +1832,7 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan, d40c->dma_cfg.src_info.data_width, d40c->dma_cfg.dst_info.data_width); if (d40d->lli_len < 0) { - dev_err(&d40c->chan.dev->device, - "[%s] Unaligned size\n", __func__); + chan_err(d40c, "Unaligned size\n"); goto err; } @@ -1870,8 +1844,7 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan, if (chan_is_logical(d40c)) { if (d40_pool_lli_alloc(d40d, d40d->lli_len, true) < 0) { - dev_err(&d40c->chan.dev->device, - "[%s] Out of memory\n", __func__); + chan_err(d40c, "Out of memory\n"); goto err; } d40d->lli_current = 0; @@ -1897,8 +1870,7 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan, } else { if (d40_pool_lli_alloc(d40d, d40d->lli_len, false) < 0) { - dev_err(&d40c->chan.dev->device, - "[%s] Out of memory\n", __func__); + chan_err(d40c, "Out of memory\n"); goto err; } @@ -1966,14 +1938,12 @@ static int d40_prep_slave_sg_log(struct d40_desc *d40d, d40c->dma_cfg.src_info.data_width, d40c->dma_cfg.dst_info.data_width); if (d40d->lli_len < 0) { - dev_err(&d40c->chan.dev->device, - "[%s] Unaligned size\n", __func__); + chan_err(d40c, "Unaligned size\n"); return -EINVAL; } if (d40_pool_lli_alloc(d40d, d40d->lli_len, true) < 0) { - dev_err(&d40c->chan.dev->device, - "[%s] Out of memory\n", __func__); + chan_err(d40c, "Out of memory\n"); return -ENOMEM; } @@ -2022,14 +1992,12 @@ static int d40_prep_slave_sg_phy(struct d40_desc *d40d, 
d40c->dma_cfg.src_info.data_width, d40c->dma_cfg.dst_info.data_width); if (d40d->lli_len < 0) { - dev_err(&d40c->chan.dev->device, - "[%s] Unaligned size\n", __func__); + chan_err(d40c, "Unaligned size\n"); return -EINVAL; } if (d40_pool_lli_alloc(d40d, d40d->lli_len, false) < 0) { - dev_err(&d40c->chan.dev->device, - "[%s] Out of memory\n", __func__); + chan_err(d40c, "Out of memory\n"); return -ENOMEM; } @@ -2092,8 +2060,7 @@ static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan, int err; if (d40c->phy_chan == NULL) { - dev_err(&d40c->chan.dev->device, - "[%s] Cannot prepare unallocated channel\n", __func__); + chan_err(d40c, "Cannot prepare unallocated channel\n"); return ERR_PTR(-EINVAL); } @@ -2110,9 +2077,7 @@ static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan, err = d40_prep_slave_sg_phy(d40d, d40c, sgl, sg_len, direction, dma_flags); if (err) { - dev_err(&d40c->chan.dev->device, - "[%s] Failed to prepare %s slave sg job: %d\n", - __func__, + chan_err(d40c, "Failed to prepare %s slave sg job: %d\n", chan_is_logical(d40c) ? "log" : "phy", err); goto err; } @@ -2143,9 +2108,7 @@ static enum dma_status d40_tx_status(struct dma_chan *chan, int ret; if (d40c->phy_chan == NULL) { - dev_err(&d40c->chan.dev->device, - "[%s] Cannot read status of unallocated channel\n", - __func__); + chan_err(d40c, "Cannot read status of unallocated channel\n"); return -EINVAL; } @@ -2169,8 +2132,7 @@ static void d40_issue_pending(struct dma_chan *chan) unsigned long flags; if (d40c->phy_chan == NULL) { - dev_err(&d40c->chan.dev->device, - "[%s] Channel is not allocated!\n", __func__); + chan_err(d40c, "Channel is not allocated!\n"); return; } @@ -2321,8 +2283,7 @@ static int d40_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); if (d40c->phy_chan == NULL) { - dev_err(&d40c->chan.dev->device, - "[%s] Channel is not allocated!\n", __func__); + chan_err(d40c, "Channel is not allocated!\n"); return -EINVAL; } @@ -2404,9 +2365,7 @@ static int __init d40_dmaengine_init(struct d40_base *base, err = dma_async_device_register(&base->dma_slave); if (err) { - dev_err(base->dev, - "[%s] Failed to register slave channels\n", - __func__); + d40_err(base->dev, "Failed to register slave channels\n"); goto failure1; } @@ -2435,9 +2394,8 @@ static int __init d40_dmaengine_init(struct d40_base *base, err = dma_async_device_register(&base->dma_memcpy); if (err) { - dev_err(base->dev, - "[%s] Failed to regsiter memcpy only channels\n", - __func__); + d40_err(base->dev, + "Failed to regsiter memcpy only channels\n"); goto failure2; } @@ -2462,9 +2420,8 @@ static int __init d40_dmaengine_init(struct d40_base *base, err = dma_async_device_register(&base->dma_both); if (err) { - dev_err(base->dev, - "[%s] Failed to register logical and physical capable channels\n", - __func__); + d40_err(base->dev, + "Failed to register logical and physical capable channels\n"); goto failure3; } return 0; @@ -2566,8 +2523,7 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev) clk = clk_get(&pdev->dev, NULL); if (IS_ERR(clk)) { - dev_err(&pdev->dev, "[%s] No matching clock found\n", - __func__); + d40_err(&pdev->dev, "No matching clock found\n"); goto failure; } @@ -2590,9 +2546,8 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev) for (i = 0; i < ARRAY_SIZE(dma_id_regs); i++) { if (dma_id_regs[i].val != readl(virtbase + dma_id_regs[i].reg)) { - dev_err(&pdev->dev, 
- "[%s] Unknown hardware! Expected 0x%x at 0x%x but got 0x%x\n", - __func__, + d40_err(&pdev->dev, + "Unknown hardware! Expected 0x%x at 0x%x but got 0x%x\n", dma_id_regs[i].val, dma_id_regs[i].reg, readl(virtbase + dma_id_regs[i].reg)); @@ -2605,9 +2560,8 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev) if ((val & D40_DREG_PERIPHID2_DESIGNER_MASK) != D40_HW_DESIGNER) { - dev_err(&pdev->dev, - "[%s] Unknown designer! Got %x wanted %x\n", - __func__, val & D40_DREG_PERIPHID2_DESIGNER_MASK, + d40_err(&pdev->dev, "Unknown designer! Got %x wanted %x\n", + val & D40_DREG_PERIPHID2_DESIGNER_MASK, D40_HW_DESIGNER); goto failure; } @@ -2637,7 +2591,7 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev) sizeof(struct d40_chan), GFP_KERNEL); if (base == NULL) { - dev_err(&pdev->dev, "[%s] Out of memory\n", __func__); + d40_err(&pdev->dev, "Out of memory\n"); goto failure; } @@ -2809,9 +2763,8 @@ static int __init d40_lcla_allocate(struct d40_base *base) base->lcla_pool.pages); if (!page_list[i]) { - dev_err(base->dev, - "[%s] Failed to allocate %d pages.\n", - __func__, base->lcla_pool.pages); + d40_err(base->dev, "Failed to allocate %d pages.\n", + base->lcla_pool.pages); for (j = 0; j < i; j++) free_pages(page_list[j], base->lcla_pool.pages); @@ -2881,9 +2834,7 @@ static int __init d40_probe(struct platform_device *pdev) res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "lcpa"); if (!res) { ret = -ENOENT; - dev_err(&pdev->dev, - "[%s] No \"lcpa\" memory resource\n", - __func__); + d40_err(&pdev->dev, "No \"lcpa\" memory resource\n"); goto failure; } base->lcpa_size = resource_size(res); @@ -2892,9 +2843,9 @@ static int __init d40_probe(struct platform_device *pdev) if (request_mem_region(res->start, resource_size(res), D40_NAME " I/O lcpa") == NULL) { ret = -EBUSY; - dev_err(&pdev->dev, - "[%s] Failed to request LCPA region 0x%x-0x%x\n", - __func__, res->start, res->end); + d40_err(&pdev->dev, + "Failed to request LCPA region 0x%x-0x%x\n", + res->start, res->end); goto failure; } @@ -2910,16 +2861,13 @@ static int __init d40_probe(struct platform_device *pdev) base->lcpa_base = ioremap(res->start, resource_size(res)); if (!base->lcpa_base) { ret = -ENOMEM; - dev_err(&pdev->dev, - "[%s] Failed to ioremap LCPA region\n", - __func__); + d40_err(&pdev->dev, "Failed to ioremap LCPA region\n"); goto failure; } ret = d40_lcla_allocate(base); if (ret) { - dev_err(&pdev->dev, "[%s] Failed to allocate LCLA area\n", - __func__); + d40_err(&pdev->dev, "Failed to allocate LCLA area\n"); goto failure; } @@ -2928,9 +2876,8 @@ static int __init d40_probe(struct platform_device *pdev) base->irq = platform_get_irq(pdev, 0); ret = request_irq(base->irq, d40_handle_interrupt, 0, D40_NAME, base); - if (ret) { - dev_err(&pdev->dev, "[%s] No IRQ defined\n", __func__); + d40_err(&pdev->dev, "No IRQ defined\n"); goto failure; } @@ -2973,7 +2920,7 @@ failure: kfree(base); } - dev_err(&pdev->dev, "[%s] probe failed\n", __func__); + d40_err(&pdev->dev, "probe failed\n"); return ret; } -- cgit v0.10.2 From 4d5949009e585b2bcf09dc4de625351f987a1e6d Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 Jan 2011 11:18:10 +0100 Subject: dma40: fix comment to refer to SOCs rather than boards And add DB8500v2 information. 
Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index ab3ca6af..0faae66 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -2497,9 +2497,10 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev) { .reg = D40_DREG_PERIPHID1, .val = 0x0000}, /* * D40_DREG_PERIPHID2 Depends on HW revision: - * MOP500/HREF ED has 0x0008, + * DB8500ed has 0x0008, * ? has 0x0018, - * HREF V1 has 0x0028 + * DB8500v1 has 0x0028 + * DB8500v2 has 0x0038 */ { .reg = D40_DREG_PERIPHID3, .val = 0x0000}, -- cgit v0.10.2 From ac2c0a387194f45c759572b3462d1bf92ec92f00 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 Jan 2011 11:18:11 +0100 Subject: dma40: allow realtime and priority for event lines DB8500v2's DMA40 (revision 3) allows setting event lines as high priority and real time. Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/arch/arm/plat-nomadik/include/plat/ste_dma40.h b/arch/arm/plat-nomadik/include/plat/ste_dma40.h index 4d6dd4c..8358f85 100644 --- a/arch/arm/plat-nomadik/include/plat/ste_dma40.h +++ b/arch/arm/plat-nomadik/include/plat/ste_dma40.h @@ -104,6 +104,8 @@ struct stedma40_half_channel_info { * * @dir: MEM 2 MEM, PERIPH 2 MEM , MEM 2 PERIPH, PERIPH 2 PERIPH * @high_priority: true if high-priority + * @realtime: true if realtime mode is to be enabled. Only available on DMA40 + * version 3+, i.e DB8500v2+ * @mode: channel mode: physical, logical, or operation * @mode_opt: options for the chosen channel mode * @src_dev_type: Src device type @@ -119,6 +121,7 @@ struct stedma40_half_channel_info { struct stedma40_chan_cfg { enum stedma40_xfer_dir dir; bool high_priority; + bool realtime; enum stedma40_mode mode; enum stedma40_mode_opt mode_opt; int src_dev_type; diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 0faae66..ce55162 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -1724,6 +1724,38 @@ bool stedma40_filter(struct dma_chan *chan, void *data) } EXPORT_SYMBOL(stedma40_filter); +static void __d40_set_prio_rt(struct d40_chan *d40c, int dev_type, bool src) +{ + bool realtime = d40c->dma_cfg.realtime; + bool highprio = d40c->dma_cfg.high_priority; + u32 prioreg = highprio ? D40_DREG_PSEG1 : D40_DREG_PCEG1; + u32 rtreg = realtime ? 
D40_DREG_RSEG1 : D40_DREG_RCEG1; + u32 event = D40_TYPE_TO_EVENT(dev_type); + u32 group = D40_TYPE_TO_GROUP(dev_type); + u32 bit = 1 << event; + + /* Destination event lines are stored in the upper halfword */ + if (!src) + bit <<= 16; + + writel(bit, d40c->base->virtbase + prioreg + group * 4); + writel(bit, d40c->base->virtbase + rtreg + group * 4); +} + +static void d40_set_prio_realtime(struct d40_chan *d40c) +{ + if (d40c->base->rev < 3) + return; + + if ((d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) || + (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_PERIPH)) + __d40_set_prio_rt(d40c, d40c->dma_cfg.src_dev_type, true); + + if ((d40c->dma_cfg.dir == STEDMA40_MEM_TO_PERIPH) || + (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_PERIPH)) + __d40_set_prio_rt(d40c, d40c->dma_cfg.dst_dev_type, false); +} + /* DMA ENGINE functions */ static int d40_alloc_chan_resources(struct dma_chan *chan) { @@ -1756,6 +1788,8 @@ static int d40_alloc_chan_resources(struct dma_chan *chan) d40_phy_cfg(&d40c->dma_cfg, &d40c->src_def_cfg, &d40c->dst_def_cfg, chan_is_logical(d40c)); + d40_set_prio_realtime(d40c); + if (chan_is_logical(d40c)) { d40_log_cfg(&d40c->dma_cfg, &d40c->log_def.lcsp1, &d40c->log_def.lcsp3); diff --git a/drivers/dma/ste_dma40_ll.h b/drivers/dma/ste_dma40_ll.h index 9cc4349..e93f394 100644 --- a/drivers/dma/ste_dma40_ll.h +++ b/drivers/dma/ste_dma40_ll.h @@ -163,6 +163,22 @@ #define D40_DREG_LCEIS1 0x0B4 #define D40_DREG_LCEIS2 0x0B8 #define D40_DREG_LCEIS3 0x0BC +#define D40_DREG_PSEG1 0x110 +#define D40_DREG_PSEG2 0x114 +#define D40_DREG_PSEG3 0x118 +#define D40_DREG_PSEG4 0x11C +#define D40_DREG_PCEG1 0x120 +#define D40_DREG_PCEG2 0x124 +#define D40_DREG_PCEG3 0x128 +#define D40_DREG_PCEG4 0x12C +#define D40_DREG_RSEG1 0x130 +#define D40_DREG_RSEG2 0x134 +#define D40_DREG_RSEG3 0x138 +#define D40_DREG_RSEG4 0x13C +#define D40_DREG_RCEG1 0x140 +#define D40_DREG_RCEG2 0x144 +#define D40_DREG_RCEG3 0x148 +#define D40_DREG_RCEG4 0x14C #define D40_DREG_STFU 0xFC8 #define D40_DREG_ICFG 0xFCC #define D40_DREG_PERIPHID0 0xFE0 -- cgit v0.10.2 From 594ece4dc0e8ac222945ef2048430600ad3c7644 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 Jan 2011 11:18:12 +0100 Subject: dma40: remove unnecessary ALIGN()s ALIGN(x * y, y) == x * y ALIGN(aligned + x * y, y) == aligned + x * y Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index ce55162..d32a9ac 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -344,7 +344,7 @@ static int d40_pool_lli_alloc(struct d40_desc *d40d, d40d->lli_pool.size = sizeof(d40d->lli_pool.pre_alloc_lli); d40d->lli_pool.base = NULL; } else { - d40d->lli_pool.size = ALIGN(lli_len * 2 * align, align); + d40d->lli_pool.size = lli_len * 2 * align; base = kmalloc(d40d->lli_pool.size + align, GFP_NOWAIT); d40d->lli_pool.base = base; @@ -356,13 +356,11 @@ static int d40_pool_lli_alloc(struct d40_desc *d40d, if (is_log) { d40d->lli_log.src = PTR_ALIGN((struct d40_log_lli *) base, align); - d40d->lli_log.dst = PTR_ALIGN(d40d->lli_log.src + lli_len, - align); + d40d->lli_log.dst = d40d->lli_log.src + lli_len; } else { d40d->lli_phy.src = PTR_ALIGN((struct d40_phy_lli *)base, align); - d40d->lli_phy.dst = PTR_ALIGN(d40d->lli_phy.src + lli_len, - align); + d40d->lli_phy.dst = d40d->lli_phy.src + lli_len; } return 0; -- cgit v0.10.2 From 7fe8be5a74eb058b0b48970caef83e0215f55944 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 
Jan 2011 11:18:13 +0100 Subject: dma40: use sg_dma_address() instead of sg_phys() The address to use for DMA should be taken from sg_dma_address() and not sg_phys(). Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40_ll.c b/drivers/dma/ste_dma40_ll.c index 0b096a3..6f03f58 100644 --- a/drivers/dma/ste_dma40_ll.c +++ b/drivers/dma/ste_dma40_ll.c @@ -272,7 +272,7 @@ int d40_phy_sg_to_lli(struct scatterlist *sg, if (target) dst = target; else - dst = sg_phys(current_sg); + dst = sg_dma_address(current_sg); l_phys = ALIGN(lli_phys + (lli - lli_sg) * sizeof(struct d40_phy_lli), D40_LLI_ALIGN); @@ -416,7 +416,7 @@ int d40_log_sg_to_dev(struct scatterlist *sg, if (direction == DMA_TO_DEVICE) { lli_src = d40_log_buf_to_lli(lli_src, - sg_phys(current_sg), + sg_dma_address(current_sg), sg_dma_len(current_sg), lcsp->lcsp1, src_data_width, dst_data_width, @@ -431,7 +431,7 @@ int d40_log_sg_to_dev(struct scatterlist *sg, } else { lli_dst = d40_log_buf_to_lli(lli_dst, - sg_phys(current_sg), + sg_dma_address(current_sg), sg_dma_len(current_sg), lcsp->lcsp3, dst_data_width, src_data_width, @@ -491,7 +491,7 @@ int d40_log_sg_to_lli(struct scatterlist *sg, for_each_sg(sg, current_sg, sg_len, i) { total_size += sg_dma_len(current_sg); lli = d40_log_buf_to_lli(lli, - sg_phys(current_sg), + sg_dma_address(current_sg), sg_dma_len(current_sg), lcsp13, data_width1, data_width2, true); -- cgit v0.10.2 From 026cbc424a162e495ad29e91d354fb8fc2da2657 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 Jan 2011 11:18:14 +0100 Subject: dma40: fix DMA API usage for LCLA Map the buffer once and use dma_sync*() appropriately instead of mapping the buffer over and over without unmapping it. 
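As an aside, the pattern this fix moves the LCLA handling to is the standard streaming-DMA life cycle: map the descriptor area once, sync only the range the CPU has rewritten before handing it to the hardware, and unmap once at teardown. A minimal sketch of that life cycle follows; the lcla_example_* names are placeholders for illustration and are not part of the driver.

	#include <linux/dma-mapping.h>
	#include <linux/errno.h>

	struct lcla_example {
		struct device	*dev;
		void		*cpu_buf;	/* CPU-visible descriptor area */
		dma_addr_t	dma_addr;	/* bus address, valid after mapping */
		size_t		size;
	};

	static int lcla_example_map(struct lcla_example *p)
	{
		/* Map once, at init time. */
		p->dma_addr = dma_map_single(p->dev, p->cpu_buf, p->size,
					     DMA_TO_DEVICE);
		return dma_mapping_error(p->dev, p->dma_addr) ? -ENOMEM : 0;
	}

	static void lcla_example_update(struct lcla_example *p,
					unsigned long offset, size_t len)
	{
		/* The CPU rewrote [offset, offset + len); hand it back to the HW. */
		dma_sync_single_range_for_device(p->dev, p->dma_addr, offset, len,
						 DMA_TO_DEVICE);
	}

	static void lcla_example_unmap(struct lcla_example *p)
	{
		/* Unmap exactly once, at teardown. */
		dma_unmap_single(p->dev, p->dma_addr, p->size, DMA_TO_DEVICE);
	}
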
Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index d32a9ac..f08e5c49 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -128,6 +128,7 @@ struct d40_desc { */ struct d40_lcla_pool { void *base; + dma_addr_t dma_addr; void *base_unaligned; int pages; spinlock_t lock; @@ -504,25 +505,25 @@ static void d40_desc_load(struct d40_chan *d40c, struct d40_desc *d40d) d40d->lli_current++; for (; d40d->lli_current < d40d->lli_len; d40d->lli_current++) { - struct d40_log_lli *lcla; + unsigned int lcla_offset = d40c->phy_chan->num * 1024 + + 8 * curr_lcla * 2; + struct d40_lcla_pool *pool = &d40c->base->lcla_pool; + struct d40_log_lli *lcla = pool->base + lcla_offset; if (d40d->lli_current + 1 < d40d->lli_len) next_lcla = d40_lcla_alloc_one(d40c, d40d); else next_lcla = -EINVAL; - lcla = d40c->base->lcla_pool.base + - d40c->phy_chan->num * 1024 + - 8 * curr_lcla * 2; - d40_log_lli_lcla_write(lcla, &d40d->lli_log.dst[d40d->lli_current], &d40d->lli_log.src[d40d->lli_current], next_lcla); - (void) dma_map_single(d40c->base->dev, lcla, - 2 * sizeof(struct d40_log_lli), - DMA_TO_DEVICE); + dma_sync_single_range_for_device(d40c->base->dev, + pool->dma_addr, lcla_offset, + 2 * sizeof(struct d40_log_lli), + DMA_TO_DEVICE); curr_lcla = next_lcla; @@ -2771,6 +2772,7 @@ static void __init d40_hw_init(struct d40_base *base) static int __init d40_lcla_allocate(struct d40_base *base) { + struct d40_lcla_pool *pool = &base->lcla_pool; unsigned long *page_list; int i, j; int ret = 0; @@ -2835,6 +2837,15 @@ static int __init d40_lcla_allocate(struct d40_base *base) LCLA_ALIGNMENT); } + pool->dma_addr = dma_map_single(base->dev, pool->base, + SZ_1K * base->num_phy_chans, + DMA_TO_DEVICE); + if (dma_mapping_error(base->dev, pool->dma_addr)) { + pool->dma_addr = 0; + ret = -ENOMEM; + goto failure; + } + writel(virt_to_phys(base->lcla_pool.base), base->virtbase + D40_DREG_LCLA); failure: @@ -2929,6 +2940,12 @@ failure: kmem_cache_destroy(base->desc_slab); if (base->virtbase) iounmap(base->virtbase); + + if (base->lcla_pool.dma_addr) + dma_unmap_single(base->dev, base->lcla_pool.dma_addr, + SZ_1K * base->num_phy_chans, + DMA_TO_DEVICE); + if (!base->lcla_pool.base_unaligned && base->lcla_pool.base) free_pages((unsigned long)base->lcla_pool.base, base->lcla_pool.pages); -- cgit v0.10.2 From b00f938c8cf5ba8e7a692519548a256aa3ea1203 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 Jan 2011 11:18:15 +0100 Subject: dma40: fix DMA API usage for LLIs Map and unmap the LLIs and use dma_sync_single_for_device() appropriately instead of mapping and never unmapping them. Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index f08e5c49..b585686 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -68,6 +68,7 @@ enum d40_command { * @base: Pointer to memory area when the pre_alloc_lli's are not large * enough, IE bigger than the most common case, 1 dst and 1 src. NULL if * pre_alloc_lli is used. + * @dma_addr: DMA address, if mapped * @size: The size in bytes of the memory at base or the size of pre_alloc_lli. * @pre_alloc_lli: Pre allocated area for the most common case of transfers, * one buffer to one buffer. 
@@ -75,6 +76,7 @@ enum d40_command { struct d40_lli_pool { void *base; int size; + dma_addr_t dma_addr; /* Space for dst and src, plus an extra for padding */ u8 pre_alloc_lli[3 * sizeof(struct d40_phy_lli)]; }; @@ -329,7 +331,7 @@ static void __iomem *chan_base(struct d40_chan *chan) #define chan_err(d40c, format, arg...) \ d40_err(chan2dev(d40c), format, ## arg) -static int d40_pool_lli_alloc(struct d40_desc *d40d, +static int d40_pool_lli_alloc(struct d40_chan *d40c, struct d40_desc *d40d, int lli_len, bool is_log) { u32 align; @@ -358,17 +360,36 @@ static int d40_pool_lli_alloc(struct d40_desc *d40d, d40d->lli_log.src = PTR_ALIGN((struct d40_log_lli *) base, align); d40d->lli_log.dst = d40d->lli_log.src + lli_len; + + d40d->lli_pool.dma_addr = 0; } else { d40d->lli_phy.src = PTR_ALIGN((struct d40_phy_lli *)base, align); d40d->lli_phy.dst = d40d->lli_phy.src + lli_len; + + d40d->lli_pool.dma_addr = dma_map_single(d40c->base->dev, + d40d->lli_phy.src, + d40d->lli_pool.size, + DMA_TO_DEVICE); + + if (dma_mapping_error(d40c->base->dev, + d40d->lli_pool.dma_addr)) { + kfree(d40d->lli_pool.base); + d40d->lli_pool.base = NULL; + d40d->lli_pool.dma_addr = 0; + return -ENOMEM; + } } return 0; } -static void d40_pool_lli_free(struct d40_desc *d40d) +static void d40_pool_lli_free(struct d40_chan *d40c, struct d40_desc *d40d) { + if (d40d->lli_pool.dma_addr) + dma_unmap_single(d40c->base->dev, d40d->lli_pool.dma_addr, + d40d->lli_pool.size, DMA_TO_DEVICE); + kfree(d40d->lli_pool.base); d40d->lli_pool.base = NULL; d40d->lli_pool.size = 0; @@ -454,7 +475,7 @@ static struct d40_desc *d40_desc_get(struct d40_chan *d40c) list_for_each_entry_safe(d, _d, &d40c->client, node) if (async_tx_test_ack(&d->txd)) { - d40_pool_lli_free(d); + d40_pool_lli_free(d40c, d); d40_desc_remove(d); desc = d; memset(desc, 0, sizeof(*desc)); @@ -474,6 +495,7 @@ static struct d40_desc *d40_desc_get(struct d40_chan *d40c) static void d40_desc_free(struct d40_chan *d40c, struct d40_desc *d40d) { + d40_pool_lli_free(d40c, d40d); d40_lcla_free_all(d40c, d40d); kmem_cache_free(d40c->base->desc_slab, d40d); } @@ -1063,7 +1085,7 @@ static void dma_tasklet(unsigned long data) callback_param = d40d->txd.callback_param; if (async_tx_test_ack(&d40d->txd)) { - d40_pool_lli_free(d40d); + d40_pool_lli_free(d40c, d40d); d40_desc_remove(d40d); d40_desc_free(d40c, d40d); } else { @@ -1459,7 +1481,7 @@ static int d40_free_dma(struct d40_chan *d40c) /* Release client owned descriptors */ if (!list_empty(&d40c->client)) list_for_each_entry_safe(d, _d, &d40c->client, node) { - d40_pool_lli_free(d); + d40_pool_lli_free(d40c, d); d40_desc_remove(d); d40_desc_free(d40c, d); } @@ -1633,7 +1655,7 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, if (chan_is_logical(d40c)) { - if (d40_pool_lli_alloc(d40d, d40d->lli_len, true) < 0) { + if (d40_pool_lli_alloc(d40c, d40d, d40d->lli_len, true) < 0) { chan_err(d40c, "Out of memory\n"); goto err; } @@ -1652,7 +1674,7 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, d40c->dma_cfg.dst_info.data_width, d40c->dma_cfg.src_info.data_width); } else { - if (d40_pool_lli_alloc(d40d, d40d->lli_len, false) < 0) { + if (d40_pool_lli_alloc(d40c, d40d, d40d->lli_len, false) < 0) { chan_err(d40c, "Out of memory\n"); goto err; } @@ -1683,8 +1705,9 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, if (res < 0) goto err; - (void) dma_map_single(d40c->base->dev, d40d->lli_phy.src, - d40d->lli_pool.size, DMA_TO_DEVICE); + 
dma_sync_single_for_device(d40c->base->dev, + d40d->lli_pool.dma_addr, + d40d->lli_pool.size, DMA_TO_DEVICE); } dma_async_tx_descriptor_init(&d40d->txd, chan); @@ -1876,7 +1899,7 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan, if (chan_is_logical(d40c)) { - if (d40_pool_lli_alloc(d40d, d40d->lli_len, true) < 0) { + if (d40_pool_lli_alloc(d40c,d40d, d40d->lli_len, true) < 0) { chan_err(d40c, "Out of memory\n"); goto err; } @@ -1902,7 +1925,7 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan, } else { - if (d40_pool_lli_alloc(d40d, d40d->lli_len, false) < 0) { + if (d40_pool_lli_alloc(d40c, d40d, d40d->lli_len, false) < 0) { chan_err(d40c, "Out of memory\n"); goto err; } @@ -1931,8 +1954,9 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan, false) == NULL) goto err; - (void) dma_map_single(d40c->base->dev, d40d->lli_phy.src, - d40d->lli_pool.size, DMA_TO_DEVICE); + dma_sync_single_for_device(d40c->base->dev, + d40d->lli_pool.dma_addr, + d40d->lli_pool.size, DMA_TO_DEVICE); } spin_unlock_irqrestore(&d40c->lock, flags); @@ -1975,7 +1999,7 @@ static int d40_prep_slave_sg_log(struct d40_desc *d40d, return -EINVAL; } - if (d40_pool_lli_alloc(d40d, d40d->lli_len, true) < 0) { + if (d40_pool_lli_alloc(d40c, d40d, d40d->lli_len, true) < 0) { chan_err(d40c, "Out of memory\n"); return -ENOMEM; } @@ -2029,7 +2053,7 @@ static int d40_prep_slave_sg_phy(struct d40_desc *d40d, return -EINVAL; } - if (d40_pool_lli_alloc(d40d, d40d->lli_len, false) < 0) { + if (d40_pool_lli_alloc(d40c, d40d, d40d->lli_len, false) < 0) { chan_err(d40c, "Out of memory\n"); return -ENOMEM; } @@ -2075,8 +2099,8 @@ static int d40_prep_slave_sg_phy(struct d40_desc *d40d, if (res < 0) return res; - (void) dma_map_single(d40c->base->dev, d40d->lli_phy.src, - d40d->lli_pool.size, DMA_TO_DEVICE); + dma_sync_single_for_device(d40c->base->dev, d40d->lli_pool.dma_addr, + d40d->lli_pool.size, DMA_TO_DEVICE); return 0; } -- cgit v0.10.2 From d924abad7fa9a78d70b20552bf27fe4f7a19a2fb Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 Jan 2011 11:18:16 +0100 Subject: dma40: remove unnecessary casts Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index b585686..bd72269 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -357,14 +357,12 @@ static int d40_pool_lli_alloc(struct d40_chan *d40c, struct d40_desc *d40d, } if (is_log) { - d40d->lli_log.src = PTR_ALIGN((struct d40_log_lli *) base, - align); + d40d->lli_log.src = PTR_ALIGN(base, align); d40d->lli_log.dst = d40d->lli_log.src + lli_len; d40d->lli_pool.dma_addr = 0; } else { - d40d->lli_phy.src = PTR_ALIGN((struct d40_phy_lli *)base, - align); + d40d->lli_phy.src = PTR_ALIGN(base, align); d40d->lli_phy.dst = d40d->lli_phy.src + lli_len; d40d->lli_pool.dma_addr = dma_map_single(d40c->base->dev, -- cgit v0.10.2 From 95944c6ef5b5214508273992416adb836b63c73f Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 Jan 2011 11:18:17 +0100 Subject: dma40: implement prep_memcpy as a wrapper around memcpy_sg To simplify the code. 
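The trick the wrapper relies on is building a throwaway one-entry scatterlist around an already bus-mapped buffer. A stand-alone sketch of that step is below; single_entry_sg() is a hypothetical helper name, and the driver simply open-codes the same three steps for the src and dst buffers inside d40_prep_memcpy() in the diff that follows.

	#include <linux/scatterlist.h>

	/* Wrap one DMA (bus) address in a single-entry scatterlist. */
	static void single_entry_sg(struct scatterlist *sg, dma_addr_t addr,
				    size_t len)
	{
		sg_init_table(sg, 1);		/* one entry, marked as the last */
		sg_dma_address(sg) = addr;	/* addr is already a bus address */
		sg_dma_len(sg) = len;
	}

With two such entries, one for src and one for dst, the memcpy prep reduces to a single call into the existing sg-based prep path with a segment count of one.
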
Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index bd72269..0a20179 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -1863,108 +1863,19 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan, size_t size, unsigned long dma_flags) { - struct d40_desc *d40d; - struct d40_chan *d40c = container_of(chan, struct d40_chan, - chan); - unsigned long flags; - - if (d40c->phy_chan == NULL) { - chan_err(d40c, "Channel is not allocated.\n"); - return ERR_PTR(-EINVAL); - } - - spin_lock_irqsave(&d40c->lock, flags); - d40d = d40_desc_get(d40c); - - if (d40d == NULL) { - chan_err(d40c, "Descriptor is NULL\n"); - goto err; - } - - d40d->txd.flags = dma_flags; - d40d->lli_len = d40_size_2_dmalen(size, - d40c->dma_cfg.src_info.data_width, - d40c->dma_cfg.dst_info.data_width); - if (d40d->lli_len < 0) { - chan_err(d40c, "Unaligned size\n"); - goto err; - } - - - dma_async_tx_descriptor_init(&d40d->txd, chan); - - d40d->txd.tx_submit = d40_tx_submit; - - if (chan_is_logical(d40c)) { - - if (d40_pool_lli_alloc(d40c,d40d, d40d->lli_len, true) < 0) { - chan_err(d40c, "Out of memory\n"); - goto err; - } - d40d->lli_current = 0; - - if (d40_log_buf_to_lli(d40d->lli_log.src, - src, - size, - d40c->log_def.lcsp1, - d40c->dma_cfg.src_info.data_width, - d40c->dma_cfg.dst_info.data_width, - true) == NULL) - goto err; - - if (d40_log_buf_to_lli(d40d->lli_log.dst, - dst, - size, - d40c->log_def.lcsp3, - d40c->dma_cfg.dst_info.data_width, - d40c->dma_cfg.src_info.data_width, - true) == NULL) - goto err; - - } else { - - if (d40_pool_lli_alloc(d40c, d40d, d40d->lli_len, false) < 0) { - chan_err(d40c, "Out of memory\n"); - goto err; - } - - if (d40_phy_buf_to_lli(d40d->lli_phy.src, - src, - size, - d40c->dma_cfg.src_info.psize, - 0, - d40c->src_def_cfg, - true, - d40c->dma_cfg.src_info.data_width, - d40c->dma_cfg.dst_info.data_width, - false) == NULL) - goto err; + struct scatterlist dst_sg; + struct scatterlist src_sg; - if (d40_phy_buf_to_lli(d40d->lli_phy.dst, - dst, - size, - d40c->dma_cfg.dst_info.psize, - 0, - d40c->dst_def_cfg, - true, - d40c->dma_cfg.dst_info.data_width, - d40c->dma_cfg.src_info.data_width, - false) == NULL) - goto err; + sg_init_table(&dst_sg, 1); + sg_init_table(&src_sg, 1); - dma_sync_single_for_device(d40c->base->dev, - d40d->lli_pool.dma_addr, - d40d->lli_pool.size, DMA_TO_DEVICE); - } + sg_dma_address(&dst_sg) = dst; + sg_dma_address(&src_sg) = src; - spin_unlock_irqrestore(&d40c->lock, flags); - return &d40d->txd; + sg_dma_len(&dst_sg) = size; + sg_dma_len(&src_sg) = size; -err: - if (d40d) - d40_desc_free(d40c, d40d); - spin_unlock_irqrestore(&d40c->lock, flags); - return NULL; + return stedma40_memcpy_sg(chan, &dst_sg, &src_sg, 1, dma_flags); } static struct dma_async_tx_descriptor * diff --git a/drivers/dma/ste_dma40_ll.c b/drivers/dma/ste_dma40_ll.c index 6f03f58..552c597 100644 --- a/drivers/dma/ste_dma40_ll.c +++ b/drivers/dma/ste_dma40_ll.c @@ -198,7 +198,7 @@ static int d40_seg_size(int size, int data_width1, int data_width2) return seg_max; } -struct d40_phy_lli *d40_phy_buf_to_lli(struct d40_phy_lli *lli, +static struct d40_phy_lli *d40_phy_buf_to_lli(struct d40_phy_lli *lli, dma_addr_t addr, u32 size, int psize, diff --git a/drivers/dma/ste_dma40_ll.h b/drivers/dma/ste_dma40_ll.h index e93f394..a5d7171 100644 --- a/drivers/dma/ste_dma40_ll.h +++ b/drivers/dma/ste_dma40_ll.h @@ -312,17 +312,6 
@@ int d40_phy_sg_to_lli(struct scatterlist *sg, u32 data_width2, int psize); -struct d40_phy_lli *d40_phy_buf_to_lli(struct d40_phy_lli *lli, - dma_addr_t data, - u32 data_size, - int psize, - dma_addr_t next_lli, - u32 reg_cfg, - bool term_int, - u32 data_width1, - u32 data_width2, - bool is_device); - void d40_phy_lli_write(void __iomem *virtbase, u32 phy_chan_num, struct d40_phy_lli *lli_dst, -- cgit v0.10.2 From 5f81158f90db4bc8a79e91736aa3afce8e590e46 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 Jan 2011 11:18:18 +0100 Subject: dma40: combine desc init functions The desc init code can be shared between the mem and slave prep routines. Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 0a20179..5259a98 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -1617,6 +1617,35 @@ static u32 stedma40_residue(struct dma_chan *chan) return bytes_left; } +static struct d40_desc * +d40_prep_desc(struct d40_chan *chan, struct scatterlist *sg, + unsigned int sg_len, unsigned long dma_flags) +{ + struct stedma40_chan_cfg *cfg = &chan->dma_cfg; + struct d40_desc *desc; + + desc = d40_desc_get(chan); + if (!desc) + return NULL; + + desc->lli_len = d40_sg_2_dmalen(sg, sg_len, cfg->src_info.data_width, + cfg->dst_info.data_width); + if (desc->lli_len < 0) { + chan_err(chan, "Unaligned size\n"); + d40_desc_free(chan, desc); + + return NULL; + } + + desc->lli_current = 0; + desc->txd.flags = dma_flags; + desc->txd.tx_submit = d40_tx_submit; + + dma_async_tx_descriptor_init(&desc->txd, &chan->chan); + + return desc; +} + struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, struct scatterlist *sgl_dst, struct scatterlist *sgl_src, @@ -1635,22 +1664,11 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, } spin_lock_irqsave(&d40c->lock, flags); - d40d = d40_desc_get(d40c); - if (d40d == NULL) + d40d = d40_prep_desc(d40c, sgl_dst, sgl_len, dma_flags); + if (!d40d) goto err; - d40d->lli_len = d40_sg_2_dmalen(sgl_dst, sgl_len, - d40c->dma_cfg.src_info.data_width, - d40c->dma_cfg.dst_info.data_width); - if (d40d->lli_len < 0) { - chan_err(d40c, "Unaligned size\n"); - goto err; - } - - d40d->lli_current = 0; - d40d->txd.flags = dma_flags; - if (chan_is_logical(d40c)) { if (d40_pool_lli_alloc(d40c, d40d, d40d->lli_len, true) < 0) { @@ -1708,10 +1726,6 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, d40d->lli_pool.size, DMA_TO_DEVICE); } - dma_async_tx_descriptor_init(&d40d->txd, chan); - - d40d->txd.tx_submit = d40_tx_submit; - spin_unlock_irqrestore(&d40c->lock, flags); return &d40d->txd; @@ -1900,21 +1914,11 @@ static int d40_prep_slave_sg_log(struct d40_desc *d40d, dma_addr_t dev_addr = 0; int total_size; - d40d->lli_len = d40_sg_2_dmalen(sgl, sg_len, - d40c->dma_cfg.src_info.data_width, - d40c->dma_cfg.dst_info.data_width); - if (d40d->lli_len < 0) { - chan_err(d40c, "Unaligned size\n"); - return -EINVAL; - } - if (d40_pool_lli_alloc(d40c, d40d, d40d->lli_len, true) < 0) { chan_err(d40c, "Out of memory\n"); return -ENOMEM; } - d40d->lli_current = 0; - if (direction == DMA_FROM_DEVICE) if (d40c->runtime_addr) dev_addr = d40c->runtime_addr; @@ -1954,21 +1958,11 @@ static int d40_prep_slave_sg_phy(struct d40_desc *d40d, dma_addr_t dst_dev_addr; int res; - d40d->lli_len = d40_sg_2_dmalen(sgl, sgl_len, - d40c->dma_cfg.src_info.data_width, - 
d40c->dma_cfg.dst_info.data_width); - if (d40d->lli_len < 0) { - chan_err(d40c, "Unaligned size\n"); - return -EINVAL; - } - if (d40_pool_lli_alloc(d40c, d40d, d40d->lli_len, false) < 0) { chan_err(d40c, "Out of memory\n"); return -ENOMEM; } - d40d->lli_current = 0; - if (direction == DMA_FROM_DEVICE) { dst_dev_addr = 0; if (d40c->runtime_addr) @@ -2031,8 +2025,8 @@ static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan, } spin_lock_irqsave(&d40c->lock, flags); - d40d = d40_desc_get(d40c); + d40d = d40_prep_desc(d40c, sgl, sg_len, dma_flags); if (d40d == NULL) goto err; @@ -2048,12 +2042,6 @@ static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan, goto err; } - d40d->txd.flags = dma_flags; - - dma_async_tx_descriptor_init(&d40d->txd, chan); - - d40d->txd.tx_submit = d40_tx_submit; - spin_unlock_irqrestore(&d40c->lock, flags); return &d40d->txd; -- cgit v0.10.2 From dbd887880320b6a56811bb38ff4ad888728c3a91 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 Jan 2011 11:18:19 +0100 Subject: dma40: combine duplicated d40_pool_lli_alloc() calls Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 5259a98..495f9eb 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -332,8 +332,9 @@ static void __iomem *chan_base(struct d40_chan *chan) d40_err(chan2dev(d40c), format, ## arg) static int d40_pool_lli_alloc(struct d40_chan *d40c, struct d40_desc *d40d, - int lli_len, bool is_log) + int lli_len) { + bool is_log = chan_is_logical(d40c); u32 align; void *base; @@ -1623,6 +1624,7 @@ d40_prep_desc(struct d40_chan *chan, struct scatterlist *sg, { struct stedma40_chan_cfg *cfg = &chan->dma_cfg; struct d40_desc *desc; + int ret; desc = d40_desc_get(chan); if (!desc) @@ -1632,11 +1634,16 @@ d40_prep_desc(struct d40_chan *chan, struct scatterlist *sg, cfg->dst_info.data_width); if (desc->lli_len < 0) { chan_err(chan, "Unaligned size\n"); - d40_desc_free(chan, desc); + goto err; + } - return NULL; + ret = d40_pool_lli_alloc(chan, desc, desc->lli_len); + if (ret < 0) { + chan_err(chan, "Could not allocate lli\n"); + goto err; } + desc->lli_current = 0; desc->txd.flags = dma_flags; desc->txd.tx_submit = d40_tx_submit; @@ -1644,6 +1651,10 @@ d40_prep_desc(struct d40_chan *chan, struct scatterlist *sg, dma_async_tx_descriptor_init(&desc->txd, &chan->chan); return desc; + +err: + d40_desc_free(chan, desc); + return NULL; } struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, @@ -1670,12 +1681,6 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, goto err; if (chan_is_logical(d40c)) { - - if (d40_pool_lli_alloc(d40c, d40d, d40d->lli_len, true) < 0) { - chan_err(d40c, "Out of memory\n"); - goto err; - } - (void) d40_log_sg_to_lli(sgl_src, sgl_len, d40d->lli_log.src, @@ -1690,11 +1695,6 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, d40c->dma_cfg.dst_info.data_width, d40c->dma_cfg.src_info.data_width); } else { - if (d40_pool_lli_alloc(d40c, d40d, d40d->lli_len, false) < 0) { - chan_err(d40c, "Out of memory\n"); - goto err; - } - res = d40_phy_sg_to_lli(sgl_src, sgl_len, 0, @@ -1914,11 +1914,6 @@ static int d40_prep_slave_sg_log(struct d40_desc *d40d, dma_addr_t dev_addr = 0; int total_size; - if (d40_pool_lli_alloc(d40c, d40d, d40d->lli_len, true) < 0) { - chan_err(d40c, "Out of memory\n"); - return -ENOMEM; - } - if 
(direction == DMA_FROM_DEVICE) if (d40c->runtime_addr) dev_addr = d40c->runtime_addr; @@ -1958,11 +1953,6 @@ static int d40_prep_slave_sg_phy(struct d40_desc *d40d, dma_addr_t dst_dev_addr; int res; - if (d40_pool_lli_alloc(d40c, d40d, d40d->lli_len, false) < 0) { - chan_err(d40c, "Out of memory\n"); - return -ENOMEM; - } - if (direction == DMA_FROM_DEVICE) { dst_dev_addr = 0; if (d40c->runtime_addr) -- cgit v0.10.2 From 00ac0341486ffe212f45ff1fe0780d12a36fffde Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 Jan 2011 11:18:20 +0100 Subject: dma40: remove duplicated dev addr code Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 495f9eb..65b5aad 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -1909,25 +1909,10 @@ static int d40_prep_slave_sg_log(struct d40_desc *d40d, struct scatterlist *sgl, unsigned int sg_len, enum dma_data_direction direction, - unsigned long dma_flags) + dma_addr_t dev_addr) { - dma_addr_t dev_addr = 0; int total_size; - if (direction == DMA_FROM_DEVICE) - if (d40c->runtime_addr) - dev_addr = d40c->runtime_addr; - else - dev_addr = d40c->base->plat_data->dev_rx[d40c->dma_cfg.src_dev_type]; - else if (direction == DMA_TO_DEVICE) - if (d40c->runtime_addr) - dev_addr = d40c->runtime_addr; - else - dev_addr = d40c->base->plat_data->dev_tx[d40c->dma_cfg.dst_dev_type]; - - else - return -EINVAL; - total_size = d40_log_sg_to_dev(sgl, sg_len, &d40d->lli_log, &d40c->log_def, @@ -1947,27 +1932,12 @@ static int d40_prep_slave_sg_phy(struct d40_desc *d40d, struct scatterlist *sgl, unsigned int sgl_len, enum dma_data_direction direction, - unsigned long dma_flags) + dma_addr_t dev_addr) { - dma_addr_t src_dev_addr; - dma_addr_t dst_dev_addr; + dma_addr_t src_dev_addr = direction == DMA_FROM_DEVICE ? dev_addr : 0; + dma_addr_t dst_dev_addr = direction == DMA_TO_DEVICE ? 
dev_addr : 0; int res; - if (direction == DMA_FROM_DEVICE) { - dst_dev_addr = 0; - if (d40c->runtime_addr) - src_dev_addr = d40c->runtime_addr; - else - src_dev_addr = d40c->base->plat_data->dev_rx[d40c->dma_cfg.src_dev_type]; - } else if (direction == DMA_TO_DEVICE) { - if (d40c->runtime_addr) - dst_dev_addr = d40c->runtime_addr; - else - dst_dev_addr = d40c->base->plat_data->dev_tx[d40c->dma_cfg.dst_dev_type]; - src_dev_addr = 0; - } else - return -EINVAL; - res = d40_phy_sg_to_lli(sgl, sgl_len, src_dev_addr, @@ -1997,6 +1967,24 @@ static int d40_prep_slave_sg_phy(struct d40_desc *d40d, return 0; } +static dma_addr_t +d40_get_dev_addr(struct d40_chan *chan, enum dma_data_direction direction) +{ + struct stedma40_platform_data *plat = chan->base->plat_data; + struct stedma40_chan_cfg *cfg = &chan->dma_cfg; + dma_addr_t addr; + + if (chan->runtime_addr) + return chan->runtime_addr; + + if (direction == DMA_FROM_DEVICE) + addr = plat->dev_rx[cfg->src_dev_type]; + else if (direction == DMA_TO_DEVICE) + addr = plat->dev_tx[cfg->dst_dev_type]; + + return addr; +} + static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, @@ -2006,6 +1994,7 @@ static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan, struct d40_desc *d40d; struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); + dma_addr_t dev_addr; unsigned long flags; int err; @@ -2014,18 +2003,23 @@ static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan, return ERR_PTR(-EINVAL); } + if (direction != DMA_FROM_DEVICE && direction != DMA_TO_DEVICE) + return NULL; + spin_lock_irqsave(&d40c->lock, flags); d40d = d40_prep_desc(d40c, sgl, sg_len, dma_flags); if (d40d == NULL) goto err; + dev_addr = d40_get_dev_addr(d40c, direction); + if (chan_is_logical(d40c)) err = d40_prep_slave_sg_log(d40d, d40c, sgl, sg_len, - direction, dma_flags); + direction, dev_addr); else err = d40_prep_slave_sg_phy(d40d, d40c, sgl, sg_len, - direction, dma_flags); + direction, dev_addr); if (err) { chan_err(d40c, "Failed to prepare %s slave sg job: %d\n", chan_is_logical(d40c) ? 
"log" : "phy", err); -- cgit v0.10.2 From 3e3a0763e78b520dac5fde569c42664863336d94 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 Jan 2011 11:18:21 +0100 Subject: dma40: combine mem and slave sg-to-lli functions Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 65b5aad..8c6abc2 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -1618,6 +1618,81 @@ static u32 stedma40_residue(struct dma_chan *chan) return bytes_left; } +static int +d40_prep_sg_log(struct d40_chan *chan, struct d40_desc *desc, + struct scatterlist *sg_src, struct scatterlist *sg_dst, + unsigned int sg_len, enum dma_data_direction direction, + dma_addr_t dev_addr) +{ + struct stedma40_chan_cfg *cfg = &chan->dma_cfg; + struct stedma40_half_channel_info *src_info = &cfg->src_info; + struct stedma40_half_channel_info *dst_info = &cfg->dst_info; + + if (direction == DMA_NONE) { + /* memcpy */ + (void) d40_log_sg_to_lli(sg_src, sg_len, + desc->lli_log.src, + chan->log_def.lcsp1, + src_info->data_width, + dst_info->data_width); + + (void) d40_log_sg_to_lli(sg_dst, sg_len, + desc->lli_log.dst, + chan->log_def.lcsp3, + dst_info->data_width, + src_info->data_width); + } else { + unsigned int total_size; + + total_size = d40_log_sg_to_dev(sg_src, sg_len, + &desc->lli_log, + &chan->log_def, + src_info->data_width, + dst_info->data_width, + direction, dev_addr); + if (total_size < 0) + return -EINVAL; + } + + return 0; +} + +static int +d40_prep_sg_phy(struct d40_chan *chan, struct d40_desc *desc, + struct scatterlist *sg_src, struct scatterlist *sg_dst, + unsigned int sg_len, enum dma_data_direction direction, + dma_addr_t dev_addr) +{ + dma_addr_t src_dev_addr = direction == DMA_FROM_DEVICE ? dev_addr : 0; + dma_addr_t dst_dev_addr = direction == DMA_TO_DEVICE ? dev_addr : 0; + struct stedma40_chan_cfg *cfg = &chan->dma_cfg; + struct stedma40_half_channel_info *src_info = &cfg->src_info; + struct stedma40_half_channel_info *dst_info = &cfg->dst_info; + int ret; + + ret = d40_phy_sg_to_lli(sg_src, sg_len, src_dev_addr, + desc->lli_phy.src, + virt_to_phys(desc->lli_phy.src), + chan->src_def_cfg, + src_info->data_width, + dst_info->data_width, + src_info->psize); + + ret = d40_phy_sg_to_lli(sg_dst, sg_len, dst_dev_addr, + desc->lli_phy.dst, + virt_to_phys(desc->lli_phy.dst), + chan->dst_def_cfg, + dst_info->data_width, + src_info->data_width, + dst_info->psize); + + dma_sync_single_for_device(chan->base->dev, desc->lli_pool.dma_addr, + desc->lli_pool.size, DMA_TO_DEVICE); + + return ret < 0 ? 
ret : 0; +} + + static struct d40_desc * d40_prep_desc(struct d40_chan *chan, struct scatterlist *sg, unsigned int sg_len, unsigned long dma_flags) @@ -1663,7 +1738,6 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, unsigned int sgl_len, unsigned long dma_flags) { - int res; struct d40_desc *d40d; struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); @@ -1681,49 +1755,11 @@ struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, goto err; if (chan_is_logical(d40c)) { - (void) d40_log_sg_to_lli(sgl_src, - sgl_len, - d40d->lli_log.src, - d40c->log_def.lcsp1, - d40c->dma_cfg.src_info.data_width, - d40c->dma_cfg.dst_info.data_width); - - (void) d40_log_sg_to_lli(sgl_dst, - sgl_len, - d40d->lli_log.dst, - d40c->log_def.lcsp3, - d40c->dma_cfg.dst_info.data_width, - d40c->dma_cfg.src_info.data_width); + d40_prep_sg_log(d40c, d40d, sgl_src, sgl_dst, + sgl_len, DMA_NONE, 0); } else { - res = d40_phy_sg_to_lli(sgl_src, - sgl_len, - 0, - d40d->lli_phy.src, - virt_to_phys(d40d->lli_phy.src), - d40c->src_def_cfg, - d40c->dma_cfg.src_info.data_width, - d40c->dma_cfg.dst_info.data_width, - d40c->dma_cfg.src_info.psize); - - if (res < 0) - goto err; - - res = d40_phy_sg_to_lli(sgl_dst, - sgl_len, - 0, - d40d->lli_phy.dst, - virt_to_phys(d40d->lli_phy.dst), - d40c->dst_def_cfg, - d40c->dma_cfg.dst_info.data_width, - d40c->dma_cfg.src_info.data_width, - d40c->dma_cfg.dst_info.psize); - - if (res < 0) - goto err; - - dma_sync_single_for_device(d40c->base->dev, - d40d->lli_pool.dma_addr, - d40d->lli_pool.size, DMA_TO_DEVICE); + d40_prep_sg_phy(d40c, d40d, sgl_src, sgl_dst, + sgl_len, DMA_NONE, 0); } spin_unlock_irqrestore(&d40c->lock, flags); @@ -1904,69 +1940,6 @@ d40_prep_sg(struct dma_chan *chan, return stedma40_memcpy_sg(chan, dst_sg, src_sg, dst_nents, dma_flags); } -static int d40_prep_slave_sg_log(struct d40_desc *d40d, - struct d40_chan *d40c, - struct scatterlist *sgl, - unsigned int sg_len, - enum dma_data_direction direction, - dma_addr_t dev_addr) -{ - int total_size; - - total_size = d40_log_sg_to_dev(sgl, sg_len, - &d40d->lli_log, - &d40c->log_def, - d40c->dma_cfg.src_info.data_width, - d40c->dma_cfg.dst_info.data_width, - direction, - dev_addr); - - if (total_size < 0) - return -EINVAL; - - return 0; -} - -static int d40_prep_slave_sg_phy(struct d40_desc *d40d, - struct d40_chan *d40c, - struct scatterlist *sgl, - unsigned int sgl_len, - enum dma_data_direction direction, - dma_addr_t dev_addr) -{ - dma_addr_t src_dev_addr = direction == DMA_FROM_DEVICE ? dev_addr : 0; - dma_addr_t dst_dev_addr = direction == DMA_TO_DEVICE ? 
dev_addr : 0; - int res; - - res = d40_phy_sg_to_lli(sgl, - sgl_len, - src_dev_addr, - d40d->lli_phy.src, - virt_to_phys(d40d->lli_phy.src), - d40c->src_def_cfg, - d40c->dma_cfg.src_info.data_width, - d40c->dma_cfg.dst_info.data_width, - d40c->dma_cfg.src_info.psize); - if (res < 0) - return res; - - res = d40_phy_sg_to_lli(sgl, - sgl_len, - dst_dev_addr, - d40d->lli_phy.dst, - virt_to_phys(d40d->lli_phy.dst), - d40c->dst_def_cfg, - d40c->dma_cfg.dst_info.data_width, - d40c->dma_cfg.src_info.data_width, - d40c->dma_cfg.dst_info.psize); - if (res < 0) - return res; - - dma_sync_single_for_device(d40c->base->dev, d40d->lli_pool.dma_addr, - d40d->lli_pool.size, DMA_TO_DEVICE); - return 0; -} - static dma_addr_t d40_get_dev_addr(struct d40_chan *chan, enum dma_data_direction direction) { @@ -2015,11 +1988,12 @@ static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan, dev_addr = d40_get_dev_addr(d40c, direction); if (chan_is_logical(d40c)) - err = d40_prep_slave_sg_log(d40d, d40c, sgl, sg_len, - direction, dev_addr); + err = d40_prep_sg_log(d40c, d40d, sgl, NULL, + sg_len, direction, dev_addr); else - err = d40_prep_slave_sg_phy(d40d, d40c, sgl, sg_len, - direction, dev_addr); + err = d40_prep_sg_phy(d40c, d40d, sgl, NULL, + sg_len, direction, dev_addr); + if (err) { chan_err(d40c, "Failed to prepare %s slave sg job: %d\n", chan_is_logical(d40c) ? "log" : "phy", err); -- cgit v0.10.2 From 10a946b3a4e1ad665a81981cbe33c3d3903cd7da Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 Jan 2011 11:18:22 +0100 Subject: dma40: remove export of stedma40_memcpy_sg The dmaengine framework has the API for this now. Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/arch/arm/plat-nomadik/include/plat/ste_dma40.h b/arch/arm/plat-nomadik/include/plat/ste_dma40.h index 8358f85..c448860 100644 --- a/arch/arm/plat-nomadik/include/plat/ste_dma40.h +++ b/arch/arm/plat-nomadik/include/plat/ste_dma40.h @@ -172,25 +172,6 @@ struct stedma40_platform_data { bool stedma40_filter(struct dma_chan *chan, void *data); /** - * stedma40_memcpy_sg() - extension of the dma framework, memcpy to/from - * scattergatter lists. - * - * @chan: dmaengine handle - * @sgl_dst: Destination scatter list - * @sgl_src: Source scatter list - * @sgl_len: The length of each scatterlist. Both lists must be of equal length - * and each element must match the corresponding element in the other scatter - * list. - * @flags: is actually enum dma_ctrl_flags. 
See dmaengine.h - */ - -struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, - struct scatterlist *sgl_dst, - struct scatterlist *sgl_src, - unsigned int sgl_len, - unsigned long flags); - -/** * stedma40_slave_mem() - Transfers a raw data buffer to or from a slave * (=device) * diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 8c6abc2..0f5d617 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -1733,6 +1733,7 @@ err: } struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, +static struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, struct scatterlist *sgl_dst, struct scatterlist *sgl_src, unsigned int sgl_len, @@ -1771,7 +1772,6 @@ err: spin_unlock_irqrestore(&d40c->lock, flags); return NULL; } -EXPORT_SYMBOL(stedma40_memcpy_sg); bool stedma40_filter(struct dma_chan *chan, void *data) { -- cgit v0.10.2 From cade1d30b2e071a687011c2a38c03ed7187ec501 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 Jan 2011 11:18:23 +0100 Subject: dma40: combine mem and slave prep_sg functions Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 0f5d617..4e9d6c5 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -1732,44 +1732,70 @@ err: return NULL; } -struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, -static struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan, - struct scatterlist *sgl_dst, - struct scatterlist *sgl_src, - unsigned int sgl_len, - unsigned long dma_flags) +static dma_addr_t +d40_get_dev_addr(struct d40_chan *chan, enum dma_data_direction direction) { - struct d40_desc *d40d; - struct d40_chan *d40c = container_of(chan, struct d40_chan, - chan); + struct stedma40_platform_data *plat = chan->base->plat_data; + struct stedma40_chan_cfg *cfg = &chan->dma_cfg; + dma_addr_t addr; + + if (chan->runtime_addr) + return chan->runtime_addr; + + if (direction == DMA_FROM_DEVICE) + addr = plat->dev_rx[cfg->src_dev_type]; + else if (direction == DMA_TO_DEVICE) + addr = plat->dev_tx[cfg->dst_dev_type]; + + return addr; +} + +static struct dma_async_tx_descriptor * +d40_prep_sg(struct dma_chan *dchan, struct scatterlist *sg_src, + struct scatterlist *sg_dst, unsigned int sg_len, + enum dma_data_direction direction, unsigned long dma_flags) +{ + struct d40_chan *chan = container_of(dchan, struct d40_chan, chan); + dma_addr_t dev_addr = 0; + struct d40_desc *desc; unsigned long flags; + int ret; - if (d40c->phy_chan == NULL) { - chan_err(d40c, "Unallocated channel.\n"); - return ERR_PTR(-EINVAL); + if (!chan->phy_chan) { + chan_err(chan, "Cannot prepare unallocated channel\n"); + return NULL; } - spin_lock_irqsave(&d40c->lock, flags); + spin_lock_irqsave(&chan->lock, flags); - d40d = d40_prep_desc(d40c, sgl_dst, sgl_len, dma_flags); - if (!d40d) + desc = d40_prep_desc(chan, sg_src, sg_len, dma_flags); + if (desc == NULL) goto err; - if (chan_is_logical(d40c)) { - d40_prep_sg_log(d40c, d40d, sgl_src, sgl_dst, - sgl_len, DMA_NONE, 0); - } else { - d40_prep_sg_phy(d40c, d40d, sgl_src, sgl_dst, - sgl_len, DMA_NONE, 0); + if (direction != DMA_NONE) + dev_addr = d40_get_dev_addr(chan, direction); + + if (chan_is_logical(chan)) + ret = d40_prep_sg_log(chan, desc, sg_src, sg_dst, + sg_len, direction, dev_addr); + else + ret = d40_prep_sg_phy(chan, desc, sg_src, sg_dst, + sg_len, direction, dev_addr); + 
+ if (ret) { + chan_err(chan, "Failed to prepare %s sg job: %d\n", + chan_is_logical(chan) ? "log" : "phy", ret); + goto err; } - spin_unlock_irqrestore(&d40c->lock, flags); + spin_unlock_irqrestore(&chan->lock, flags); + + return &desc->txd; - return &d40d->txd; err: - if (d40d) - d40_desc_free(d40c, d40d); - spin_unlock_irqrestore(&d40c->lock, flags); + if (desc) + d40_desc_free(chan, desc); + spin_unlock_irqrestore(&chan->lock, flags); return NULL; } @@ -1925,37 +1951,19 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan, sg_dma_len(&dst_sg) = size; sg_dma_len(&src_sg) = size; - return stedma40_memcpy_sg(chan, &dst_sg, &src_sg, 1, dma_flags); + return d40_prep_sg(chan, &src_sg, &dst_sg, 1, DMA_NONE, dma_flags); } static struct dma_async_tx_descriptor * -d40_prep_sg(struct dma_chan *chan, - struct scatterlist *dst_sg, unsigned int dst_nents, - struct scatterlist *src_sg, unsigned int src_nents, - unsigned long dma_flags) +d40_prep_memcpy_sg(struct dma_chan *chan, + struct scatterlist *dst_sg, unsigned int dst_nents, + struct scatterlist *src_sg, unsigned int src_nents, + unsigned long dma_flags) { if (dst_nents != src_nents) return NULL; - return stedma40_memcpy_sg(chan, dst_sg, src_sg, dst_nents, dma_flags); -} - -static dma_addr_t -d40_get_dev_addr(struct d40_chan *chan, enum dma_data_direction direction) -{ - struct stedma40_platform_data *plat = chan->base->plat_data; - struct stedma40_chan_cfg *cfg = &chan->dma_cfg; - dma_addr_t addr; - - if (chan->runtime_addr) - return chan->runtime_addr; - - if (direction == DMA_FROM_DEVICE) - addr = plat->dev_rx[cfg->src_dev_type]; - else if (direction == DMA_TO_DEVICE) - addr = plat->dev_tx[cfg->dst_dev_type]; - - return addr; + return d40_prep_sg(chan, src_sg, dst_sg, src_nents, DMA_NONE, dma_flags); } static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan, @@ -1964,50 +1972,10 @@ static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan, enum dma_data_direction direction, unsigned long dma_flags) { - struct d40_desc *d40d; - struct d40_chan *d40c = container_of(chan, struct d40_chan, - chan); - dma_addr_t dev_addr; - unsigned long flags; - int err; - - if (d40c->phy_chan == NULL) { - chan_err(d40c, "Cannot prepare unallocated channel\n"); - return ERR_PTR(-EINVAL); - } - if (direction != DMA_FROM_DEVICE && direction != DMA_TO_DEVICE) return NULL; - spin_lock_irqsave(&d40c->lock, flags); - - d40d = d40_prep_desc(d40c, sgl, sg_len, dma_flags); - if (d40d == NULL) - goto err; - - dev_addr = d40_get_dev_addr(d40c, direction); - - if (chan_is_logical(d40c)) - err = d40_prep_sg_log(d40c, d40d, sgl, NULL, - sg_len, direction, dev_addr); - else - err = d40_prep_sg_phy(d40c, d40d, sgl, NULL, - sg_len, direction, dev_addr); - - if (err) { - chan_err(d40c, "Failed to prepare %s slave sg job: %d\n", - chan_is_logical(d40c) ? 
"log" : "phy", err); - goto err; - } - - spin_unlock_irqrestore(&d40c->lock, flags); - return &d40d->txd; - -err: - if (d40d) - d40_desc_free(d40c, d40d); - spin_unlock_irqrestore(&d40c->lock, flags); - return NULL; + return d40_prep_sg(chan, sgl, sgl, sg_len, direction, dma_flags); } static enum dma_status d40_tx_status(struct dma_chan *chan, @@ -2267,7 +2235,7 @@ static int __init d40_dmaengine_init(struct d40_base *base, base->dma_slave.device_alloc_chan_resources = d40_alloc_chan_resources; base->dma_slave.device_free_chan_resources = d40_free_chan_resources; base->dma_slave.device_prep_dma_memcpy = d40_prep_memcpy; - base->dma_slave.device_prep_dma_sg = d40_prep_sg; + base->dma_slave.device_prep_dma_sg = d40_prep_memcpy_sg; base->dma_slave.device_prep_slave_sg = d40_prep_slave_sg; base->dma_slave.device_tx_status = d40_tx_status; base->dma_slave.device_issue_pending = d40_issue_pending; @@ -2291,7 +2259,7 @@ static int __init d40_dmaengine_init(struct d40_base *base, base->dma_memcpy.device_alloc_chan_resources = d40_alloc_chan_resources; base->dma_memcpy.device_free_chan_resources = d40_free_chan_resources; base->dma_memcpy.device_prep_dma_memcpy = d40_prep_memcpy; - base->dma_slave.device_prep_dma_sg = d40_prep_sg; + base->dma_slave.device_prep_dma_sg = d40_prep_memcpy_sg; base->dma_memcpy.device_prep_slave_sg = d40_prep_slave_sg; base->dma_memcpy.device_tx_status = d40_tx_status; base->dma_memcpy.device_issue_pending = d40_issue_pending; @@ -2322,7 +2290,7 @@ static int __init d40_dmaengine_init(struct d40_base *base, base->dma_both.device_alloc_chan_resources = d40_alloc_chan_resources; base->dma_both.device_free_chan_resources = d40_free_chan_resources; base->dma_both.device_prep_dma_memcpy = d40_prep_memcpy; - base->dma_slave.device_prep_dma_sg = d40_prep_sg; + base->dma_slave.device_prep_dma_sg = d40_prep_memcpy_sg; base->dma_both.device_prep_slave_sg = d40_prep_slave_sg; base->dma_both.device_tx_status = d40_tx_status; base->dma_both.device_issue_pending = d40_issue_pending; -- cgit v0.10.2 From 1c4b0927feab41346b0be971e0287aaf46eba8e0 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 Jan 2011 11:18:24 +0100 Subject: dma40: move lli_load to main source file These register writes are better placed in the main source file rather than ll.c. 
Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 4e9d6c5..6a7a00d 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -504,15 +504,29 @@ static void d40_desc_submit(struct d40_chan *d40c, struct d40_desc *desc) list_add_tail(&desc->node, &d40c->active); } +static void d40_phy_lli_load(struct d40_chan *chan, struct d40_desc *desc) +{ + struct d40_phy_lli *lli_dst = desc->lli_phy.dst; + struct d40_phy_lli *lli_src = desc->lli_phy.src; + void __iomem *base = chan_base(chan); + + writel(lli_src->reg_cfg, base + D40_CHAN_REG_SSCFG); + writel(lli_src->reg_elt, base + D40_CHAN_REG_SSELT); + writel(lli_src->reg_ptr, base + D40_CHAN_REG_SSPTR); + writel(lli_src->reg_lnk, base + D40_CHAN_REG_SSLNK); + + writel(lli_dst->reg_cfg, base + D40_CHAN_REG_SDCFG); + writel(lli_dst->reg_elt, base + D40_CHAN_REG_SDELT); + writel(lli_dst->reg_ptr, base + D40_CHAN_REG_SDPTR); + writel(lli_dst->reg_lnk, base + D40_CHAN_REG_SDLNK); +} + static void d40_desc_load(struct d40_chan *d40c, struct d40_desc *d40d) { int curr_lcla = -EINVAL, next_lcla; if (chan_is_physical(d40c)) { - d40_phy_lli_write(d40c->base->virtbase, - d40c->phy_chan->num, - d40d->lli_phy.dst, - d40d->lli_phy.src); + d40_phy_lli_load(d40c, d40d); d40d->lli_current = d40d->lli_len; } else { diff --git a/drivers/dma/ste_dma40_ll.c b/drivers/dma/ste_dma40_ll.c index 552c597..fd75251 100644 --- a/drivers/dma/ste_dma40_ll.c +++ b/drivers/dma/ste_dma40_ll.c @@ -295,32 +295,6 @@ int d40_phy_sg_to_lli(struct scatterlist *sg, } -void d40_phy_lli_write(void __iomem *virtbase, - u32 phy_chan_num, - struct d40_phy_lli *lli_dst, - struct d40_phy_lli *lli_src) -{ - - writel(lli_src->reg_cfg, virtbase + D40_DREG_PCBASE + - phy_chan_num * D40_DREG_PCDELTA + D40_CHAN_REG_SSCFG); - writel(lli_src->reg_elt, virtbase + D40_DREG_PCBASE + - phy_chan_num * D40_DREG_PCDELTA + D40_CHAN_REG_SSELT); - writel(lli_src->reg_ptr, virtbase + D40_DREG_PCBASE + - phy_chan_num * D40_DREG_PCDELTA + D40_CHAN_REG_SSPTR); - writel(lli_src->reg_lnk, virtbase + D40_DREG_PCBASE + - phy_chan_num * D40_DREG_PCDELTA + D40_CHAN_REG_SSLNK); - - writel(lli_dst->reg_cfg, virtbase + D40_DREG_PCBASE + - phy_chan_num * D40_DREG_PCDELTA + D40_CHAN_REG_SDCFG); - writel(lli_dst->reg_elt, virtbase + D40_DREG_PCBASE + - phy_chan_num * D40_DREG_PCDELTA + D40_CHAN_REG_SDELT); - writel(lli_dst->reg_ptr, virtbase + D40_DREG_PCBASE + - phy_chan_num * D40_DREG_PCDELTA + D40_CHAN_REG_SDPTR); - writel(lli_dst->reg_lnk, virtbase + D40_DREG_PCBASE + - phy_chan_num * D40_DREG_PCDELTA + D40_CHAN_REG_SDLNK); - -} - /* DMA logical lli operations */ static void d40_log_lli_link(struct d40_log_lli *lli_dst, diff --git a/drivers/dma/ste_dma40_ll.h b/drivers/dma/ste_dma40_ll.h index a5d7171..46578a66 100644 --- a/drivers/dma/ste_dma40_ll.h +++ b/drivers/dma/ste_dma40_ll.h @@ -312,11 +312,6 @@ int d40_phy_sg_to_lli(struct scatterlist *sg, u32 data_width2, int psize); -void d40_phy_lli_write(void __iomem *virtbase, - u32 phy_chan_num, - struct d40_phy_lli *lli_dst, - struct d40_phy_lli *lli_src); - /* Logical channels */ struct d40_log_lli *d40_log_buf_to_lli(struct d40_log_lli *lli_sg, -- cgit v0.10.2 From e24b36bdf873b4a64545fd66da13877214d235cf Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 Jan 2011 11:18:25 +0100 Subject: dma40: combine duplicated code in log_sg_to_dev Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin Vincent 
Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40_ll.c b/drivers/dma/ste_dma40_ll.c index fd75251..fa6c3ab 100644 --- a/drivers/dma/ste_dma40_ll.c +++ b/drivers/dma/ste_dma40_ll.c @@ -385,40 +385,34 @@ int d40_log_sg_to_dev(struct scatterlist *sg, struct d40_log_lli *lli_dst = lli->dst; for_each_sg(sg, current_sg, sg_len, i) { - total_size += sg_dma_len(current_sg); + dma_addr_t sg_addr = sg_dma_address(current_sg); + unsigned int len = sg_dma_len(current_sg); + dma_addr_t src; + dma_addr_t dst; + + total_size += len; if (direction == DMA_TO_DEVICE) { - lli_src = - d40_log_buf_to_lli(lli_src, - sg_dma_address(current_sg), - sg_dma_len(current_sg), - lcsp->lcsp1, src_data_width, - dst_data_width, - true); - lli_dst = - d40_log_buf_to_lli(lli_dst, - dev_addr, - sg_dma_len(current_sg), - lcsp->lcsp3, dst_data_width, - src_data_width, - false); + src = sg_addr; + dst = dev_addr; } else { - lli_dst = - d40_log_buf_to_lli(lli_dst, - sg_dma_address(current_sg), - sg_dma_len(current_sg), - lcsp->lcsp3, dst_data_width, - src_data_width, - true); - lli_src = - d40_log_buf_to_lli(lli_src, - dev_addr, - sg_dma_len(current_sg), - lcsp->lcsp1, src_data_width, - dst_data_width, - false); + src = dev_addr; + dst = sg_addr; } + + lli_src = d40_log_buf_to_lli(lli_src, src, len, + lcsp->lcsp1, + src_data_width, + dst_data_width, + src == sg_addr); + + lli_dst = d40_log_buf_to_lli(lli_dst, dst, len, + lcsp->lcsp3, + dst_data_width, + src_data_width, + dst == sg_addr); } + return total_size; } -- cgit v0.10.2 From 5ed04b8575cb22920b1333aeb55121339449048f Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 Jan 2011 11:18:26 +0100 Subject: dma40: unify d40_log_sg_to_lli funcs for mem and slave Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 6a7a00d..c597dba 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -1638,37 +1638,28 @@ d40_prep_sg_log(struct d40_chan *chan, struct d40_desc *desc, unsigned int sg_len, enum dma_data_direction direction, dma_addr_t dev_addr) { + dma_addr_t src_dev_addr = direction == DMA_FROM_DEVICE ? dev_addr : 0; + dma_addr_t dst_dev_addr = direction == DMA_TO_DEVICE ? dev_addr : 0; struct stedma40_chan_cfg *cfg = &chan->dma_cfg; struct stedma40_half_channel_info *src_info = &cfg->src_info; struct stedma40_half_channel_info *dst_info = &cfg->dst_info; + int ret; - if (direction == DMA_NONE) { - /* memcpy */ - (void) d40_log_sg_to_lli(sg_src, sg_len, - desc->lli_log.src, - chan->log_def.lcsp1, - src_info->data_width, - dst_info->data_width); - - (void) d40_log_sg_to_lli(sg_dst, sg_len, - desc->lli_log.dst, - chan->log_def.lcsp3, - dst_info->data_width, - src_info->data_width); - } else { - unsigned int total_size; + ret = d40_log_sg_to_lli(sg_src, sg_len, + src_dev_addr, + desc->lli_log.src, + chan->log_def.lcsp1, + src_info->data_width, + dst_info->data_width); - total_size = d40_log_sg_to_dev(sg_src, sg_len, - &desc->lli_log, - &chan->log_def, - src_info->data_width, - dst_info->data_width, - direction, dev_addr); - if (total_size < 0) - return -EINVAL; - } + ret = d40_log_sg_to_lli(sg_dst, sg_len, + dst_dev_addr, + desc->lli_log.dst, + chan->log_def.lcsp3, + dst_info->data_width, + src_info->data_width); - return 0; + return ret < 0 ? 
ret : 0; } static int diff --git a/drivers/dma/ste_dma40_ll.c b/drivers/dma/ste_dma40_ll.c index fa6c3ab..9935c6d 100644 --- a/drivers/dma/ste_dma40_ll.c +++ b/drivers/dma/ste_dma40_ll.c @@ -369,53 +369,6 @@ static void d40_log_fill_lli(struct d40_log_lli *lli, } -int d40_log_sg_to_dev(struct scatterlist *sg, - int sg_len, - struct d40_log_lli_bidir *lli, - struct d40_def_lcsp *lcsp, - u32 src_data_width, - u32 dst_data_width, - enum dma_data_direction direction, - dma_addr_t dev_addr) -{ - int total_size = 0; - struct scatterlist *current_sg = sg; - int i; - struct d40_log_lli *lli_src = lli->src; - struct d40_log_lli *lli_dst = lli->dst; - - for_each_sg(sg, current_sg, sg_len, i) { - dma_addr_t sg_addr = sg_dma_address(current_sg); - unsigned int len = sg_dma_len(current_sg); - dma_addr_t src; - dma_addr_t dst; - - total_size += len; - - if (direction == DMA_TO_DEVICE) { - src = sg_addr; - dst = dev_addr; - } else { - src = dev_addr; - dst = sg_addr; - } - - lli_src = d40_log_buf_to_lli(lli_src, src, len, - lcsp->lcsp1, - src_data_width, - dst_data_width, - src == sg_addr); - - lli_dst = d40_log_buf_to_lli(lli_dst, dst, len, - lcsp->lcsp3, - dst_data_width, - src_data_width, - dst == sg_addr); - } - - return total_size; -} - struct d40_log_lli *d40_log_buf_to_lli(struct d40_log_lli *lli_sg, dma_addr_t addr, int size, @@ -447,6 +400,7 @@ struct d40_log_lli *d40_log_buf_to_lli(struct d40_log_lli *lli_sg, int d40_log_sg_to_lli(struct scatterlist *sg, int sg_len, + dma_addr_t dev_addr, struct d40_log_lli *lli_sg, u32 lcsp13, /* src or dst*/ u32 data_width1, u32 data_width2) @@ -455,14 +409,21 @@ int d40_log_sg_to_lli(struct scatterlist *sg, struct scatterlist *current_sg = sg; int i; struct d40_log_lli *lli = lli_sg; + bool autoinc = !dev_addr; for_each_sg(sg, current_sg, sg_len, i) { + dma_addr_t sg_addr = sg_dma_address(current_sg); + unsigned int len = sg_dma_len(current_sg); + dma_addr_t addr = dev_addr ?: sg_addr; + total_size += sg_dma_len(current_sg); - lli = d40_log_buf_to_lli(lli, - sg_dma_address(current_sg), - sg_dma_len(current_sg), + + lli = d40_log_buf_to_lli(lli, addr, len, lcsp13, - data_width1, data_width2, true); + data_width1, + data_width2, + autoinc); } + return total_size; } diff --git a/drivers/dma/ste_dma40_ll.h b/drivers/dma/ste_dma40_ll.h index 46578a66..867f23f 100644 --- a/drivers/dma/ste_dma40_ll.h +++ b/drivers/dma/ste_dma40_ll.h @@ -321,17 +321,9 @@ struct d40_log_lli *d40_log_buf_to_lli(struct d40_log_lli *lli_sg, u32 data_width1, u32 data_width2, bool addr_inc); -int d40_log_sg_to_dev(struct scatterlist *sg, - int sg_len, - struct d40_log_lli_bidir *lli, - struct d40_def_lcsp *lcsp, - u32 src_data_width, - u32 dst_data_width, - enum dma_data_direction direction, - dma_addr_t dev_addr); - int d40_log_sg_to_lli(struct scatterlist *sg, int sg_len, + dma_addr_t dev_addr, struct d40_log_lli *lli_sg, u32 lcsp13, /* src or dst*/ u32 data_width1, u32 data_width2); -- cgit v0.10.2 From cc31b6f7949efd46c5f13d0758cf7b0bcb71fae2 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 Jan 2011 11:18:27 +0100 Subject: dma40: pass the info pointer all the way to reduce argument count Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index c597dba..3c61c58 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -1679,17 +1679,13 @@ d40_prep_sg_phy(struct d40_chan *chan, struct d40_desc *desc, desc->lli_phy.src, 
virt_to_phys(desc->lli_phy.src), chan->src_def_cfg, - src_info->data_width, - dst_info->data_width, - src_info->psize); + src_info, dst_info); ret = d40_phy_sg_to_lli(sg_dst, sg_len, dst_dev_addr, desc->lli_phy.dst, virt_to_phys(desc->lli_phy.dst), chan->dst_def_cfg, - dst_info->data_width, - src_info->data_width, - dst_info->psize); + dst_info, src_info); dma_sync_single_for_device(chan->base->dev, desc->lli_pool.dma_addr, desc->lli_pool.size, DMA_TO_DEVICE); diff --git a/drivers/dma/ste_dma40_ll.c b/drivers/dma/ste_dma40_ll.c index 9935c6d..509a130 100644 --- a/drivers/dma/ste_dma40_ll.c +++ b/drivers/dma/ste_dma40_ll.c @@ -125,13 +125,14 @@ void d40_phy_cfg(struct stedma40_chan_cfg *cfg, static int d40_phy_fill_lli(struct d40_phy_lli *lli, dma_addr_t data, u32 data_size, - int psize, dma_addr_t next_lli, u32 reg_cfg, bool term_int, - u32 data_width, - bool is_device) + bool is_device, + struct stedma40_half_channel_info *info) { + unsigned int data_width = info->data_width; + int psize = info->psize; int num_elems; if (psize == STEDMA40_PSIZE_PHY_1) @@ -198,16 +199,11 @@ static int d40_seg_size(int size, int data_width1, int data_width2) return seg_max; } -static struct d40_phy_lli *d40_phy_buf_to_lli(struct d40_phy_lli *lli, - dma_addr_t addr, - u32 size, - int psize, - dma_addr_t lli_phys, - u32 reg_cfg, - bool term_int, - u32 data_width1, - u32 data_width2, - bool is_device) +static struct d40_phy_lli * +d40_phy_buf_to_lli(struct d40_phy_lli *lli, dma_addr_t addr, u32 size, + dma_addr_t lli_phys, u32 reg_cfg, bool term_int, + bool is_device, struct stedma40_half_channel_info *info, + struct stedma40_half_channel_info *otherinfo) { int err; dma_addr_t next = lli_phys; @@ -215,7 +211,8 @@ static struct d40_phy_lli *d40_phy_buf_to_lli(struct d40_phy_lli *lli, int size_seg = 0; do { - size_seg = d40_seg_size(size_rest, data_width1, data_width2); + size_seg = d40_seg_size(size_rest, info->data_width, + otherinfo->data_width); size_rest -= size_seg; if (term_int && size_rest == 0) @@ -227,12 +224,11 @@ static struct d40_phy_lli *d40_phy_buf_to_lli(struct d40_phy_lli *lli, err = d40_phy_fill_lli(lli, addr, size_seg, - psize, next, reg_cfg, !next, - data_width1, - is_device); + is_device, + info); if (err) goto err; @@ -254,9 +250,8 @@ int d40_phy_sg_to_lli(struct scatterlist *sg, struct d40_phy_lli *lli_sg, dma_addr_t lli_phys, u32 reg_cfg, - u32 data_width1, - u32 data_width2, - int psize) + struct stedma40_half_channel_info *info, + struct stedma40_half_channel_info *otherinfo) { int total_size = 0; int i; @@ -280,13 +275,12 @@ int d40_phy_sg_to_lli(struct scatterlist *sg, lli = d40_phy_buf_to_lli(lli, dst, sg_dma_len(current_sg), - psize, l_phys, reg_cfg, sg_len - 1 == i, - data_width1, - data_width2, - target == dst); + target == dst, + info, + otherinfo); if (lli == NULL) return -EINVAL; } diff --git a/drivers/dma/ste_dma40_ll.h b/drivers/dma/ste_dma40_ll.h index 867f23f..cd94afd 100644 --- a/drivers/dma/ste_dma40_ll.h +++ b/drivers/dma/ste_dma40_ll.h @@ -308,9 +308,8 @@ int d40_phy_sg_to_lli(struct scatterlist *sg, struct d40_phy_lli *lli, dma_addr_t lli_phys, u32 reg_cfg, - u32 data_width1, - u32 data_width2, - int psize); + struct stedma40_half_channel_info *info, + struct stedma40_half_channel_info *otherinfo); /* Logical channels */ -- cgit v0.10.2 From 822c567639971628ceba2c53531670d595e3164d Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 Jan 2011 11:18:28 +0100 Subject: dma40: unify src/dst addr check Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin 
Vincent Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 3c61c58..d9dfda2 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -1635,11 +1635,9 @@ static u32 stedma40_residue(struct dma_chan *chan) static int d40_prep_sg_log(struct d40_chan *chan, struct d40_desc *desc, struct scatterlist *sg_src, struct scatterlist *sg_dst, - unsigned int sg_len, enum dma_data_direction direction, - dma_addr_t dev_addr) + unsigned int sg_len, dma_addr_t src_dev_addr, + dma_addr_t dst_dev_addr) { - dma_addr_t src_dev_addr = direction == DMA_FROM_DEVICE ? dev_addr : 0; - dma_addr_t dst_dev_addr = direction == DMA_TO_DEVICE ? dev_addr : 0; struct stedma40_chan_cfg *cfg = &chan->dma_cfg; struct stedma40_half_channel_info *src_info = &cfg->src_info; struct stedma40_half_channel_info *dst_info = &cfg->dst_info; @@ -1665,11 +1663,9 @@ d40_prep_sg_log(struct d40_chan *chan, struct d40_desc *desc, static int d40_prep_sg_phy(struct d40_chan *chan, struct d40_desc *desc, struct scatterlist *sg_src, struct scatterlist *sg_dst, - unsigned int sg_len, enum dma_data_direction direction, - dma_addr_t dev_addr) + unsigned int sg_len, dma_addr_t src_dev_addr, + dma_addr_t dst_dev_addr) { - dma_addr_t src_dev_addr = direction == DMA_FROM_DEVICE ? dev_addr : 0; - dma_addr_t dst_dev_addr = direction == DMA_TO_DEVICE ? dev_addr : 0; struct stedma40_chan_cfg *cfg = &chan->dma_cfg; struct stedma40_half_channel_info *src_info = &cfg->src_info; struct stedma40_half_channel_info *dst_info = &cfg->dst_info; @@ -1757,7 +1753,8 @@ d40_prep_sg(struct dma_chan *dchan, struct scatterlist *sg_src, enum dma_data_direction direction, unsigned long dma_flags) { struct d40_chan *chan = container_of(dchan, struct d40_chan, chan); - dma_addr_t dev_addr = 0; + dma_addr_t src_dev_addr = 0; + dma_addr_t dst_dev_addr = 0; struct d40_desc *desc; unsigned long flags; int ret; @@ -1773,15 +1770,21 @@ d40_prep_sg(struct dma_chan *dchan, struct scatterlist *sg_src, if (desc == NULL) goto err; - if (direction != DMA_NONE) - dev_addr = d40_get_dev_addr(chan, direction); + if (direction != DMA_NONE) { + dma_addr_t dev_addr = d40_get_dev_addr(chan, direction); + + if (direction == DMA_FROM_DEVICE) + src_dev_addr = dev_addr; + else if (direction == DMA_TO_DEVICE) + dst_dev_addr = dev_addr; + } if (chan_is_logical(chan)) ret = d40_prep_sg_log(chan, desc, sg_src, sg_dst, - sg_len, direction, dev_addr); + sg_len, src_dev_addr, dst_dev_addr); else ret = d40_prep_sg_phy(chan, desc, sg_src, sg_dst, - sg_len, direction, dev_addr); + sg_len, src_dev_addr, dst_dev_addr); if (ret) { chan_err(chan, "Failed to prepare %s sg job: %d\n", -- cgit v0.10.2 From 1f7622ca55b1f5875e32140b4781759f800aded3 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 Jan 2011 11:18:29 +0100 Subject: dma40: make d40_log_buf_to_lli static Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40_ll.c b/drivers/dma/ste_dma40_ll.c index 509a130..876aad2 100644 --- a/drivers/dma/ste_dma40_ll.c +++ b/drivers/dma/ste_dma40_ll.c @@ -363,7 +363,7 @@ static void d40_log_fill_lli(struct d40_log_lli *lli, } -struct d40_log_lli *d40_log_buf_to_lli(struct d40_log_lli *lli_sg, +static struct d40_log_lli *d40_log_buf_to_lli(struct d40_log_lli *lli_sg, dma_addr_t addr, int size, u32 lcsp13, /* src or dst*/ diff --git a/drivers/dma/ste_dma40_ll.h b/drivers/dma/ste_dma40_ll.h index cd94afd..4626c88 
100644 --- a/drivers/dma/ste_dma40_ll.h +++ b/drivers/dma/ste_dma40_ll.h @@ -313,13 +313,6 @@ int d40_phy_sg_to_lli(struct scatterlist *sg, /* Logical channels */ -struct d40_log_lli *d40_log_buf_to_lli(struct d40_log_lli *lli_sg, - dma_addr_t addr, - int size, - u32 lcsp13, /* src or dst*/ - u32 data_width1, u32 data_width2, - bool addr_inc); - int d40_log_sg_to_lli(struct scatterlist *sg, int sg_len, dma_addr_t dev_addr, -- cgit v0.10.2 From 7f933bed96e9872131014ea2bdd5b012e43fc316 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 Jan 2011 11:18:30 +0100 Subject: dma40: use flags to reduce parameter count Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40_ll.c b/drivers/dma/ste_dma40_ll.c index 876aad2..88b9e37 100644 --- a/drivers/dma/ste_dma40_ll.c +++ b/drivers/dma/ste_dma40_ll.c @@ -127,10 +127,11 @@ static int d40_phy_fill_lli(struct d40_phy_lli *lli, u32 data_size, dma_addr_t next_lli, u32 reg_cfg, - bool term_int, - bool is_device, - struct stedma40_half_channel_info *info) + struct stedma40_half_channel_info *info, + unsigned int flags) { + bool addr_inc = flags & LLI_ADDR_INC; + bool term_int = flags & LLI_TERM_INT; unsigned int data_width = info->data_width; int psize = info->psize; int num_elems; @@ -155,7 +156,7 @@ static int d40_phy_fill_lli(struct d40_phy_lli *lli, * Distance to next element sized entry. * Usually the size of the element unless you want gaps. */ - if (!is_device) + if (addr_inc) lli->reg_elt |= (0x1 << data_width) << D40_SREG_ELEM_PHY_EIDX_POS; @@ -201,40 +202,45 @@ static int d40_seg_size(int size, int data_width1, int data_width2) static struct d40_phy_lli * d40_phy_buf_to_lli(struct d40_phy_lli *lli, dma_addr_t addr, u32 size, - dma_addr_t lli_phys, u32 reg_cfg, bool term_int, - bool is_device, struct stedma40_half_channel_info *info, - struct stedma40_half_channel_info *otherinfo) + dma_addr_t lli_phys, u32 reg_cfg, + struct stedma40_half_channel_info *info, + struct stedma40_half_channel_info *otherinfo, + unsigned long flags) { + bool addr_inc = flags & LLI_ADDR_INC; + bool term_int = flags & LLI_TERM_INT; int err; dma_addr_t next = lli_phys; int size_rest = size; int size_seg = 0; + /* + * This piece may be split up based on d40_seg_size(); we only want the + * term int on the last part. 
+ */ + if (term_int) + flags &= ~LLI_TERM_INT; + do { size_seg = d40_seg_size(size_rest, info->data_width, otherinfo->data_width); size_rest -= size_seg; - if (term_int && size_rest == 0) + if (term_int && size_rest == 0) { next = 0; - else + flags |= LLI_TERM_INT; + } else next = ALIGN(next + sizeof(struct d40_phy_lli), D40_LLI_ALIGN); - err = d40_phy_fill_lli(lli, - addr, - size_seg, - next, - reg_cfg, - !next, - is_device, - info); + err = d40_phy_fill_lli(lli, addr, size_seg, next, + reg_cfg, info, flags); if (err) goto err; lli++; - if (!is_device) + if (addr_inc) addr += size_seg; } while (size_rest); @@ -256,31 +262,29 @@ int d40_phy_sg_to_lli(struct scatterlist *sg, int total_size = 0; int i; struct scatterlist *current_sg = sg; - dma_addr_t dst; struct d40_phy_lli *lli = lli_sg; dma_addr_t l_phys = lli_phys; + unsigned long flags = 0; + + if (!target) + flags |= LLI_ADDR_INC; for_each_sg(sg, current_sg, sg_len, i) { + dma_addr_t sg_addr = sg_dma_address(current_sg); + unsigned int len = sg_dma_len(current_sg); + dma_addr_t dst = target ?: sg_addr; total_size += sg_dma_len(current_sg); - if (target) - dst = target; - else - dst = sg_dma_address(current_sg); + if (i == sg_len - 1) + flags |= LLI_TERM_INT; l_phys = ALIGN(lli_phys + (lli - lli_sg) * sizeof(struct d40_phy_lli), D40_LLI_ALIGN); - lli = d40_phy_buf_to_lli(lli, - dst, - sg_dma_len(current_sg), - l_phys, - reg_cfg, - sg_len - 1 == i, - target == dst, - info, - otherinfo); + lli = d40_phy_buf_to_lli(lli, dst, len, l_phys, + reg_cfg, info, otherinfo, flags); + if (lli == NULL) return -EINVAL; } @@ -343,8 +347,10 @@ static void d40_log_fill_lli(struct d40_log_lli *lli, dma_addr_t data, u32 data_size, u32 reg_cfg, u32 data_width, - bool addr_inc) + unsigned int flags) { + bool addr_inc = flags & LLI_ADDR_INC; + lli->lcsp13 = reg_cfg; /* The number of elements to transfer */ @@ -369,8 +375,9 @@ static struct d40_log_lli *d40_log_buf_to_lli(struct d40_log_lli *lli_sg, u32 lcsp13, /* src or dst*/ u32 data_width1, u32 data_width2, - bool addr_inc) + unsigned int flags) { + bool addr_inc = flags & LLI_ADDR_INC; struct d40_log_lli *lli = lli_sg; int size_rest = size; int size_seg = 0; @@ -383,7 +390,7 @@ static struct d40_log_lli *d40_log_buf_to_lli(struct d40_log_lli *lli_sg, addr, size_seg, lcsp13, data_width1, - addr_inc); + flags); if (addr_inc) addr += size_seg; lli++; @@ -403,7 +410,10 @@ int d40_log_sg_to_lli(struct scatterlist *sg, struct scatterlist *current_sg = sg; int i; struct d40_log_lli *lli = lli_sg; - bool autoinc = !dev_addr; + unsigned long flags = 0; + + if (!dev_addr) + flags |= LLI_ADDR_INC; for_each_sg(sg, current_sg, sg_len, i) { dma_addr_t sg_addr = sg_dma_address(current_sg); @@ -416,7 +426,7 @@ int d40_log_sg_to_lli(struct scatterlist *sg, lcsp13, data_width1, data_width2, - autoinc); + flags); } return total_size; diff --git a/drivers/dma/ste_dma40_ll.h b/drivers/dma/ste_dma40_ll.h index 4626c88..59e72f0 100644 --- a/drivers/dma/ste_dma40_ll.h +++ b/drivers/dma/ste_dma40_ll.h @@ -293,6 +293,11 @@ struct d40_def_lcsp { /* Physical channels */ +enum d40_lli_flags { + LLI_ADDR_INC = 1 << 0, + LLI_TERM_INT = 1 << 1, +}; + void d40_phy_cfg(struct stedma40_chan_cfg *cfg, u32 *src_cfg, u32 *dst_cfg, -- cgit v0.10.2 From e65889c75ccb5b64dfb60f32e2d9448446cabcc7 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 Jan 2011 11:18:31 +0100 Subject: dma40: extract lcla code into separate function Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: 
Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index d9dfda2..4ec96ac 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -521,54 +521,62 @@ static void d40_phy_lli_load(struct d40_chan *chan, struct d40_desc *desc) writel(lli_dst->reg_lnk, base + D40_CHAN_REG_SDLNK); } -static void d40_desc_load(struct d40_chan *d40c, struct d40_desc *d40d) -{ - int curr_lcla = -EINVAL, next_lcla; - - if (chan_is_physical(d40c)) { - d40_phy_lli_load(d40c, d40d); - d40d->lli_current = d40d->lli_len; - } else { - - if ((d40d->lli_len - d40d->lli_current) > 1) - curr_lcla = d40_lcla_alloc_one(d40c, d40d); - - d40_log_lli_lcpa_write(d40c->lcpa, - &d40d->lli_log.dst[d40d->lli_current], - &d40d->lli_log.src[d40d->lli_current], - curr_lcla); - - d40d->lli_current++; - for (; d40d->lli_current < d40d->lli_len; d40d->lli_current++) { - unsigned int lcla_offset = d40c->phy_chan->num * 1024 + - 8 * curr_lcla * 2; - struct d40_lcla_pool *pool = &d40c->base->lcla_pool; - struct d40_log_lli *lcla = pool->base + lcla_offset; - - if (d40d->lli_current + 1 < d40d->lli_len) - next_lcla = d40_lcla_alloc_one(d40c, d40d); - else - next_lcla = -EINVAL; - - d40_log_lli_lcla_write(lcla, - &d40d->lli_log.dst[d40d->lli_current], - &d40d->lli_log.src[d40d->lli_current], - next_lcla); +static void d40_log_lli_to_lcxa(struct d40_chan *chan, struct d40_desc *desc) +{ + struct d40_lcla_pool *pool = &chan->base->lcla_pool; + struct d40_log_lli_bidir *lli = &desc->lli_log; + int lli_current = desc->lli_current; + int lli_len = desc->lli_len; + int curr_lcla = -EINVAL; + + if (lli_len - lli_current > 1) + curr_lcla = d40_lcla_alloc_one(chan, desc); + + d40_log_lli_lcpa_write(chan->lcpa, + &lli->dst[lli_current], + &lli->src[lli_current], + curr_lcla); + + lli_current++; + for (; lli_current < lli_len; lli_current++) { + unsigned int lcla_offset = chan->phy_chan->num * 1024 + + 8 * curr_lcla * 2; + struct d40_log_lli *lcla = pool->base + lcla_offset; + int next_lcla; + + if (lli_current + 1 < lli_len) + next_lcla = d40_lcla_alloc_one(chan, desc); + else + next_lcla = -EINVAL; - dma_sync_single_range_for_device(d40c->base->dev, - pool->dma_addr, lcla_offset, - 2 * sizeof(struct d40_log_lli), - DMA_TO_DEVICE); + d40_log_lli_lcla_write(lcla, + &lli->dst[lli_current], + &lli->src[lli_current], + next_lcla); - curr_lcla = next_lcla; + dma_sync_single_range_for_device(chan->base->dev, + pool->dma_addr, lcla_offset, + 2 * sizeof(struct d40_log_lli), + DMA_TO_DEVICE); - if (curr_lcla == -EINVAL) { - d40d->lli_current++; - break; - } + curr_lcla = next_lcla; + if (curr_lcla == -EINVAL) { + lli_current++; + break; } } + + desc->lli_current = lli_current; +} + +static void d40_desc_load(struct d40_chan *d40c, struct d40_desc *d40d) +{ + if (chan_is_physical(d40c)) { + d40_phy_lli_load(d40c, d40d); + d40d->lli_current = d40d->lli_len; + } else + d40_log_lli_to_lcxa(d40c, d40d); } static struct d40_desc *d40_first_active_get(struct d40_chan *d40c) -- cgit v0.10.2 From 6045f0bb2818393a44e835454db96709cb5b3d80 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 Jan 2011 11:18:32 +0100 Subject: dma40: handle failure to allocate first LCLA Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 4ec96ac..b8cce85 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -538,6 +538,10 @@ static void d40_log_lli_to_lcxa(struct d40_chan *chan, struct 
d40_desc *desc) curr_lcla); lli_current++; + + if (curr_lcla < 0) + goto out; + for (; lli_current < lli_len; lli_current++) { unsigned int lcla_offset = chan->phy_chan->num * 1024 + 8 * curr_lcla * 2; @@ -567,6 +571,7 @@ static void d40_log_lli_to_lcxa(struct d40_chan *chan, struct d40_desc *desc) } } +out: desc->lli_current = lli_current; } -- cgit v0.10.2 From 7ad74a7cf6f6355fd3f4c15afe63460fc4ec3f57 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 Jan 2011 11:18:33 +0100 Subject: dma40: fix DMA_SG capability and channels The DMA_SG cap is enabled on the wrong channel, and the pointers are repeatedly set incorrectly. Fix it and combine the ops settings to a common function. Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index b8cce85..929fd8f 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -2238,6 +2238,32 @@ static void __init d40_chan_init(struct d40_base *base, struct dma_device *dma, } } +static void d40_ops_init(struct d40_base *base, struct dma_device *dev) +{ + if (dma_has_cap(DMA_SLAVE, dev->cap_mask)) + dev->device_prep_slave_sg = d40_prep_slave_sg; + + if (dma_has_cap(DMA_MEMCPY, dev->cap_mask)) { + dev->device_prep_dma_memcpy = d40_prep_memcpy; + + /* + * This controller can only access address at even + * 32bit boundaries, i.e. 2^2 + */ + dev->copy_align = 2; + } + + if (dma_has_cap(DMA_SG, dev->cap_mask)) + dev->device_prep_dma_sg = d40_prep_memcpy_sg; + + dev->device_alloc_chan_resources = d40_alloc_chan_resources; + dev->device_free_chan_resources = d40_free_chan_resources; + dev->device_issue_pending = d40_issue_pending; + dev->device_tx_status = d40_tx_status; + dev->device_control = d40_control; + dev->dev = base->dev; +} + static int __init d40_dmaengine_init(struct d40_base *base, int num_reserved_chans) { @@ -2249,15 +2275,7 @@ static int __init d40_dmaengine_init(struct d40_base *base, dma_cap_zero(base->dma_slave.cap_mask); dma_cap_set(DMA_SLAVE, base->dma_slave.cap_mask); - base->dma_slave.device_alloc_chan_resources = d40_alloc_chan_resources; - base->dma_slave.device_free_chan_resources = d40_free_chan_resources; - base->dma_slave.device_prep_dma_memcpy = d40_prep_memcpy; - base->dma_slave.device_prep_dma_sg = d40_prep_memcpy_sg; - base->dma_slave.device_prep_slave_sg = d40_prep_slave_sg; - base->dma_slave.device_tx_status = d40_tx_status; - base->dma_slave.device_issue_pending = d40_issue_pending; - base->dma_slave.device_control = d40_control; - base->dma_slave.dev = base->dev; + d40_ops_init(base, &base->dma_slave); err = dma_async_device_register(&base->dma_slave); @@ -2271,22 +2289,9 @@ static int __init d40_dmaengine_init(struct d40_base *base, dma_cap_zero(base->dma_memcpy.cap_mask); dma_cap_set(DMA_MEMCPY, base->dma_memcpy.cap_mask); - dma_cap_set(DMA_SG, base->dma_slave.cap_mask); - - base->dma_memcpy.device_alloc_chan_resources = d40_alloc_chan_resources; - base->dma_memcpy.device_free_chan_resources = d40_free_chan_resources; - base->dma_memcpy.device_prep_dma_memcpy = d40_prep_memcpy; - base->dma_slave.device_prep_dma_sg = d40_prep_memcpy_sg; - base->dma_memcpy.device_prep_slave_sg = d40_prep_slave_sg; - base->dma_memcpy.device_tx_status = d40_tx_status; - base->dma_memcpy.device_issue_pending = d40_issue_pending; - base->dma_memcpy.device_control = d40_control; - base->dma_memcpy.dev = base->dev; - /* - * This controller can only access address at even - * 32bit boundaries, 
i.e. 2^2 - */ - base->dma_memcpy.copy_align = 2; + dma_cap_set(DMA_SG, base->dma_memcpy.cap_mask); + + d40_ops_init(base, &base->dma_memcpy); err = dma_async_device_register(&base->dma_memcpy); @@ -2302,18 +2307,10 @@ static int __init d40_dmaengine_init(struct d40_base *base, dma_cap_zero(base->dma_both.cap_mask); dma_cap_set(DMA_SLAVE, base->dma_both.cap_mask); dma_cap_set(DMA_MEMCPY, base->dma_both.cap_mask); - dma_cap_set(DMA_SG, base->dma_slave.cap_mask); - - base->dma_both.device_alloc_chan_resources = d40_alloc_chan_resources; - base->dma_both.device_free_chan_resources = d40_free_chan_resources; - base->dma_both.device_prep_dma_memcpy = d40_prep_memcpy; - base->dma_slave.device_prep_dma_sg = d40_prep_memcpy_sg; - base->dma_both.device_prep_slave_sg = d40_prep_slave_sg; - base->dma_both.device_tx_status = d40_tx_status; - base->dma_both.device_issue_pending = d40_issue_pending; - base->dma_both.device_control = d40_control; - base->dma_both.dev = base->dev; - base->dma_both.copy_align = 2; + dma_cap_set(DMA_SG, base->dma_both.cap_mask); + + d40_ops_init(base, &base->dma_both); + err = dma_async_device_register(&base->dma_both); if (err) { -- cgit v0.10.2 From 86eb5fb61125e4646c9447a1f2ce130817dab34e Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 Jan 2011 11:18:34 +0100 Subject: dma40: stop ongoing transfers in DMA_TERMINATE_ALL The current implementation of DMA_TERMINATE_ALL leaves ongoing transfers running. Fix it. Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 929fd8f..8fd0bb9 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -918,10 +918,8 @@ static bool d40_tx_is_linked(struct d40_chan *d40c) return is_link; } -static int d40_pause(struct dma_chan *chan) +static int d40_pause(struct d40_chan *d40c) { - struct d40_chan *d40c = - container_of(chan, struct d40_chan, chan); int res = 0; unsigned long flags; @@ -945,10 +943,8 @@ static int d40_pause(struct dma_chan *chan) return res; } -static int d40_resume(struct dma_chan *chan) +static int d40_resume(struct d40_chan *d40c) { - struct d40_chan *d40c = - container_of(chan, struct d40_chan, chan); int res = 0; unsigned long flags; @@ -978,6 +974,22 @@ no_suspend: return res; } +static int d40_terminate_all(struct d40_chan *chan) +{ + unsigned long flags; + int ret = 0; + + ret = d40_pause(chan); + if (!ret && chan_is_physical(chan)) + ret = d40_channel_execute_command(chan, D40_DMA_STOP); + + spin_lock_irqsave(&chan->lock, flags); + d40_term_all(chan); + spin_unlock_irqrestore(&chan->lock, flags); + + return ret; +} + static dma_cookie_t d40_tx_submit(struct dma_async_tx_descriptor *tx) { struct d40_chan *d40c = container_of(tx->chan, @@ -2176,7 +2188,6 @@ static void d40_set_runtime_config(struct dma_chan *chan, static int d40_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, unsigned long arg) { - unsigned long flags; struct d40_chan *d40c = container_of(chan, struct d40_chan, chan); if (d40c->phy_chan == NULL) { @@ -2186,14 +2197,11 @@ static int d40_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, switch (cmd) { case DMA_TERMINATE_ALL: - spin_lock_irqsave(&d40c->lock, flags); - d40_term_all(d40c); - spin_unlock_irqrestore(&d40c->lock, flags); - return 0; + return d40_terminate_all(d40c); case DMA_PAUSE: - return d40_pause(chan); + return d40_pause(d40c); case DMA_RESUME: - return d40_resume(chan); + return d40_resume(d40c); case 
DMA_SLAVE_CONFIG: d40_set_runtime_config(chan, (struct dma_slave_config *) arg); -- cgit v0.10.2 From 0c842b551063c5f7382ac9b457992f3b34972801 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 25 Jan 2011 11:18:35 +0100 Subject: dma40: cyclic xfer support Support cyclic transfers, which are useful for ALSA drivers. Acked-by: Per Forlin Acked-by: Jonas Aaberg Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij Signed-off-by: Dan Williams diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 8fd0bb9..af955de 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -115,6 +115,7 @@ struct d40_desc { struct list_head node; bool is_in_client_list; + bool cyclic; }; /** @@ -527,17 +528,45 @@ static void d40_log_lli_to_lcxa(struct d40_chan *chan, struct d40_desc *desc) struct d40_log_lli_bidir *lli = &desc->lli_log; int lli_current = desc->lli_current; int lli_len = desc->lli_len; + bool cyclic = desc->cyclic; int curr_lcla = -EINVAL; + int first_lcla = 0; + bool linkback; - if (lli_len - lli_current > 1) + /* + * We may have partially running cyclic transfers, in case we did't get + * enough LCLA entries. + */ + linkback = cyclic && lli_current == 0; + + /* + * For linkback, we need one LCLA even with only one link, because we + * can't link back to the one in LCPA space + */ + if (linkback || (lli_len - lli_current > 1)) { curr_lcla = d40_lcla_alloc_one(chan, desc); + first_lcla = curr_lcla; + } + + /* + * For linkback, we normally load the LCPA in the loop since we need to + * link it to the second LCLA and not the first. However, if we + * couldn't even get a first LCLA, then we have to run in LCPA and + * reload manually. + */ + if (!linkback || curr_lcla == -EINVAL) { + unsigned int flags = 0; - d40_log_lli_lcpa_write(chan->lcpa, - &lli->dst[lli_current], - &lli->src[lli_current], - curr_lcla); + if (curr_lcla == -EINVAL) + flags |= LLI_TERM_INT; - lli_current++; + d40_log_lli_lcpa_write(chan->lcpa, + &lli->dst[lli_current], + &lli->src[lli_current], + curr_lcla, + flags); + lli_current++; + } if (curr_lcla < 0) goto out; @@ -546,17 +575,33 @@ static void d40_log_lli_to_lcxa(struct d40_chan *chan, struct d40_desc *desc) unsigned int lcla_offset = chan->phy_chan->num * 1024 + 8 * curr_lcla * 2; struct d40_log_lli *lcla = pool->base + lcla_offset; + unsigned int flags = 0; int next_lcla; if (lli_current + 1 < lli_len) next_lcla = d40_lcla_alloc_one(chan, desc); else - next_lcla = -EINVAL; + next_lcla = linkback ? first_lcla : -EINVAL; + + if (cyclic || next_lcla == -EINVAL) + flags |= LLI_TERM_INT; + if (linkback && curr_lcla == first_lcla) { + /* First link goes in both LCPA and LCLA */ + d40_log_lli_lcpa_write(chan->lcpa, + &lli->dst[lli_current], + &lli->src[lli_current], + next_lcla, flags); + } + + /* + * One unused LCLA in the cyclic case if the very first + * next_lcla fails... 
+ */ d40_log_lli_lcla_write(lcla, &lli->dst[lli_current], &lli->src[lli_current], - next_lcla); + next_lcla, flags); dma_sync_single_range_for_device(chan->base->dev, pool->dma_addr, lcla_offset, @@ -565,7 +610,7 @@ static void d40_log_lli_to_lcxa(struct d40_chan *chan, struct d40_desc *desc) curr_lcla = next_lcla; - if (curr_lcla == -EINVAL) { + if (curr_lcla == -EINVAL || curr_lcla == first_lcla) { lli_current++; break; } @@ -1074,17 +1119,36 @@ static void dma_tc_handle(struct d40_chan *d40c) if (d40d == NULL) return; - d40_lcla_free_all(d40c, d40d); + if (d40d->cyclic) { + /* + * If this was a paritially loaded list, we need to reloaded + * it, and only when the list is completed. We need to check + * for done because the interrupt will hit for every link, and + * not just the last one. + */ + if (d40d->lli_current < d40d->lli_len + && !d40_tx_is_linked(d40c) + && !d40_residue(d40c)) { + d40_lcla_free_all(d40c, d40d); + d40_desc_load(d40c, d40d); + (void) d40_start(d40c); - if (d40d->lli_current < d40d->lli_len) { - d40_desc_load(d40c, d40d); - /* Start dma job */ - (void) d40_start(d40c); - return; - } + if (d40d->lli_current == d40d->lli_len) + d40d->lli_current = 0; + } + } else { + d40_lcla_free_all(d40c, d40d); - if (d40_queue_start(d40c) == NULL) - d40c->busy = false; + if (d40d->lli_current < d40d->lli_len) { + d40_desc_load(d40c, d40d); + /* Start dma job */ + (void) d40_start(d40c); + return; + } + + if (d40_queue_start(d40c) == NULL) + d40c->busy = false; + } d40c->pending_tx++; tasklet_schedule(&d40c->tasklet); @@ -1103,11 +1167,11 @@ static void dma_tasklet(unsigned long data) /* Get first active entry from list */ d40d = d40_first_active_get(d40c); - if (d40d == NULL) goto err; - d40c->completed = d40d->txd.cookie; + if (!d40d->cyclic) + d40c->completed = d40d->txd.cookie; /* * If terminating a channel pending_tx is set to zero. 
@@ -1122,16 +1186,18 @@ static void dma_tasklet(unsigned long data) callback = d40d->txd.callback; callback_param = d40d->txd.callback_param; - if (async_tx_test_ack(&d40d->txd)) { - d40_pool_lli_free(d40c, d40d); - d40_desc_remove(d40d); - d40_desc_free(d40c, d40d); - } else { - if (!d40d->is_in_client_list) { + if (!d40d->cyclic) { + if (async_tx_test_ack(&d40d->txd)) { + d40_pool_lli_free(d40c, d40d); d40_desc_remove(d40d); - d40_lcla_free_all(d40c, d40d); - list_add_tail(&d40d->node, &d40c->client); - d40d->is_in_client_list = true; + d40_desc_free(d40c, d40d); + } else { + if (!d40d->is_in_client_list) { + d40_desc_remove(d40d); + d40_lcla_free_all(d40c, d40d); + list_add_tail(&d40d->node, &d40c->client); + d40d->is_in_client_list = true; + } } } @@ -1694,19 +1760,23 @@ d40_prep_sg_phy(struct d40_chan *chan, struct d40_desc *desc, struct stedma40_chan_cfg *cfg = &chan->dma_cfg; struct stedma40_half_channel_info *src_info = &cfg->src_info; struct stedma40_half_channel_info *dst_info = &cfg->dst_info; + unsigned long flags = 0; int ret; + if (desc->cyclic) + flags |= LLI_CYCLIC | LLI_TERM_INT; + ret = d40_phy_sg_to_lli(sg_src, sg_len, src_dev_addr, desc->lli_phy.src, virt_to_phys(desc->lli_phy.src), chan->src_def_cfg, - src_info, dst_info); + src_info, dst_info, flags); ret = d40_phy_sg_to_lli(sg_dst, sg_len, dst_dev_addr, desc->lli_phy.dst, virt_to_phys(desc->lli_phy.dst), chan->dst_def_cfg, - dst_info, src_info); + dst_info, src_info, flags); dma_sync_single_for_device(chan->base->dev, desc->lli_pool.dma_addr, desc->lli_pool.size, DMA_TO_DEVICE); @@ -1789,12 +1859,16 @@ d40_prep_sg(struct dma_chan *dchan, struct scatterlist *sg_src, return NULL; } + spin_lock_irqsave(&chan->lock, flags); desc = d40_prep_desc(chan, sg_src, sg_len, dma_flags); if (desc == NULL) goto err; + if (sg_next(&sg_src[sg_len - 1]) == sg_src) + desc->cyclic = true; + if (direction != DMA_NONE) { dma_addr_t dev_addr = d40_get_dev_addr(chan, direction); @@ -2007,6 +2081,36 @@ static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan, return d40_prep_sg(chan, sgl, sgl, sg_len, direction, dma_flags); } +static struct dma_async_tx_descriptor * +dma40_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr, + size_t buf_len, size_t period_len, + enum dma_data_direction direction) +{ + unsigned int periods = buf_len / period_len; + struct dma_async_tx_descriptor *txd; + struct scatterlist *sg; + int i; + + sg = kcalloc(periods + 1, sizeof(struct scatterlist), GFP_KERNEL); + for (i = 0; i < periods; i++) { + sg_dma_address(&sg[i]) = dma_addr; + sg_dma_len(&sg[i]) = period_len; + dma_addr += period_len; + } + + sg[periods].offset = 0; + sg[periods].length = 0; + sg[periods].page_link = + ((unsigned long)sg | 0x01) & ~0x02; + + txd = d40_prep_sg(chan, sg, sg, periods, direction, + DMA_PREP_INTERRUPT); + + kfree(sg); + + return txd; +} + static enum dma_status d40_tx_status(struct dma_chan *chan, dma_cookie_t cookie, struct dma_tx_state *txstate) @@ -2264,6 +2368,9 @@ static void d40_ops_init(struct d40_base *base, struct dma_device *dev) if (dma_has_cap(DMA_SG, dev->cap_mask)) dev->device_prep_dma_sg = d40_prep_memcpy_sg; + if (dma_has_cap(DMA_CYCLIC, dev->cap_mask)) + dev->device_prep_dma_cyclic = dma40_prep_dma_cyclic; + dev->device_alloc_chan_resources = d40_alloc_chan_resources; dev->device_free_chan_resources = d40_free_chan_resources; dev->device_issue_pending = d40_issue_pending; @@ -2282,6 +2389,7 @@ static int __init d40_dmaengine_init(struct d40_base *base, 
dma_cap_zero(base->dma_slave.cap_mask); dma_cap_set(DMA_SLAVE, base->dma_slave.cap_mask); + dma_cap_set(DMA_CYCLIC, base->dma_slave.cap_mask); d40_ops_init(base, &base->dma_slave); @@ -2316,9 +2424,9 @@ static int __init d40_dmaengine_init(struct d40_base *base, dma_cap_set(DMA_SLAVE, base->dma_both.cap_mask); dma_cap_set(DMA_MEMCPY, base->dma_both.cap_mask); dma_cap_set(DMA_SG, base->dma_both.cap_mask); + dma_cap_set(DMA_CYCLIC, base->dma_slave.cap_mask); d40_ops_init(base, &base->dma_both); - err = dma_async_device_register(&base->dma_both); if (err) { diff --git a/drivers/dma/ste_dma40_ll.c b/drivers/dma/ste_dma40_ll.c index 88b9e37..cad9e1d 100644 --- a/drivers/dma/ste_dma40_ll.c +++ b/drivers/dma/ste_dma40_ll.c @@ -202,13 +202,15 @@ static int d40_seg_size(int size, int data_width1, int data_width2) static struct d40_phy_lli * d40_phy_buf_to_lli(struct d40_phy_lli *lli, dma_addr_t addr, u32 size, - dma_addr_t lli_phys, u32 reg_cfg, + dma_addr_t lli_phys, dma_addr_t first_phys, u32 reg_cfg, struct stedma40_half_channel_info *info, struct stedma40_half_channel_info *otherinfo, unsigned long flags) { + bool lastlink = flags & LLI_LAST_LINK; bool addr_inc = flags & LLI_ADDR_INC; bool term_int = flags & LLI_TERM_INT; + bool cyclic = flags & LLI_CYCLIC; int err; dma_addr_t next = lli_phys; int size_rest = size; @@ -226,10 +228,12 @@ d40_phy_buf_to_lli(struct d40_phy_lli *lli, dma_addr_t addr, u32 size, otherinfo->data_width); size_rest -= size_seg; - if (term_int && size_rest == 0) { - next = 0; + if (size_rest == 0 && term_int) flags |= LLI_TERM_INT; - } else + + if (size_rest == 0 && lastlink) + next = cyclic ? first_phys : 0; + else next = ALIGN(next + sizeof(struct d40_phy_lli), D40_LLI_ALIGN); @@ -257,14 +261,14 @@ int d40_phy_sg_to_lli(struct scatterlist *sg, dma_addr_t lli_phys, u32 reg_cfg, struct stedma40_half_channel_info *info, - struct stedma40_half_channel_info *otherinfo) + struct stedma40_half_channel_info *otherinfo, + unsigned long flags) { int total_size = 0; int i; struct scatterlist *current_sg = sg; struct d40_phy_lli *lli = lli_sg; dma_addr_t l_phys = lli_phys; - unsigned long flags = 0; if (!target) flags |= LLI_ADDR_INC; @@ -277,12 +281,12 @@ int d40_phy_sg_to_lli(struct scatterlist *sg, total_size += sg_dma_len(current_sg); if (i == sg_len - 1) - flags |= LLI_TERM_INT; + flags |= LLI_TERM_INT | LLI_LAST_LINK; l_phys = ALIGN(lli_phys + (lli - lli_sg) * sizeof(struct d40_phy_lli), D40_LLI_ALIGN); - lli = d40_phy_buf_to_lli(lli, dst, len, l_phys, + lli = d40_phy_buf_to_lli(lli, dst, len, l_phys, lli_phys, reg_cfg, info, otherinfo, flags); if (lli == NULL) @@ -297,15 +301,18 @@ int d40_phy_sg_to_lli(struct scatterlist *sg, static void d40_log_lli_link(struct d40_log_lli *lli_dst, struct d40_log_lli *lli_src, - int next) + int next, unsigned int flags) { + bool interrupt = flags & LLI_TERM_INT; u32 slos = 0; u32 dlos = 0; if (next != -EINVAL) { slos = next * 2; dlos = next * 2 + 1; - } else { + } + + if (interrupt) { lli_dst->lcsp13 |= D40_MEM_LCSP1_SCFG_TIM_MASK; lli_dst->lcsp13 |= D40_MEM_LCSP3_DTCP_MASK; } @@ -320,9 +327,9 @@ static void d40_log_lli_link(struct d40_log_lli *lli_dst, void d40_log_lli_lcpa_write(struct d40_log_lli_full *lcpa, struct d40_log_lli *lli_dst, struct d40_log_lli *lli_src, - int next) + int next, unsigned int flags) { - d40_log_lli_link(lli_dst, lli_src, next); + d40_log_lli_link(lli_dst, lli_src, next, flags); writel(lli_src->lcsp02, &lcpa[0].lcsp0); writel(lli_src->lcsp13, &lcpa[0].lcsp1); @@ -333,9 +340,9 @@ void 
d40_log_lli_lcpa_write(struct d40_log_lli_full *lcpa, void d40_log_lli_lcla_write(struct d40_log_lli *lcla, struct d40_log_lli *lli_dst, struct d40_log_lli *lli_src, - int next) + int next, unsigned int flags) { - d40_log_lli_link(lli_dst, lli_src, next); + d40_log_lli_link(lli_dst, lli_src, next, flags); writel(lli_src->lcsp02, &lcla[0].lcsp02); writel(lli_src->lcsp13, &lcla[0].lcsp13); diff --git a/drivers/dma/ste_dma40_ll.h b/drivers/dma/ste_dma40_ll.h index 59e72f0..195ee65 100644 --- a/drivers/dma/ste_dma40_ll.h +++ b/drivers/dma/ste_dma40_ll.h @@ -296,6 +296,8 @@ struct d40_def_lcsp { enum d40_lli_flags { LLI_ADDR_INC = 1 << 0, LLI_TERM_INT = 1 << 1, + LLI_CYCLIC = 1 << 2, + LLI_LAST_LINK = 1 << 3, }; void d40_phy_cfg(struct stedma40_chan_cfg *cfg, @@ -314,7 +316,8 @@ int d40_phy_sg_to_lli(struct scatterlist *sg, dma_addr_t lli_phys, u32 reg_cfg, struct stedma40_half_channel_info *info, - struct stedma40_half_channel_info *otherinfo); + struct stedma40_half_channel_info *otherinfo, + unsigned long flags); /* Logical channels */ @@ -328,11 +331,11 @@ int d40_log_sg_to_lli(struct scatterlist *sg, void d40_log_lli_lcpa_write(struct d40_log_lli_full *lcpa, struct d40_log_lli *lli_dst, struct d40_log_lli *lli_src, - int next); + int next, unsigned int flags); void d40_log_lli_lcla_write(struct d40_log_lli *lcla, struct d40_log_lli *lli_dst, struct d40_log_lli *lli_src, - int next); + int next, unsigned int flags); #endif /* STE_DMA40_LLI_H */ -- cgit v0.10.2 From 3ff84a7f36554b257cd57325b1a7c1fa4b49fbe3 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Mon, 14 Feb 2011 17:46:21 +0200 Subject: Revert "slab: Fix missing DEBUG_SLAB last user" This reverts commit 5c5e3b33b7cb959a401f823707bee006caadd76e. The commit breaks ARM thusly: | Mount-cache hash table entries: 512 | slab error in verify_redzone_free(): cache `idr_layer_cache': memory outside object was overwritten | Backtrace: | [] (dump_backtrace+0x0/0x110) from [] (dump_stack+0x18/0x1c) | [] (dump_stack+0x0/0x1c) from [] (__slab_error+0x28/0x30) | [] (__slab_error+0x0/0x30) from [] (cache_free_debugcheck+0x1c0/0x2b8) | [] (cache_free_debugcheck+0x0/0x2b8) from [] (kmem_cache_free+0x3c/0xc0) | [] (kmem_cache_free+0x0/0xc0) from [] (ida_get_new_above+0x19c/0x1c0) | [] (ida_get_new_above+0x0/0x1c0) from [] (alloc_vfsmnt+0x54/0x144) | [] (alloc_vfsmnt+0x0/0x144) from [] (vfs_kern_mount+0x30/0xec) | [] (vfs_kern_mount+0x0/0xec) from [] (kern_mount_data+0x1c/0x20) | [] (kern_mount_data+0x0/0x20) from [] (sysfs_init+0x68/0xc8) | [] (sysfs_init+0x0/0xc8) from [] (mnt_init+0x90/0x1b0) | [] (mnt_init+0x0/0x1b0) from [] (vfs_caches_init+0x100/0x140) | [] (vfs_caches_init+0x0/0x140) from [] (start_kernel+0x2e8/0x368) | [] (start_kernel+0x0/0x368) from [] (__enable_mmu+0x0/0x2c) | c0113268: redzone 1:0xd84156c5c032b3ac, redzone 2:0xd84156c5635688c0. | slab error in cache_alloc_debugcheck_after(): cache `idr_layer_cache': double free, or memory outside object was overwritten | ... | c011307c: redzone 1:0x9f91102ffffffff, redzone 2:0x9f911029d74e35b | slab: Internal list corruption detected in cache 'idr_layer_cache'(24), slabp c0113000(16). 
Hexdump: | | 000: 20 4f 10 c0 20 4f 10 c0 7c 00 00 00 7c 30 11 c0 | 010: 10 00 00 00 10 00 00 00 00 00 c9 17 fe ff ff ff | 020: fe ff ff ff fe ff ff ff fe ff ff ff fe ff ff ff | 030: fe ff ff ff fe ff ff ff fe ff ff ff fe ff ff ff | 040: fe ff ff ff fe ff ff ff fe ff ff ff fe ff ff ff | 050: fe ff ff ff fe ff ff ff fe ff ff ff 11 00 00 00 | 060: 12 00 00 00 13 00 00 00 14 00 00 00 15 00 00 00 | 070: 16 00 00 00 17 00 00 00 c0 88 56 63 | kernel BUG at /home/rmk/git/linux-2.6-rmk/mm/slab.c:2928! Reference: https://lkml.org/lkml/2011/2/7/238 Cc: # 2.6.35.y and later Reported-and-analyzed-by: Russell King Signed-off-by: Pekka Enberg diff --git a/mm/slab.c b/mm/slab.c index 37961d1..4c6e2e3 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2288,8 +2288,8 @@ kmem_cache_create (const char *name, size_t size, size_t align, if (ralign < align) { ralign = align; } - /* disable debug if not aligning with REDZONE_ALIGN */ - if (ralign & (__alignof__(unsigned long long) - 1)) + /* disable debug if necessary */ + if (ralign > __alignof__(unsigned long long)) flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); /* * 4) Store it. @@ -2315,8 +2315,8 @@ kmem_cache_create (const char *name, size_t size, size_t align, */ if (flags & SLAB_RED_ZONE) { /* add space for red zone words */ - cachep->obj_offset += align; - size += align + sizeof(unsigned long long); + cachep->obj_offset += sizeof(unsigned long long); + size += 2 * sizeof(unsigned long long); } if (flags & SLAB_STORE_USER) { /* user store requires one word storage behind the end of -- cgit v0.10.2 From b3d41885d9cd0d9db31c8f49e362bae02c96fa3f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Feb 2011 18:35:22 +0100 Subject: slub: fix kmemcheck calls to match ksize() hints Recent use of ksize() in network stack (commit ca44ac38 : net: don't reallocate skb->head unless the current one hasn't the needed extra size or is shared) triggers kmemcheck warnings, because ksize() can return more space than kmemcheck is aware of. Pekka Enberg noticed SLAB+kmemcheck is doing the right thing, while SLUB +kmemcheck doesnt. Bugzilla reference #27212 Reported-by: Christian Casteyde Suggested-by: Pekka Enberg Signed-off-by: Eric Dumazet Acked-by: David S. Miller Acked-by: David Rientjes Acked-by: Christoph Lameter CC: Changli Gao CC: Andrew Morton Signed-off-by: Pekka Enberg diff --git a/mm/slub.c b/mm/slub.c index d2f343a..217b5b5 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -797,10 +797,34 @@ static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) return should_failslab(s->objsize, flags, s->flags); } +static inline size_t slab_ksize(const struct kmem_cache *s) +{ +#ifdef CONFIG_SLUB_DEBUG + /* + * Debugging requires use of the padding between object + * and whatever may come after it. + */ + if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) + return s->objsize; + +#endif + /* + * If we have the need to store the freelist pointer + * back there or track user information then we can + * only use the space before that information. 
+ */ + if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) + return s->inuse; + /* + * Else we can use all the padding etc for the allocation + */ + return s->size; +} + static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object) { flags &= gfp_allowed_mask; - kmemcheck_slab_alloc(s, flags, object, s->objsize); + kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, flags); } @@ -2690,7 +2714,6 @@ EXPORT_SYMBOL(__kmalloc_node); size_t ksize(const void *object) { struct page *page; - struct kmem_cache *s; if (unlikely(object == ZERO_SIZE_PTR)) return 0; @@ -2701,28 +2724,8 @@ size_t ksize(const void *object) WARN_ON(!PageCompound(page)); return PAGE_SIZE << compound_order(page); } - s = page->slab; -#ifdef CONFIG_SLUB_DEBUG - /* - * Debugging requires use of the padding between object - * and whatever may come after it. - */ - if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) - return s->objsize; - -#endif - /* - * If we have the need to store the freelist pointer - * back there or track user information then we can - * only use the space before that information. - */ - if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) - return s->inuse; - /* - * Else we can use all the padding etc for the allocation - */ - return s->size; + return slab_ksize(page->slab); } EXPORT_SYMBOL(ksize); -- cgit v0.10.2 From c5a9f9d0895b2c16908979244d3d678fd6db0545 Mon Sep 17 00:00:00 2001 From: Tomoya MORINAGA Date: Fri, 18 Feb 2011 10:01:20 +0530 Subject: pch_dma: fix kernel error issue fix the following kernel error ------------[ cut here ]------------ WARNING: at kernel/softirq.c:159 _local_bh_enable_ip.clone.5+0x35/0x71() Hardware name: To be filled by O.E.M. Modules linked in: pch_uart pch_dma fuse mga drm cpufreq_ondemand acpi_cpufreq mperf ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_filter ip6_tables ipv6 uinput snd_hda_codec_realtek snd_hda_intel snd_hda_codec matroxfb_base snd_hwdep 8250_pnp snd_seq snd_seq_device matroxfb_DAC1064 snd_pcm joydev 8250 matroxfb_accel snd_timer matroxfb_Ti3026 ppdev pegasus parport_pc snd parport matroxfb_g450 g450_pll serial_core video output matroxfb_misc soundcore snd_page_alloc serio_raw pcspkr ext4 jbd2 crc16 sdhci_pci sdhci mmc_core floppy [last unloaded: scsi_wait_scan] Pid: 0, comm: swapper Not tainted 2.6.37.upstream_check+ #8 Call Trace: [] warn_slowpath_common+0x65/0x7a [] ? _local_bh_enable_ip.clone.5+0x35/0x71 [] warn_slowpath_null+0xf/0x13 [] _local_bh_enable_ip.clone.5+0x35/0x71 [] local_bh_enable_ip+0x8/0xa [] _raw_spin_unlock_bh+0x10/0x12 [] pd_prep_slave_sg+0xba/0x200 [pch_dma] [] pch_uart_interrupt+0x44d/0x6aa [pch_uart] [] handle_IRQ_event+0x1d/0x9e [] handle_fasteoi_irq+0x90/0xc7 [] ? handle_fasteoi_irq+0x0/0xc7 [] ? do_IRQ+0x3e/0x89 [] ? common_interrupt+0x29/0x30 [] ? sys_getpriority+0x12d/0x1a2 [] ? arch_local_irq_enable+0x5/0xb [] ? acpi_idle_enter_bm+0x22a/0x261 [] ? cpuidle_idle_call+0x70/0xa1 [] ? cpu_idle+0x49/0x6a [] ? rest_init+0x58/0x5a [] ? start_kernel+0x2d0/0x2d5 [] ? 
i386_start_kernel+0xce/0xd5 Signed-off-by: Tomoya MORINAGA Signed-off-by: Vinod Koul diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c index 1c38418..bf2ddd6 100644 --- a/drivers/dma/pch_dma.c +++ b/drivers/dma/pch_dma.c @@ -366,7 +366,7 @@ static dma_cookie_t pd_tx_submit(struct dma_async_tx_descriptor *txd) struct pch_dma_chan *pd_chan = to_pd_chan(txd->chan); dma_cookie_t cookie; - spin_lock_bh(&pd_chan->lock); + spin_lock(&pd_chan->lock); cookie = pdc_assign_cookie(pd_chan, desc); if (list_empty(&pd_chan->active_list)) { @@ -376,7 +376,7 @@ static dma_cookie_t pd_tx_submit(struct dma_async_tx_descriptor *txd) list_add_tail(&desc->desc_node, &pd_chan->queue); } - spin_unlock_bh(&pd_chan->lock); + spin_unlock(&pd_chan->lock); return 0; } @@ -386,7 +386,7 @@ static struct pch_dma_desc *pdc_alloc_desc(struct dma_chan *chan, gfp_t flags) struct pch_dma *pd = to_pd(chan->device); dma_addr_t addr; - desc = pci_pool_alloc(pd->pool, GFP_KERNEL, &addr); + desc = pci_pool_alloc(pd->pool, flags, &addr); if (desc) { memset(desc, 0, sizeof(struct pch_dma_desc)); INIT_LIST_HEAD(&desc->tx_list); @@ -405,7 +405,7 @@ static struct pch_dma_desc *pdc_desc_get(struct pch_dma_chan *pd_chan) struct pch_dma_desc *ret = NULL; int i; - spin_lock_bh(&pd_chan->lock); + spin_lock(&pd_chan->lock); list_for_each_entry_safe(desc, _d, &pd_chan->free_list, desc_node) { i++; if (async_tx_test_ack(&desc->txd)) { @@ -415,15 +415,15 @@ static struct pch_dma_desc *pdc_desc_get(struct pch_dma_chan *pd_chan) } dev_dbg(chan2dev(&pd_chan->chan), "desc %p not ACKed\n", desc); } - spin_unlock_bh(&pd_chan->lock); + spin_unlock(&pd_chan->lock); dev_dbg(chan2dev(&pd_chan->chan), "scanned %d descriptors\n", i); if (!ret) { ret = pdc_alloc_desc(&pd_chan->chan, GFP_NOIO); if (ret) { - spin_lock_bh(&pd_chan->lock); + spin_lock(&pd_chan->lock); pd_chan->descs_allocated++; - spin_unlock_bh(&pd_chan->lock); + spin_unlock(&pd_chan->lock); } else { dev_err(chan2dev(&pd_chan->chan), "failed to alloc desc\n"); @@ -437,10 +437,10 @@ static void pdc_desc_put(struct pch_dma_chan *pd_chan, struct pch_dma_desc *desc) { if (desc) { - spin_lock_bh(&pd_chan->lock); + spin_lock(&pd_chan->lock); list_splice_init(&desc->tx_list, &pd_chan->free_list); list_add(&desc->desc_node, &pd_chan->free_list); - spin_unlock_bh(&pd_chan->lock); + spin_unlock(&pd_chan->lock); } } @@ -530,9 +530,9 @@ static void pd_issue_pending(struct dma_chan *chan) struct pch_dma_chan *pd_chan = to_pd_chan(chan); if (pdc_is_idle(pd_chan)) { - spin_lock_bh(&pd_chan->lock); + spin_lock(&pd_chan->lock); pdc_advance_work(pd_chan); - spin_unlock_bh(&pd_chan->lock); + spin_unlock(&pd_chan->lock); } } @@ -592,7 +592,6 @@ static struct dma_async_tx_descriptor *pd_prep_slave_sg(struct dma_chan *chan, goto err_desc_get; } - if (!first) { first = desc; } else { @@ -641,13 +640,13 @@ static int pd_device_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, spin_unlock_bh(&pd_chan->lock); - return 0; } static void pdc_tasklet(unsigned long data) { struct pch_dma_chan *pd_chan = (struct pch_dma_chan *)data; + unsigned long flags; if (!pdc_is_idle(pd_chan)) { dev_err(chan2dev(&pd_chan->chan), @@ -655,12 +654,12 @@ static void pdc_tasklet(unsigned long data) return; } - spin_lock_bh(&pd_chan->lock); + spin_lock_irqsave(&pd_chan->lock, flags); if (test_and_clear_bit(0, &pd_chan->err_status)) pdc_handle_error(pd_chan); else pdc_advance_work(pd_chan); - spin_unlock_bh(&pd_chan->lock); + spin_unlock_irqrestore(&pd_chan->lock, flags); } static irqreturn_t pd_irq(int irq, void *devid) -- cgit 
v0.10.2 From 26d890f0d09fd58f7194aad651e86283cb9e6574 Mon Sep 17 00:00:00 2001 From: Tomoya MORINAGA Date: Fri, 18 Feb 2011 10:01:21 +0530 Subject: pch_dma: set the number of array correctly set the number of array correctly. Signed-off-by: Tomoya MORINAGA Signed-off-by: Vinod Koul diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c index bf2ddd6..b1dfba7 100644 --- a/drivers/dma/pch_dma.c +++ b/drivers/dma/pch_dma.c @@ -82,7 +82,7 @@ struct pch_dma_regs { u32 dma_sts1; u32 reserved2; u32 reserved3; - struct pch_dma_desc_regs desc[0]; + struct pch_dma_desc_regs desc[MAX_CHAN_NR]; }; struct pch_dma_desc { @@ -124,7 +124,7 @@ struct pch_dma { struct pci_pool *pool; struct pch_dma_regs regs; struct pch_dma_desc_regs ch_regs[MAX_CHAN_NR]; - struct pch_dma_chan channels[0]; + struct pch_dma_chan channels[MAX_CHAN_NR]; }; #define PCH_DMA_CTL0 0x00 -- cgit v0.10.2 From d71f606f687ef9d0cdddfd3619ca7cb9a0b3fb63 Mon Sep 17 00:00:00 2001 From: Mariusz Kozlowski Date: Sat, 26 Feb 2011 20:10:26 +0100 Subject: slub: fix ksize() build error mm/slub.c: In function 'ksize': mm/slub.c:2728: error: implicit declaration of function 'slab_ksize' slab_ksize() needs to go out of CONFIG_SLUB_DEBUG section. Acked-by: Randy Dunlap Acked-by: David Rientjes Signed-off-by: Mariusz Kozlowski Signed-off-by: Pekka Enberg diff --git a/mm/slub.c b/mm/slub.c index 217b5b5..ea6f039 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -281,6 +281,30 @@ static inline int slab_index(void *p, struct kmem_cache *s, void *addr) return (p - addr) / s->size; } +static inline size_t slab_ksize(const struct kmem_cache *s) +{ +#ifdef CONFIG_SLUB_DEBUG + /* + * Debugging requires use of the padding between object + * and whatever may come after it. + */ + if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) + return s->objsize; + +#endif + /* + * If we have the need to store the freelist pointer + * back there or track user information then we can + * only use the space before that information. + */ + if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) + return s->inuse; + /* + * Else we can use all the padding etc for the allocation + */ + return s->size; +} + static inline struct kmem_cache_order_objects oo_make(int order, unsigned long size) { @@ -797,30 +821,6 @@ static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) return should_failslab(s->objsize, flags, s->flags); } -static inline size_t slab_ksize(const struct kmem_cache *s) -{ -#ifdef CONFIG_SLUB_DEBUG - /* - * Debugging requires use of the padding between object - * and whatever may come after it. - */ - if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) - return s->objsize; - -#endif - /* - * If we have the need to store the freelist pointer - * back there or track user information then we can - * only use the space before that information. - */ - if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) - return s->inuse; - /* - * Else we can use all the padding etc for the allocation - */ - return s->size; -} - static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object) { flags &= gfp_allowed_mask; -- cgit v0.10.2 From 173442f2787c88e1ed1bb62aaeb6fd9127720559 Mon Sep 17 00:00:00 2001 From: Tomoya MORINAGA Date: Tue, 1 Mar 2011 14:16:23 +0900 Subject: i2c-eg20t: support new device OKI SEMICONDUCTOR ML7213 IOH Support new device OKI SEMICONDUCTOR ML7213 IOH. The ML7213 which is for IVI(In-Vehicle Infotainment) is a companion chip for the Atom E6xx series and compatible with the Intel EG20T PCH. 
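On the driver side the two parts are distinguished by the trailing 1 and 2 in the pch_pcidev_id[] entries, i.e. the channel count carried as each ID's driver_data, and the shared interrupt handler then walks adap_info->ch_num instances instead of assuming channel 0. A small stand-alone model of that per-device lookup follows; the Intel device ID below is a placeholder, and only the ROHM 0x10DB / 0x802D pair is taken from this patch:

    #include <stdint.h>
    #include <stdio.h>

    #define VENDOR_INTEL   0x8086
    #define VENDOR_ROHM    0x10DB   /* from the patch */
    #define DEV_EG20T_I2C  0x0000   /* placeholder, not the real PCH ID */
    #define DEV_ML7213_I2C 0x802D   /* from the patch */

    struct id_entry {
            uint16_t vendor, device;
            int nr_channels;        /* plays the role of .driver_data */
    };

    static const struct id_entry ids[] = {
            { VENDOR_INTEL, DEV_EG20T_I2C,  1 },    /* EG20T PCH:  1 ch */
            { VENDOR_ROHM,  DEV_ML7213_I2C, 2 },    /* ML7213 IOH: 2 ch */
            { 0, 0, 0 }
    };

    static int channels_for(uint16_t vendor, uint16_t device)
    {
            const struct id_entry *e;

            for (e = ids; e->vendor; e++)
                    if (e->vendor == vendor && e->device == device)
                            return e->nr_channels;
            return -1;
    }

    int main(void)
    {
            /* The probe path would use this count to set up the
             * per-channel i2c_algo_pch_data instances. */
            printf("ML7213 I2C channels: %d\n",
                   channels_for(VENDOR_ROHM, DEV_ML7213_I2C));
            return 0;
    }

Keeping the count in the ID table rather than in per-chip ifdefs is what lets pch_i2c_handler() simply loop over adap_info->ch_num in the hunks below, and makes further EG20T-compatible IOHs a one-line addition.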
Signed-off-by: Tomoya MORINAGA Signed-off-by: Ben Dooks diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 113505a..5b592df 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -639,12 +639,15 @@ config I2C_XILINX will be called xilinx_i2c. config I2C_EG20T - tristate "PCH I2C of Intel EG20T" - depends on PCI - help - This driver is for PCH(Platform controller Hub) I2C of EG20T which - is an IOH(Input/Output Hub) for x86 embedded processor. - This driver can access PCH I2C bus device. + tristate "Intel EG20T PCH/OKI SEMICONDUCTOR ML7213 IOH" + depends on PCI + help + This driver is for PCH(Platform controller Hub) I2C of EG20T which + is an IOH(Input/Output Hub) for x86 embedded processor. + This driver can access PCH I2C bus device. + + This driver also supports the ML7213, a companion chip for the + Atom E6xx series and compatible with the Intel EG20T PCH. comment "External I2C/SMBus adapter drivers" diff --git a/drivers/i2c/busses/i2c-eg20t.c b/drivers/i2c/busses/i2c-eg20t.c index 2e067dd..c57c837 100644 --- a/drivers/i2c/busses/i2c-eg20t.c +++ b/drivers/i2c/busses/i2c-eg20t.c @@ -131,6 +131,13 @@ #define pch_pci_dbg(pdev, fmt, arg...) \ dev_dbg(&pdev->dev, "%s :" fmt, __func__, ##arg) +/* +Set the number of I2C instance max +Intel EG20T PCH : 1ch +OKI SEMICONDUCTOR ML7213 IOH : 2ch +*/ +#define PCH_I2C_MAX_DEV 2 + /** * struct i2c_algo_pch_data - for I2C driver functionalities * @pch_adapter: stores the reference to i2c_adapter structure @@ -155,12 +162,14 @@ struct i2c_algo_pch_data { * @pch_data: stores a list of i2c_algo_pch_data * @pch_i2c_suspended: specifies whether the system is suspended or not * perhaps with more lines and words. + * @ch_num: specifies the number of i2c instance * * pch_data has as many elements as maximum I2C channels */ struct adapter_info { - struct i2c_algo_pch_data pch_data; + struct i2c_algo_pch_data pch_data[PCH_I2C_MAX_DEV]; bool pch_i2c_suspended; + int ch_num; }; @@ -169,8 +178,13 @@ static int pch_clk = 50000; /* specifies I2C clock speed in KHz */ static wait_queue_head_t pch_event; static DEFINE_MUTEX(pch_mutex); +/* Definition for ML7213 by OKI SEMICONDUCTOR */ +#define PCI_VENDOR_ID_ROHM 0x10DB +#define PCI_DEVICE_ID_ML7213_I2C 0x802D + static struct pci_device_id __devinitdata pch_pcidev_id[] = { - {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PCH_I2C)}, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_PCH_I2C), 1, }, + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_I2C), 2, }, {0,} }; @@ -211,8 +225,7 @@ static void pch_i2c_init(struct i2c_algo_pch_data *adap) /* Initialize I2C registers */ iowrite32(0x21, p + PCH_I2CNF); - pch_setbit(adap->pch_base_address, PCH_I2CCTL, - PCH_I2CCTL_I2CMEN); + pch_setbit(adap->pch_base_address, PCH_I2CCTL, PCH_I2CCTL_I2CMEN); if (pch_i2c_speed != 400) pch_i2c_speed = 100; @@ -254,7 +267,7 @@ static inline bool ktime_lt(const ktime_t cmp1, const ktime_t cmp2) * @timeout: waiting time counter (us). */ static s32 pch_i2c_wait_for_bus_idle(struct i2c_algo_pch_data *adap, - s32 timeout) + s32 timeout) { void __iomem *p = adap->pch_base_address; @@ -474,8 +487,8 @@ static void pch_i2c_sendnack(struct i2c_algo_pch_data *adap) * @last: specifies whether last message or not. * @first: specifies whether first message or not. 
*/ -s32 pch_i2c_readbytes(struct i2c_adapter *i2c_adap, struct i2c_msg *msgs, - u32 last, u32 first) +static s32 pch_i2c_readbytes(struct i2c_adapter *i2c_adap, struct i2c_msg *msgs, + u32 last, u32 first) { struct i2c_algo_pch_data *adap = i2c_adap->algo_data; @@ -568,10 +581,10 @@ s32 pch_i2c_readbytes(struct i2c_adapter *i2c_adap, struct i2c_msg *msgs, } /** - * pch_i2c_cb_ch0() - Interrupt handler Call back function + * pch_i2c_cb() - Interrupt handler Call back function * @adap: Pointer to struct i2c_algo_pch_data. */ -static void pch_i2c_cb_ch0(struct i2c_algo_pch_data *adap) +static void pch_i2c_cb(struct i2c_algo_pch_data *adap) { u32 sts; void __iomem *p = adap->pch_base_address; @@ -599,24 +612,30 @@ static void pch_i2c_cb_ch0(struct i2c_algo_pch_data *adap) */ static irqreturn_t pch_i2c_handler(int irq, void *pData) { - s32 reg_val; - - struct i2c_algo_pch_data *adap_data = (struct i2c_algo_pch_data *)pData; - void __iomem *p = adap_data->pch_base_address; - u32 mode = ioread32(p + PCH_I2CMOD) & (BUFFER_MODE | EEPROM_SR_MODE); - - if (mode != NORMAL_MODE) { - pch_err(adap_data, "I2C mode is not supported\n"); - return IRQ_NONE; + u32 reg_val; + int flag; + int i; + struct adapter_info *adap_info = pData; + void __iomem *p; + u32 mode; + + for (i = 0, flag = 0; i < adap_info->ch_num; i++) { + p = adap_info->pch_data[i].pch_base_address; + mode = ioread32(p + PCH_I2CMOD); + mode &= BUFFER_MODE | EEPROM_SR_MODE; + if (mode != NORMAL_MODE) { + pch_err(adap_info->pch_data, + "I2C-%d mode(%d) is not supported\n", mode, i); + continue; + } + reg_val = ioread32(p + PCH_I2CSR); + if (reg_val & (I2CMAL_BIT | I2CMCF_BIT | I2CMIF_BIT)) { + pch_i2c_cb(&adap_info->pch_data[i]); + flag = 1; + } } - reg_val = ioread32(p + PCH_I2CSR); - if (reg_val & (I2CMAL_BIT | I2CMCF_BIT | I2CMIF_BIT)) - pch_i2c_cb_ch0(adap_data); - else - return IRQ_NONE; - - return IRQ_HANDLED; + return flag ? IRQ_HANDLED : IRQ_NONE; } /** @@ -626,7 +645,7 @@ static irqreturn_t pch_i2c_handler(int irq, void *pData) * @num: number of messages. 
*/ static s32 pch_i2c_xfer(struct i2c_adapter *i2c_adap, - struct i2c_msg *msgs, s32 num) + struct i2c_msg *msgs, s32 num) { struct i2c_msg *pmsg; u32 i = 0; @@ -709,11 +728,13 @@ static void pch_i2c_disbl_int(struct i2c_algo_pch_data *adap) } static int __devinit pch_i2c_probe(struct pci_dev *pdev, - const struct pci_device_id *id) + const struct pci_device_id *id) { void __iomem *base_addr; - s32 ret; + int ret; + int i, j; struct adapter_info *adap_info; + struct i2c_adapter *pch_adap; pch_pci_dbg(pdev, "Entered.\n"); @@ -743,44 +764,48 @@ static int __devinit pch_i2c_probe(struct pci_dev *pdev, goto err_pci_iomap; } - adap_info->pch_i2c_suspended = false; + /* Set the number of I2C channel instance */ + adap_info->ch_num = id->driver_data; - adap_info->pch_data.p_adapter_info = adap_info; + for (i = 0; i < adap_info->ch_num; i++) { + pch_adap = &adap_info->pch_data[i].pch_adapter; + adap_info->pch_i2c_suspended = false; - adap_info->pch_data.pch_adapter.owner = THIS_MODULE; - adap_info->pch_data.pch_adapter.class = I2C_CLASS_HWMON; - strcpy(adap_info->pch_data.pch_adapter.name, KBUILD_MODNAME); - adap_info->pch_data.pch_adapter.algo = &pch_algorithm; - adap_info->pch_data.pch_adapter.algo_data = - &adap_info->pch_data; + adap_info->pch_data[i].p_adapter_info = adap_info; - /* (i * 0x80) + base_addr; */ - adap_info->pch_data.pch_base_address = base_addr; + pch_adap->owner = THIS_MODULE; + pch_adap->class = I2C_CLASS_HWMON; + strcpy(pch_adap->name, KBUILD_MODNAME); + pch_adap->algo = &pch_algorithm; + pch_adap->algo_data = &adap_info->pch_data[i]; - adap_info->pch_data.pch_adapter.dev.parent = &pdev->dev; + /* base_addr + offset; */ + adap_info->pch_data[i].pch_base_address = base_addr + 0x100 * i; - ret = i2c_add_adapter(&(adap_info->pch_data.pch_adapter)); + pch_adap->dev.parent = &pdev->dev; - if (ret) { - pch_pci_err(pdev, "i2c_add_adapter FAILED\n"); - goto err_i2c_add_adapter; - } + ret = i2c_add_adapter(pch_adap); + if (ret) { + pch_pci_err(pdev, "i2c_add_adapter[ch:%d] FAILED\n", i); + goto err_i2c_add_adapter; + } - pch_i2c_init(&adap_info->pch_data); + pch_i2c_init(&adap_info->pch_data[i]); + } ret = request_irq(pdev->irq, pch_i2c_handler, IRQF_SHARED, - KBUILD_MODNAME, &adap_info->pch_data); + KBUILD_MODNAME, adap_info); if (ret) { pch_pci_err(pdev, "request_irq FAILED\n"); - goto err_request_irq; + goto err_i2c_add_adapter; } pci_set_drvdata(pdev, adap_info); pch_pci_dbg(pdev, "returns %d.\n", ret); return 0; -err_request_irq: - i2c_del_adapter(&(adap_info->pch_data.pch_adapter)); err_i2c_add_adapter: + for (j = 0; j < i; j++) + i2c_del_adapter(&adap_info->pch_data[j].pch_adapter); pci_iounmap(pdev, base_addr); err_pci_iomap: pci_release_regions(pdev); @@ -793,17 +818,22 @@ err_pci_enable: static void __devexit pch_i2c_remove(struct pci_dev *pdev) { + int i; struct adapter_info *adap_info = pci_get_drvdata(pdev); - pch_i2c_disbl_int(&adap_info->pch_data); - free_irq(pdev->irq, &adap_info->pch_data); - i2c_del_adapter(&(adap_info->pch_data.pch_adapter)); + free_irq(pdev->irq, adap_info); - if (adap_info->pch_data.pch_base_address) { - pci_iounmap(pdev, adap_info->pch_data.pch_base_address); - adap_info->pch_data.pch_base_address = 0; + for (i = 0; i < adap_info->ch_num; i++) { + pch_i2c_disbl_int(&adap_info->pch_data[i]); + i2c_del_adapter(&adap_info->pch_data[i].pch_adapter); } + if (adap_info->pch_data[0].pch_base_address) + pci_iounmap(pdev, adap_info->pch_data[0].pch_base_address); + + for (i = 0; i < adap_info->ch_num; i++) + adap_info->pch_data[i].pch_base_address = 0; + 
pci_set_drvdata(pdev, NULL); pci_release_regions(pdev); @@ -816,17 +846,22 @@ static void __devexit pch_i2c_remove(struct pci_dev *pdev) static int pch_i2c_suspend(struct pci_dev *pdev, pm_message_t state) { int ret; + int i; struct adapter_info *adap_info = pci_get_drvdata(pdev); - void __iomem *p = adap_info->pch_data.pch_base_address; + void __iomem *p = adap_info->pch_data[0].pch_base_address; adap_info->pch_i2c_suspended = true; - while ((adap_info->pch_data.pch_i2c_xfer_in_progress)) { - /* Wait until all channel transfers are completed */ - msleep(20); + for (i = 0; i < adap_info->ch_num; i++) { + while ((adap_info->pch_data[i].pch_i2c_xfer_in_progress)) { + /* Wait until all channel transfers are completed */ + msleep(20); + } } + /* Disable the i2c interrupts */ - pch_i2c_disbl_int(&adap_info->pch_data); + for (i = 0; i < adap_info->ch_num; i++) + pch_i2c_disbl_int(&adap_info->pch_data[i]); pch_pci_dbg(pdev, "I2CSR = %x I2CBUFSTA = %x I2CESRSTA = %x " "invoked function pch_i2c_disbl_int successfully\n", @@ -849,6 +884,7 @@ static int pch_i2c_suspend(struct pci_dev *pdev, pm_message_t state) static int pch_i2c_resume(struct pci_dev *pdev) { + int i; struct adapter_info *adap_info = pci_get_drvdata(pdev); pci_set_power_state(pdev, PCI_D0); @@ -861,7 +897,8 @@ static int pch_i2c_resume(struct pci_dev *pdev) pci_enable_wake(pdev, PCI_D3hot, 0); - pch_i2c_init(&adap_info->pch_data); + for (i = 0; i < adap_info->ch_num; i++) + pch_i2c_init(&adap_info->pch_data[i]); adap_info->pch_i2c_suspended = false; @@ -893,7 +930,7 @@ static void __exit pch_pci_exit(void) } module_exit(pch_pci_exit); -MODULE_DESCRIPTION("PCH I2C PCI Driver"); +MODULE_DESCRIPTION("Intel EG20T PCH/OKI SEMICONDUCTOR ML7213 IOH I2C Driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Tomoya MORINAGA. "); module_param(pch_i2c_speed, int, (S_IRUSR | S_IWUSR)); -- cgit v0.10.2 From a580b8c5429a624d120cd603e1498bf676e2b4da Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Sun, 27 Feb 2011 00:47:42 +0800 Subject: dmaengine: mxs-dma: add dma support for i.MX23/28 This patch adds dma support for Freescale MXS-based SoC i.MX23/28, including apbh-dma and apbx-dma. * apbh-dma and apbx-dma are supported in the driver as two mxs-dma instances. * apbh-dma is different between mx23 and mx28, hardware version register is used to differentiate. * mxs-dma supports pio function besides data transfer. The driver uses dma_data_direction DMA_NONE to identify the pio mode, and steals sgl and sg_len to get pio words and numbers from clients. * mxs dmaengine has some very specific features, like sense function and the special NAND support (nand_lock, nand_wait4ready). These are too specific to implemented in generic dmaengine driver. * The driver refers to imx-sdma and only a single descriptor is statically assigned to each channel. Signed-off-by: Shawn Guo Signed-off-by: Vinod Koul diff --git a/arch/arm/mach-mxs/include/mach/dma.h b/arch/arm/mach-mxs/include/mach/dma.h new file mode 100644 index 0000000..7f4aeea --- /dev/null +++ b/arch/arm/mach-mxs/include/mach/dma.h @@ -0,0 +1,26 @@ +/* + * Copyright 2011 Freescale Semiconductor, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __MACH_MXS_DMA_H__ +#define __MACH_MXS_DMA_H__ + +struct mxs_dma_data { + int chan_irq; +}; + +static inline int mxs_dma_is_apbh(struct dma_chan *chan) +{ + return !strcmp(dev_name(chan->device->dev), "mxs-dma-apbh"); +} + +static inline int mxs_dma_is_apbx(struct dma_chan *chan) +{ + return !strcmp(dev_name(chan->device->dev), "mxs-dma-apbx"); +} + +#endif /* __MACH_MXS_DMA_H__ */ diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 1c28816..76f6472 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -227,6 +227,14 @@ config IMX_DMA Support the i.MX DMA engine. This engine is integrated into Freescale i.MX1/21/27 chips. +config MXS_DMA + bool "MXS DMA support" + depends on SOC_IMX23 || SOC_IMX28 + select DMA_ENGINE + help + Support the MXS DMA engine. This engine including APBH-DMA + and APBX-DMA is integrated into Freescale i.MX23/28 chips. + config DMA_ENGINE bool diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 64b21f5..802b557 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -23,6 +23,7 @@ obj-$(CONFIG_COH901318) += coh901318.o coh901318_lli.o obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/ obj-$(CONFIG_IMX_SDMA) += imx-sdma.o obj-$(CONFIG_IMX_DMA) += imx-dma.o +obj-$(CONFIG_MXS_DMA) += mxs-dma.o obj-$(CONFIG_TIMB_DMA) += timb_dma.o obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o obj-$(CONFIG_PL330_DMA) += pl330.o diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c new file mode 100644 index 0000000..88aad4f --- /dev/null +++ b/drivers/dma/mxs-dma.c @@ -0,0 +1,724 @@ +/* + * Copyright 2011 Freescale Semiconductor, Inc. All Rights Reserved. + * + * Refer to drivers/dma/imx-sdma.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* + * NOTE: The term "PIO" throughout the mxs-dma implementation means + * PIO mode of mxs apbh-dma and apbx-dma. With this working mode, + * dma can program the controller registers of peripheral devices. + */ + +#define MXS_DMA_APBH 0 +#define MXS_DMA_APBX 1 +#define dma_is_apbh() (mxs_dma->dev_id == MXS_DMA_APBH) + +#define APBH_VERSION_LATEST 3 +#define apbh_is_old() (mxs_dma->version < APBH_VERSION_LATEST) + +#define HW_APBHX_CTRL0 0x000 +#define BM_APBH_CTRL0_APB_BURST8_EN (1 << 29) +#define BM_APBH_CTRL0_APB_BURST_EN (1 << 28) +#define BP_APBH_CTRL0_CLKGATE_CHANNEL 8 +#define BP_APBH_CTRL0_RESET_CHANNEL 16 +#define HW_APBHX_CTRL1 0x010 +#define HW_APBHX_CTRL2 0x020 +#define HW_APBHX_CHANNEL_CTRL 0x030 +#define BP_APBHX_CHANNEL_CTRL_RESET_CHANNEL 16 +#define HW_APBH_VERSION (cpu_is_mx23() ? 0x3f0 : 0x800) +#define HW_APBX_VERSION 0x800 +#define BP_APBHX_VERSION_MAJOR 24 +#define HW_APBHX_CHn_NXTCMDAR(n) \ + (((dma_is_apbh() && apbh_is_old()) ? 0x050 : 0x110) + (n) * 0x70) +#define HW_APBHX_CHn_SEMA(n) \ + (((dma_is_apbh() && apbh_is_old()) ? 
0x080 : 0x140) + (n) * 0x70) + +/* + * ccw bits definitions + * + * COMMAND: 0..1 (2) + * CHAIN: 2 (1) + * IRQ: 3 (1) + * NAND_LOCK: 4 (1) - not implemented + * NAND_WAIT4READY: 5 (1) - not implemented + * DEC_SEM: 6 (1) + * WAIT4END: 7 (1) + * HALT_ON_TERMINATE: 8 (1) + * TERMINATE_FLUSH: 9 (1) + * RESERVED: 10..11 (2) + * PIO_NUM: 12..15 (4) + */ +#define BP_CCW_COMMAND 0 +#define BM_CCW_COMMAND (3 << 0) +#define CCW_CHAIN (1 << 2) +#define CCW_IRQ (1 << 3) +#define CCW_DEC_SEM (1 << 6) +#define CCW_WAIT4END (1 << 7) +#define CCW_HALT_ON_TERM (1 << 8) +#define CCW_TERM_FLUSH (1 << 9) +#define BP_CCW_PIO_NUM 12 +#define BM_CCW_PIO_NUM (0xf << 12) + +#define BF_CCW(value, field) (((value) << BP_CCW_##field) & BM_CCW_##field) + +#define MXS_DMA_CMD_NO_XFER 0 +#define MXS_DMA_CMD_WRITE 1 +#define MXS_DMA_CMD_READ 2 +#define MXS_DMA_CMD_DMA_SENSE 3 /* not implemented */ + +struct mxs_dma_ccw { + u32 next; + u16 bits; + u16 xfer_bytes; +#define MAX_XFER_BYTES 0xff00 + u32 bufaddr; +#define MXS_PIO_WORDS 16 + u32 pio_words[MXS_PIO_WORDS]; +}; + +#define NUM_CCW (int)(PAGE_SIZE / sizeof(struct mxs_dma_ccw)) + +struct mxs_dma_chan { + struct mxs_dma_engine *mxs_dma; + struct dma_chan chan; + struct dma_async_tx_descriptor desc; + struct tasklet_struct tasklet; + int chan_irq; + struct mxs_dma_ccw *ccw; + dma_addr_t ccw_phys; + dma_cookie_t last_completed; + enum dma_status status; + unsigned int flags; +#define MXS_DMA_SG_LOOP (1 << 0) +}; + +#define MXS_DMA_CHANNELS 16 +#define MXS_DMA_CHANNELS_MASK 0xffff + +struct mxs_dma_engine { + int dev_id; + unsigned int version; + void __iomem *base; + struct clk *clk; + struct dma_device dma_device; + struct device_dma_parameters dma_parms; + struct mxs_dma_chan mxs_chans[MXS_DMA_CHANNELS]; +}; + +static void mxs_dma_reset_chan(struct mxs_dma_chan *mxs_chan) +{ + struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma; + int chan_id = mxs_chan->chan.chan_id; + + if (dma_is_apbh() && apbh_is_old()) + writel(1 << (chan_id + BP_APBH_CTRL0_RESET_CHANNEL), + mxs_dma->base + HW_APBHX_CTRL0 + MXS_SET_ADDR); + else + writel(1 << (chan_id + BP_APBHX_CHANNEL_CTRL_RESET_CHANNEL), + mxs_dma->base + HW_APBHX_CHANNEL_CTRL + MXS_SET_ADDR); +} + +static void mxs_dma_enable_chan(struct mxs_dma_chan *mxs_chan) +{ + struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma; + int chan_id = mxs_chan->chan.chan_id; + + /* set cmd_addr up */ + writel(mxs_chan->ccw_phys, + mxs_dma->base + HW_APBHX_CHn_NXTCMDAR(chan_id)); + + /* enable apbh channel clock */ + if (dma_is_apbh()) { + if (apbh_is_old()) + writel(1 << (chan_id + BP_APBH_CTRL0_CLKGATE_CHANNEL), + mxs_dma->base + HW_APBHX_CTRL0 + MXS_CLR_ADDR); + else + writel(1 << chan_id, + mxs_dma->base + HW_APBHX_CTRL0 + MXS_CLR_ADDR); + } + + /* write 1 to SEMA to kick off the channel */ + writel(1, mxs_dma->base + HW_APBHX_CHn_SEMA(chan_id)); +} + +static void mxs_dma_disable_chan(struct mxs_dma_chan *mxs_chan) +{ + struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma; + int chan_id = mxs_chan->chan.chan_id; + + /* disable apbh channel clock */ + if (dma_is_apbh()) { + if (apbh_is_old()) + writel(1 << (chan_id + BP_APBH_CTRL0_CLKGATE_CHANNEL), + mxs_dma->base + HW_APBHX_CTRL0 + MXS_SET_ADDR); + else + writel(1 << chan_id, + mxs_dma->base + HW_APBHX_CTRL0 + MXS_SET_ADDR); + } + + mxs_chan->status = DMA_SUCCESS; +} + +static void mxs_dma_pause_chan(struct mxs_dma_chan *mxs_chan) +{ + struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma; + int chan_id = mxs_chan->chan.chan_id; + + /* freeze the channel */ + if (dma_is_apbh() && apbh_is_old()) + 
writel(1 << chan_id, + mxs_dma->base + HW_APBHX_CTRL0 + MXS_SET_ADDR); + else + writel(1 << chan_id, + mxs_dma->base + HW_APBHX_CHANNEL_CTRL + MXS_SET_ADDR); + + mxs_chan->status = DMA_PAUSED; +} + +static void mxs_dma_resume_chan(struct mxs_dma_chan *mxs_chan) +{ + struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma; + int chan_id = mxs_chan->chan.chan_id; + + /* unfreeze the channel */ + if (dma_is_apbh() && apbh_is_old()) + writel(1 << chan_id, + mxs_dma->base + HW_APBHX_CTRL0 + MXS_CLR_ADDR); + else + writel(1 << chan_id, + mxs_dma->base + HW_APBHX_CHANNEL_CTRL + MXS_CLR_ADDR); + + mxs_chan->status = DMA_IN_PROGRESS; +} + +static dma_cookie_t mxs_dma_assign_cookie(struct mxs_dma_chan *mxs_chan) +{ + dma_cookie_t cookie = mxs_chan->chan.cookie; + + if (++cookie < 0) + cookie = 1; + + mxs_chan->chan.cookie = cookie; + mxs_chan->desc.cookie = cookie; + + return cookie; +} + +static struct mxs_dma_chan *to_mxs_dma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct mxs_dma_chan, chan); +} + +static dma_cookie_t mxs_dma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(tx->chan); + + mxs_dma_enable_chan(mxs_chan); + + return mxs_dma_assign_cookie(mxs_chan); +} + +static void mxs_dma_tasklet(unsigned long data) +{ + struct mxs_dma_chan *mxs_chan = (struct mxs_dma_chan *) data; + + if (mxs_chan->desc.callback) + mxs_chan->desc.callback(mxs_chan->desc.callback_param); +} + +static irqreturn_t mxs_dma_int_handler(int irq, void *dev_id) +{ + struct mxs_dma_engine *mxs_dma = dev_id; + u32 stat1, stat2; + + /* completion status */ + stat1 = readl(mxs_dma->base + HW_APBHX_CTRL1); + stat1 &= MXS_DMA_CHANNELS_MASK; + writel(stat1, mxs_dma->base + HW_APBHX_CTRL1 + MXS_CLR_ADDR); + + /* error status */ + stat2 = readl(mxs_dma->base + HW_APBHX_CTRL2); + writel(stat2, mxs_dma->base + HW_APBHX_CTRL2 + MXS_CLR_ADDR); + + /* + * When both completion and error of termination bits set at the + * same time, we do not take it as an error. IOW, it only becomes + * an error we need to handler here in case of ether it's (1) an bus + * error or (2) a termination error with no completion. 
+ */ + stat2 = ((stat2 >> MXS_DMA_CHANNELS) & stat2) | /* (1) */ + (~(stat2 >> MXS_DMA_CHANNELS) & stat2 & ~stat1); /* (2) */ + + /* combine error and completion status for checking */ + stat1 = (stat2 << MXS_DMA_CHANNELS) | stat1; + while (stat1) { + int channel = fls(stat1) - 1; + struct mxs_dma_chan *mxs_chan = + &mxs_dma->mxs_chans[channel % MXS_DMA_CHANNELS]; + + if (channel >= MXS_DMA_CHANNELS) { + dev_dbg(mxs_dma->dma_device.dev, + "%s: error in channel %d\n", __func__, + channel - MXS_DMA_CHANNELS); + mxs_chan->status = DMA_ERROR; + mxs_dma_reset_chan(mxs_chan); + } else { + if (mxs_chan->flags & MXS_DMA_SG_LOOP) + mxs_chan->status = DMA_IN_PROGRESS; + else + mxs_chan->status = DMA_SUCCESS; + } + + stat1 &= ~(1 << channel); + + if (mxs_chan->status == DMA_SUCCESS) + mxs_chan->last_completed = mxs_chan->desc.cookie; + + /* schedule tasklet on this channel */ + tasklet_schedule(&mxs_chan->tasklet); + } + + return IRQ_HANDLED; +} + +static int mxs_dma_alloc_chan_resources(struct dma_chan *chan) +{ + struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan); + struct mxs_dma_data *data = chan->private; + struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma; + int ret; + + if (!data) + return -EINVAL; + + mxs_chan->chan_irq = data->chan_irq; + + mxs_chan->ccw = dma_alloc_coherent(mxs_dma->dma_device.dev, PAGE_SIZE, + &mxs_chan->ccw_phys, GFP_KERNEL); + if (!mxs_chan->ccw) { + ret = -ENOMEM; + goto err_alloc; + } + + memset(mxs_chan->ccw, 0, PAGE_SIZE); + + ret = request_irq(mxs_chan->chan_irq, mxs_dma_int_handler, + 0, "mxs-dma", mxs_dma); + if (ret) + goto err_irq; + + ret = clk_enable(mxs_dma->clk); + if (ret) + goto err_clk; + + mxs_dma_reset_chan(mxs_chan); + + dma_async_tx_descriptor_init(&mxs_chan->desc, chan); + mxs_chan->desc.tx_submit = mxs_dma_tx_submit; + + /* the descriptor is ready */ + async_tx_ack(&mxs_chan->desc); + + return 0; + +err_clk: + free_irq(mxs_chan->chan_irq, mxs_dma); +err_irq: + dma_free_coherent(mxs_dma->dma_device.dev, PAGE_SIZE, + mxs_chan->ccw, mxs_chan->ccw_phys); +err_alloc: + return ret; +} + +static void mxs_dma_free_chan_resources(struct dma_chan *chan) +{ + struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan); + struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma; + + mxs_dma_disable_chan(mxs_chan); + + free_irq(mxs_chan->chan_irq, mxs_dma); + + dma_free_coherent(mxs_dma->dma_device.dev, PAGE_SIZE, + mxs_chan->ccw, mxs_chan->ccw_phys); + + clk_disable(mxs_dma->clk); +} + +static struct dma_async_tx_descriptor *mxs_dma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_data_direction direction, + unsigned long append) +{ + struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan); + struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma; + struct mxs_dma_ccw *ccw; + struct scatterlist *sg; + int i, j; + u32 *pio; + static int idx; + + if (mxs_chan->status == DMA_IN_PROGRESS && !append) + return NULL; + + if (sg_len + (append ? idx : 0) > NUM_CCW) { + dev_err(mxs_dma->dma_device.dev, + "maximum number of sg exceeded: %d > %d\n", + sg_len, NUM_CCW); + goto err_out; + } + + mxs_chan->status = DMA_IN_PROGRESS; + mxs_chan->flags = 0; + + /* + * If the sg is prepared with append flag set, the sg + * will be appended to the last prepared sg. 
+ */ + if (append) { + BUG_ON(idx < 1); + ccw = &mxs_chan->ccw[idx - 1]; + ccw->next = mxs_chan->ccw_phys + sizeof(*ccw) * idx; + ccw->bits |= CCW_CHAIN; + ccw->bits &= ~CCW_IRQ; + ccw->bits &= ~CCW_DEC_SEM; + ccw->bits &= ~CCW_WAIT4END; + } else { + idx = 0; + } + + if (direction == DMA_NONE) { + ccw = &mxs_chan->ccw[idx++]; + pio = (u32 *) sgl; + + for (j = 0; j < sg_len;) + ccw->pio_words[j++] = *pio++; + + ccw->bits = 0; + ccw->bits |= CCW_IRQ; + ccw->bits |= CCW_DEC_SEM; + ccw->bits |= CCW_WAIT4END; + ccw->bits |= CCW_HALT_ON_TERM; + ccw->bits |= CCW_TERM_FLUSH; + ccw->bits |= BF_CCW(sg_len, PIO_NUM); + ccw->bits |= BF_CCW(MXS_DMA_CMD_NO_XFER, COMMAND); + } else { + for_each_sg(sgl, sg, sg_len, i) { + if (sg->length > MAX_XFER_BYTES) { + dev_err(mxs_dma->dma_device.dev, "maximum bytes for sg entry exceeded: %d > %d\n", + sg->length, MAX_XFER_BYTES); + goto err_out; + } + + ccw = &mxs_chan->ccw[idx++]; + + ccw->next = mxs_chan->ccw_phys + sizeof(*ccw) * idx; + ccw->bufaddr = sg->dma_address; + ccw->xfer_bytes = sg->length; + + ccw->bits = 0; + ccw->bits |= CCW_CHAIN; + ccw->bits |= CCW_HALT_ON_TERM; + ccw->bits |= CCW_TERM_FLUSH; + ccw->bits |= BF_CCW(direction == DMA_FROM_DEVICE ? + MXS_DMA_CMD_WRITE : MXS_DMA_CMD_READ, + COMMAND); + + if (i + 1 == sg_len) { + ccw->bits &= ~CCW_CHAIN; + ccw->bits |= CCW_IRQ; + ccw->bits |= CCW_DEC_SEM; + ccw->bits |= CCW_WAIT4END; + } + } + } + + return &mxs_chan->desc; + +err_out: + mxs_chan->status = DMA_ERROR; + return NULL; +} + +static struct dma_async_tx_descriptor *mxs_dma_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, + size_t period_len, enum dma_data_direction direction) +{ + struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan); + struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma; + int num_periods = buf_len / period_len; + int i = 0, buf = 0; + + if (mxs_chan->status == DMA_IN_PROGRESS) + return NULL; + + mxs_chan->status = DMA_IN_PROGRESS; + mxs_chan->flags |= MXS_DMA_SG_LOOP; + + if (num_periods > NUM_CCW) { + dev_err(mxs_dma->dma_device.dev, + "maximum number of sg exceeded: %d > %d\n", + num_periods, NUM_CCW); + goto err_out; + } + + if (period_len > MAX_XFER_BYTES) { + dev_err(mxs_dma->dma_device.dev, + "maximum period size exceeded: %d > %d\n", + period_len, MAX_XFER_BYTES); + goto err_out; + } + + while (buf < buf_len) { + struct mxs_dma_ccw *ccw = &mxs_chan->ccw[i]; + + if (i + 1 == num_periods) + ccw->next = mxs_chan->ccw_phys; + else + ccw->next = mxs_chan->ccw_phys + sizeof(*ccw) * (i + 1); + + ccw->bufaddr = dma_addr; + ccw->xfer_bytes = period_len; + + ccw->bits = 0; + ccw->bits |= CCW_CHAIN; + ccw->bits |= CCW_IRQ; + ccw->bits |= CCW_HALT_ON_TERM; + ccw->bits |= CCW_TERM_FLUSH; + ccw->bits |= BF_CCW(direction == DMA_FROM_DEVICE ? 
+ MXS_DMA_CMD_WRITE : MXS_DMA_CMD_READ, COMMAND); + + dma_addr += period_len; + buf += period_len; + + i++; + } + + return &mxs_chan->desc; + +err_out: + mxs_chan->status = DMA_ERROR; + return NULL; +} + +static int mxs_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) +{ + struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan); + int ret = 0; + + switch (cmd) { + case DMA_TERMINATE_ALL: + mxs_dma_disable_chan(mxs_chan); + break; + case DMA_PAUSE: + mxs_dma_pause_chan(mxs_chan); + break; + case DMA_RESUME: + mxs_dma_resume_chan(mxs_chan); + break; + default: + ret = -ENOSYS; + } + + return ret; +} + +static enum dma_status mxs_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, struct dma_tx_state *txstate) +{ + struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan); + dma_cookie_t last_used; + + last_used = chan->cookie; + dma_set_tx_state(txstate, mxs_chan->last_completed, last_used, 0); + + return mxs_chan->status; +} + +static void mxs_dma_issue_pending(struct dma_chan *chan) +{ + /* + * Nothing to do. We only have a single descriptor. + */ +} + +static int __init mxs_dma_init(struct mxs_dma_engine *mxs_dma) +{ + int ret; + + ret = clk_enable(mxs_dma->clk); + if (ret) + goto err_out; + + ret = mxs_reset_block(mxs_dma->base); + if (ret) + goto err_out; + + /* only major version matters */ + mxs_dma->version = readl(mxs_dma->base + + ((mxs_dma->dev_id == MXS_DMA_APBX) ? + HW_APBX_VERSION : HW_APBH_VERSION)) >> + BP_APBHX_VERSION_MAJOR; + + /* enable apbh burst */ + if (dma_is_apbh()) { + writel(BM_APBH_CTRL0_APB_BURST_EN, + mxs_dma->base + HW_APBHX_CTRL0 + MXS_SET_ADDR); + writel(BM_APBH_CTRL0_APB_BURST8_EN, + mxs_dma->base + HW_APBHX_CTRL0 + MXS_SET_ADDR); + } + + /* enable irq for all the channels */ + writel(MXS_DMA_CHANNELS_MASK << MXS_DMA_CHANNELS, + mxs_dma->base + HW_APBHX_CTRL1 + MXS_SET_ADDR); + + clk_disable(mxs_dma->clk); + + return 0; + +err_out: + return ret; +} + +static int __init mxs_dma_probe(struct platform_device *pdev) +{ + const struct platform_device_id *id_entry = + platform_get_device_id(pdev); + struct mxs_dma_engine *mxs_dma; + struct resource *iores; + int ret, i; + + mxs_dma = kzalloc(sizeof(*mxs_dma), GFP_KERNEL); + if (!mxs_dma) + return -ENOMEM; + + mxs_dma->dev_id = id_entry->driver_data; + + iores = platform_get_resource(pdev, IORESOURCE_MEM, 0); + + if (!request_mem_region(iores->start, resource_size(iores), + pdev->name)) { + ret = -EBUSY; + goto err_request_region; + } + + mxs_dma->base = ioremap(iores->start, resource_size(iores)); + if (!mxs_dma->base) { + ret = -ENOMEM; + goto err_ioremap; + } + + mxs_dma->clk = clk_get(&pdev->dev, NULL); + if (IS_ERR(mxs_dma->clk)) { + ret = PTR_ERR(mxs_dma->clk); + goto err_clk; + } + + dma_cap_set(DMA_SLAVE, mxs_dma->dma_device.cap_mask); + dma_cap_set(DMA_CYCLIC, mxs_dma->dma_device.cap_mask); + + INIT_LIST_HEAD(&mxs_dma->dma_device.channels); + + /* Initialize channel parameters */ + for (i = 0; i < MXS_DMA_CHANNELS; i++) { + struct mxs_dma_chan *mxs_chan = &mxs_dma->mxs_chans[i]; + + mxs_chan->mxs_dma = mxs_dma; + mxs_chan->chan.device = &mxs_dma->dma_device; + + tasklet_init(&mxs_chan->tasklet, mxs_dma_tasklet, + (unsigned long) mxs_chan); + + + /* Add the channel to mxs_chan list */ + list_add_tail(&mxs_chan->chan.device_node, + &mxs_dma->dma_device.channels); + } + + ret = mxs_dma_init(mxs_dma); + if (ret) + goto err_init; + + mxs_dma->dma_device.dev = &pdev->dev; + + /* mxs_dma gets 65535 bytes maximum sg size */ + mxs_dma->dma_device.dev->dma_parms = &mxs_dma->dma_parms; 
+ dma_set_max_seg_size(mxs_dma->dma_device.dev, MAX_XFER_BYTES); + + mxs_dma->dma_device.device_alloc_chan_resources = mxs_dma_alloc_chan_resources; + mxs_dma->dma_device.device_free_chan_resources = mxs_dma_free_chan_resources; + mxs_dma->dma_device.device_tx_status = mxs_dma_tx_status; + mxs_dma->dma_device.device_prep_slave_sg = mxs_dma_prep_slave_sg; + mxs_dma->dma_device.device_prep_dma_cyclic = mxs_dma_prep_dma_cyclic; + mxs_dma->dma_device.device_control = mxs_dma_control; + mxs_dma->dma_device.device_issue_pending = mxs_dma_issue_pending; + + ret = dma_async_device_register(&mxs_dma->dma_device); + if (ret) { + dev_err(mxs_dma->dma_device.dev, "unable to register\n"); + goto err_init; + } + + dev_info(mxs_dma->dma_device.dev, "initialized\n"); + + return 0; + +err_init: + clk_put(mxs_dma->clk); +err_clk: + iounmap(mxs_dma->base); +err_ioremap: + release_mem_region(iores->start, resource_size(iores)); +err_request_region: + kfree(mxs_dma); + return ret; +} + +static struct platform_device_id mxs_dma_type[] = { + { + .name = "mxs-dma-apbh", + .driver_data = MXS_DMA_APBH, + }, { + .name = "mxs-dma-apbx", + .driver_data = MXS_DMA_APBX, + } +}; + +static struct platform_driver mxs_dma_driver = { + .driver = { + .name = "mxs-dma", + }, + .id_table = mxs_dma_type, +}; + +static int __init mxs_dma_module_init(void) +{ + return platform_driver_probe(&mxs_dma_driver, mxs_dma_probe); +} +subsys_initcall(mxs_dma_module_init); -- cgit v0.10.2 From f44ad7e91dd12bed0959b3e715f4f3ab84951a59 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 3 Mar 2011 15:47:14 +0530 Subject: dw_dmac: Remove compilation dependency from AVR32 and put on HAVE_CLK This driver will now be used in atleast two platforms AVR32 & ARM. And there is no actual hardware dependency of this driver over AVR32 or ARM. So this dependency can be removed altogether. Also dw_dmac driver uses clk framework and must have compilation dependency on HAVE_CLK Signed-off-by: Viresh Kumar Signed-off-by: Vinod Koul diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 76f6472..d700895 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -82,7 +82,7 @@ config INTEL_IOP_ADMA config DW_DMAC tristate "Synopsys DesignWare AHB DMA support" - depends on AVR32 + depends on HAVE_CLK select DMA_ENGINE default y if CPU_AT32AP7000 help -- cgit v0.10.2 From cb689a706d17ef19a61735670ded60466dd015fa Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 3 Mar 2011 15:47:15 +0530 Subject: dw_dmac: Replace module_init() with subsys_initcall() In some cases users of dw_dmac are initialized before dw_dmac, and if they try to use dw_dmac, they simply fail. So its better we register init() routine of driver using subsys_initcall() instead of module_init(), so that dma driver is available at the earliest possible. 
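[Editor's sketch] For reference, a stripped-down illustration of the initcall change described above; the driver and function names are placeholders, not the real dw_dmac symbols. Registering from subsys_initcall() instead of module_init() (device_initcall level) makes the DMA engine available before most client drivers initialise:

    #include <linux/init.h>
    #include <linux/platform_device.h>

    static int __init example_dma_probe(struct platform_device *pdev)
    {
            /* map registers, set up channels, register with dmaengine */
            return 0;
    }

    static struct platform_driver example_dma_driver = {
            .driver = {
                    .name = "example-dma",
            },
    };

    static int __init example_dma_init(void)
    {
            return platform_driver_probe(&example_dma_driver, example_dma_probe);
    }
    /* was: module_init(example_dma_init); that runs too late for early clients */
    subsys_initcall(example_dma_init);
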
Signed-off-by: Viresh Kumar Signed-off-by: Vinod Koul diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index 08dab3b..064a183 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -1455,7 +1455,7 @@ static int __init dw_init(void) { return platform_driver_probe(&dw_driver, dw_probe); } -module_init(dw_init); +subsys_initcall(dw_init); static void __exit dw_exit(void) { -- cgit v0.10.2 From f336e42f73d93b74fd21bf9176ee6c7ab8b195c5 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 3 Mar 2011 15:47:16 +0530 Subject: dw_dmac: Move single descriptor from dwc->queue to dwc->active_list in dwc_complete_all dwc_complete_all and other routines was removing all descriptors from dwc->queue and pushing them to dwc->active_list. Only one was required to be removed. Also we are calling dwc_dostart, once list is fixed. Signed-off-by: Viresh Kumar Signed-off-by: Vinod Koul diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index 064a183..942b50f 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -87,11 +87,6 @@ static struct dw_desc *dwc_first_active(struct dw_dma_chan *dwc) return list_entry(dwc->active_list.next, struct dw_desc, desc_node); } -static struct dw_desc *dwc_first_queued(struct dw_dma_chan *dwc) -{ - return list_entry(dwc->queue.next, struct dw_desc, desc_node); -} - static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc) { struct dw_desc *desc, *_desc; @@ -262,10 +257,11 @@ static void dwc_complete_all(struct dw_dma *dw, struct dw_dma_chan *dwc) * Submit queued descriptors ASAP, i.e. before we go through * the completed ones. */ - if (!list_empty(&dwc->queue)) - dwc_dostart(dwc, dwc_first_queued(dwc)); list_splice_init(&dwc->active_list, &list); - list_splice_init(&dwc->queue, &dwc->active_list); + if (!list_empty(&dwc->queue)) { + list_move(dwc->queue.next, &dwc->active_list); + dwc_dostart(dwc, dwc_first_active(dwc)); + } list_for_each_entry_safe(desc, _desc, &list, desc_node) dwc_descriptor_complete(dwc, desc); @@ -325,8 +321,8 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) cpu_relax(); if (!list_empty(&dwc->queue)) { - dwc_dostart(dwc, dwc_first_queued(dwc)); - list_splice_init(&dwc->queue, &dwc->active_list); + list_move(dwc->queue.next, &dwc->active_list); + dwc_dostart(dwc, dwc_first_active(dwc)); } } @@ -352,7 +348,7 @@ static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc) */ bad_desc = dwc_first_active(dwc); list_del_init(&bad_desc->desc_node); - list_splice_init(&dwc->queue, dwc->active_list.prev); + list_move(dwc->queue.next, dwc->active_list.prev); /* Clear the error flag and try to restart the controller */ dma_writel(dw, CLEAR.ERROR, dwc->mask); @@ -547,8 +543,8 @@ static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx) if (list_empty(&dwc->active_list)) { dev_vdbg(chan2dev(tx->chan), "tx_submit: started %u\n", desc->txd.cookie); - dwc_dostart(dwc, desc); list_add_tail(&desc->desc_node, &dwc->active_list); + dwc_dostart(dwc, dwc_first_active(dwc)); } else { dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u\n", desc->txd.cookie); -- cgit v0.10.2 From 569432efa7975f5795efb8142134f5a098942381 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 3 Mar 2011 15:47:17 +0530 Subject: dw_dmac: Calling dwc_scan_descriptors from dwc_tx_status() after taking lock Lock must be taken before calling dwc_scan_descriptors, as this may access/modify shared data and queues. dwc_tx_status wasn't taking lock before calling this routine. 
This patch add code that takes lock before calling dwc_scan_descriptors. Signed-off-by: Viresh Kumar Signed-off-by: Vinod Koul diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index 942b50f..2b0d5e9 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -836,7 +836,9 @@ dwc_tx_status(struct dma_chan *chan, ret = dma_async_is_complete(cookie, last_complete, last_used); if (ret != DMA_SUCCESS) { + spin_lock_bh(&dwc->lock); dwc_scan_descriptors(to_dw_dma(chan->device), dwc); + spin_unlock_bh(&dwc->lock); last_complete = dwc->completed; last_used = chan->cookie; -- cgit v0.10.2 From a02274564dd78f7edde3c9ff197ed44f2f8a5a81 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 3 Mar 2011 15:47:18 +0530 Subject: dw_dmac: Adding support for 64 bit access width for memcpy xfers Signed-off-by: Viresh Kumar Signed-off-by: Vinod Koul diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index 2b0d5e9..e5d97bf 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -583,7 +583,9 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, * We can be a lot more clever here, but this should take care * of the most common optimization. */ - if (!((src | dest | len) & 3)) + if (!((src | dest | len) & 7)) + src_width = dst_width = 3; + else if (!((src | dest | len) & 3)) src_width = dst_width = 2; else if (!((src | dest | len) & 1)) src_width = dst_width = 1; -- cgit v0.10.2 From 418e74070662e1ae7d9bb5202f773d35c9a7f05e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 4 Mar 2011 15:42:50 +0530 Subject: dw_dmac: Change value of DWC_MAX_COUNT to 4095. Every descriptor can transfer a maximum count of 4095 (12 bits, in control reg), So we must have DWC_MAX_COUNT as 4095 instead of 2048. Signed-off-by: Viresh Kumar Signed-off-by: Vinod Koul diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index e5d97bf..711ebe9 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -47,14 +47,13 @@ /* * This is configuration-dependent and usually a funny size like 4095. - * Let's round it down to the nearest power of two. * * Note that this is a transfer count, i.e. if we transfer 32-bit - * words, we can do 8192 bytes per descriptor. + * words, we can do 16380 bytes per descriptor. * * This parameter is also system-specific. */ -#define DWC_MAX_COUNT 2048U +#define DWC_MAX_COUNT 4095U /* * Number of descriptors to allocate for each channel. This should be -- cgit v0.10.2 From e518076ef8cb56adb558ff56ad5bfa0cd9f3abd9 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 3 Mar 2011 15:47:20 +0530 Subject: dw_dmac: Mark all tx_descriptors with DMA_CRTL_ACK after xfer finish dwc_desc_get checks all descriptors for DMA_CTRL_ACK before allocating them for transfers. And descriptors are not marked with DMA_CRTL_ACK after transfer finishes. Thus descriptor once used is not usable again. 
This patch marks descriptors with DMA_CRTL_ACK after dma xfer finishes Signed-off-by: Viresh Kumar Signed-off-by: Vinod Koul diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index 711ebe9..6ab440e 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -198,6 +198,7 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc) dma_async_tx_callback callback; void *param; struct dma_async_tx_descriptor *txd = &desc->txd; + struct dw_desc *child; dev_vdbg(chan2dev(&dwc->chan), "descriptor %u complete\n", txd->cookie); @@ -206,6 +207,12 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc) param = txd->callback_param; dwc_sync_desc_for_cpu(dwc, desc); + + /* async_tx_ack */ + list_for_each_entry(child, &desc->tx_list, desc_node) + async_tx_ack(&child->txd); + async_tx_ack(&desc->txd); + list_splice_init(&desc->tx_list, &dwc->free_list); list_move(&desc->desc_node, &dwc->free_list); -- cgit v0.10.2 From b0c3130d69bda5cd91aa3b3f08e7878df49fde69 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 3 Mar 2011 15:47:21 +0530 Subject: dw_dmac: Pass Channel Allocation Order from platform_data In SPEAr Platform channels 4-7 have more Fifo depth. So we must get better channel first. This patch introduces concept of channel allocation order in dw_dmac. If user doesn't pass anything or 0, than normal (ascending) channel allocation will follow, else channels will be allocated in descending order. Signed-off-by: Viresh Kumar Signed-off-by: Vinod Koul diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index 6ab440e..f413e12 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -1319,7 +1319,11 @@ static int __init dw_probe(struct platform_device *pdev) dwc->chan.device = &dw->dma; dwc->chan.cookie = dwc->completed = 1; dwc->chan.chan_id = i; - list_add_tail(&dwc->chan.device_node, &dw->dma.channels); + if (pdata->chan_allocation_order == CHAN_ALLOCATION_ASCENDING) + list_add_tail(&dwc->chan.device_node, + &dw->dma.channels); + else + list_add(&dwc->chan.device_node, &dw->dma.channels); dwc->ch_regs = &__dw_regs(dw)->CHAN[i]; spin_lock_init(&dwc->lock); diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h index deec66b..a18c498 100644 --- a/include/linux/dw_dmac.h +++ b/include/linux/dw_dmac.h @@ -22,6 +22,9 @@ struct dw_dma_platform_data { unsigned int nr_channels; bool is_private; +#define CHAN_ALLOCATION_ASCENDING 0 /* zero to seven */ +#define CHAN_ALLOCATION_DESCENDING 1 /* seven to zero */ + unsigned char chan_allocation_order; }; /** -- cgit v0.10.2 From 93317e8e35b77633d589fe0e132291195757d785 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 3 Mar 2011 15:47:22 +0530 Subject: dw_dmac: Pass Channel Priority from platform_data In Synopsys designware, channel priority is programmable. This patch adds support for passing channel priority through platform data. By default Ascending channel priority will be followed, i.e. channel 0 will get highest priority and channel 7 will get lowest. 
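[Editor's sketch] Both knobs added by the two patches above are plain fields in dw_dma_platform_data, so a board file selects them when it registers the controller. A hypothetical board setup, assuming eight channels where the higher-numbered ones have deeper FIFOs, might look like this:

    #include <linux/platform_device.h>
    #include <linux/dw_dmac.h>

    /* Hypothetical board: hand out channels 7..0 (deeper FIFOs first) and
     * give the highest-numbered channel the highest hardware priority. */
    static struct dw_dma_platform_data example_dw_pdata = {
            .nr_channels            = 8,
            .chan_allocation_order  = CHAN_ALLOCATION_DESCENDING,
            .chan_priority          = CHAN_PRIORITY_DESCENDING,
    };

    static struct platform_device example_dw_device = {
            .name   = "dw_dmac",    /* must match the dw_dmac platform driver name */
            .id     = 0,
            .dev    = {
                    .platform_data = &example_dw_pdata,
            },
    };
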
Signed-off-by: Viresh Kumar Signed-off-by: Vinod Koul diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index f413e12..318a342 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -901,8 +901,11 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan) BUG_ON(!dws->dma_dev || dws->dma_dev != dw->dma.dev); cfghi = dws->cfg_hi; - cfglo = dws->cfg_lo; + cfglo = dws->cfg_lo & ~DWC_CFGL_CH_PRIOR_MASK; } + + cfglo |= DWC_CFGL_CH_PRIOR(dwc->priority); + channel_writel(dwc, CFG_LO, cfglo); channel_writel(dwc, CFG_HI, cfghi); @@ -1325,6 +1328,12 @@ static int __init dw_probe(struct platform_device *pdev) else list_add(&dwc->chan.device_node, &dw->dma.channels); + /* 7 is highest priority & 0 is lowest. */ + if (pdata->chan_priority == CHAN_PRIORITY_ASCENDING) + dwc->priority = 7 - i; + else + dwc->priority = i; + dwc->ch_regs = &__dw_regs(dw)->CHAN[i]; spin_lock_init(&dwc->lock); dwc->mask = 1 << i; diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h index d9a939f..6a8e6d3 100644 --- a/drivers/dma/dw_dmac_regs.h +++ b/drivers/dma/dw_dmac_regs.h @@ -101,6 +101,8 @@ struct dw_dma_regs { #define DWC_CTLH_BLOCK_TS_MASK 0x00000fff /* Bitfields in CFG_LO. Platform-configurable bits are in */ +#define DWC_CFGL_CH_PRIOR_MASK (0x7 << 5) /* priority mask */ +#define DWC_CFGL_CH_PRIOR(x) ((x) << 5) /* priority */ #define DWC_CFGL_CH_SUSP (1 << 8) /* pause xfer */ #define DWC_CFGL_FIFO_EMPTY (1 << 9) /* pause xfer */ #define DWC_CFGL_HS_DST (1 << 10) /* handshake w/dst */ @@ -134,6 +136,7 @@ struct dw_dma_chan { struct dma_chan chan; void __iomem *ch_regs; u8 mask; + u8 priority; spinlock_t lock; diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h index a18c498..64c76da 100644 --- a/include/linux/dw_dmac.h +++ b/include/linux/dw_dmac.h @@ -25,6 +25,9 @@ struct dw_dma_platform_data { #define CHAN_ALLOCATION_ASCENDING 0 /* zero to seven */ #define CHAN_ALLOCATION_DESCENDING 1 /* seven to zero */ unsigned char chan_allocation_order; +#define CHAN_PRIORITY_ASCENDING 0 /* chan0 highest */ +#define CHAN_PRIORITY_DESCENDING 1 /* chan7 highest */ + unsigned char chan_priority; }; /** @@ -70,7 +73,6 @@ struct dw_dma_slave { #define DWC_CFGH_DST_PER(x) ((x) << 11) /* Platform-configurable bits in CFG_LO */ -#define DWC_CFGL_PRIO(x) ((x) << 5) /* priority */ #define DWC_CFGL_LOCK_CH_XFER (0 << 12) /* scope of LOCK_CH */ #define DWC_CFGL_LOCK_CH_BLOCK (1 << 12) #define DWC_CFGL_LOCK_CH_XACT (2 << 12) -- cgit v0.10.2 From 59c22fc11d12b69da36c6585a38229863ba0bb16 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 3 Mar 2011 15:47:23 +0530 Subject: dw_dmac: Changing type of src_master and dest_master to u8. src_master & dest_master don't required u32 as they have values limited to u8 only. Also their description is missing from doc style comment. This patch fixes above mentioned issues. Signed-off-by: Viresh Kumar Signed-off-by: Vinod Koul diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h index 64c76da..3ba2f06 100644 --- a/include/linux/dw_dmac.h +++ b/include/linux/dw_dmac.h @@ -53,6 +53,8 @@ enum dw_dma_slave_width { * @reg_width: peripheral register width * @cfg_hi: Platform-specific initializer for the CFG_HI register * @cfg_lo: Platform-specific initializer for the CFG_LO register + * @src_master: src master for transfers on allocated channel. + * @dst_master: dest master for transfers on allocated channel. 
*/ struct dw_dma_slave { struct device *dma_dev; @@ -61,8 +63,8 @@ struct dw_dma_slave { enum dw_dma_slave_width reg_width; u32 cfg_hi; u32 cfg_lo; - int src_master; - int dst_master; + u8 src_master; + u8 dst_master; }; /* Platform-configurable bits in CFG_HI */ -- cgit v0.10.2 From ee66509d7f354eecb45ac99f21ea6aa8650dea7e Mon Sep 17 00:00:00 2001 From: Viresh KUMAR Date: Fri, 4 Mar 2011 15:42:51 +0530 Subject: dw_dmac: Allow src/dst msize & flow controller to be configured at runtime Msize or Burst Size is peripheral dependent in case of prep_slave_sg and cyclic_prep transfers, and in case of memcpy transfers it is platform dependent. So msize configuration must come from platform data. Also some peripherals (ex: JPEG), need to be flow controller for dma transfers, so this information in case of slave_sg & cyclic_prep transfers must come from platform data. Signed-off-by: Viresh Kumar Signed-off-by: Vinod Koul diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index 318a342..90ea08a 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -36,9 +36,11 @@ struct dw_dma_slave *__slave = (private); \ int dms = __slave ? __slave->dst_master : 0; \ int sms = __slave ? __slave->src_master : 1; \ + u8 smsize = __slave ? __slave->src_msize : 0; \ + u8 dmsize = __slave ? __slave->dst_msize : 0; \ \ - (DWC_CTLL_DST_MSIZE(0) \ - | DWC_CTLL_SRC_MSIZE(0) \ + (DWC_CTLL_DST_MSIZE(dmsize) \ + | DWC_CTLL_SRC_MSIZE(smsize) \ | DWC_CTLL_LLP_D_EN \ | DWC_CTLL_LLP_S_EN \ | DWC_CTLL_DMS(dms) \ @@ -683,7 +685,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, | DWC_CTLL_DST_WIDTH(reg_width) | DWC_CTLL_DST_FIX | DWC_CTLL_SRC_INC - | DWC_CTLL_FC_M2P); + | DWC_CTLL_FC(dws->fc)); reg = dws->tx_reg; for_each_sg(sgl, sg, sg_len, i) { struct dw_desc *desc; @@ -728,7 +730,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, | DWC_CTLL_SRC_WIDTH(reg_width) | DWC_CTLL_DST_INC | DWC_CTLL_SRC_FIX - | DWC_CTLL_FC_P2M); + | DWC_CTLL_FC(dws->fc)); reg = dws->rx_reg; for_each_sg(sgl, sg, sg_len, i) { @@ -1146,7 +1148,7 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan, | DWC_CTLL_SRC_WIDTH(reg_width) | DWC_CTLL_DST_FIX | DWC_CTLL_SRC_INC - | DWC_CTLL_FC_M2P + | DWC_CTLL_FC(dws->fc) | DWC_CTLL_INT_EN); break; case DMA_FROM_DEVICE: @@ -1157,7 +1159,7 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan, | DWC_CTLL_DST_WIDTH(reg_width) | DWC_CTLL_DST_INC | DWC_CTLL_SRC_FIX - | DWC_CTLL_FC_P2M + | DWC_CTLL_FC(dws->fc) | DWC_CTLL_INT_EN); break; default: diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h index 6a8e6d3..9a32964 100644 --- a/drivers/dma/dw_dmac_regs.h +++ b/drivers/dma/dw_dmac_regs.h @@ -86,6 +86,7 @@ struct dw_dma_regs { #define DWC_CTLL_SRC_MSIZE(n) ((n)<<14) #define DWC_CTLL_S_GATH_EN (1 << 17) /* src gather, !FIX */ #define DWC_CTLL_D_SCAT_EN (1 << 18) /* dst scatter, !FIX */ +#define DWC_CTLL_FC(n) ((n) << 20) #define DWC_CTLL_FC_M2M (0 << 20) /* mem-to-mem */ #define DWC_CTLL_FC_M2P (1 << 20) /* mem-to-periph */ #define DWC_CTLL_FC_P2M (2 << 20) /* periph-to-mem */ diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h index 3ba2f06..6998d93 100644 --- a/include/linux/dw_dmac.h +++ b/include/linux/dw_dmac.h @@ -42,6 +42,30 @@ enum dw_dma_slave_width { DW_DMA_SLAVE_WIDTH_32BIT, }; +/* bursts size */ +enum dw_dma_msize { + DW_DMA_MSIZE_1, + DW_DMA_MSIZE_4, + DW_DMA_MSIZE_8, + DW_DMA_MSIZE_16, + DW_DMA_MSIZE_32, + DW_DMA_MSIZE_64, + DW_DMA_MSIZE_128, + DW_DMA_MSIZE_256, +}; + +/* flow controller */ 
+enum dw_dma_fc { + DW_DMA_FC_D_M2M, + DW_DMA_FC_D_M2P, + DW_DMA_FC_D_P2M, + DW_DMA_FC_D_P2P, + DW_DMA_FC_P_P2M, + DW_DMA_FC_SP_P2P, + DW_DMA_FC_P_M2P, + DW_DMA_FC_DP_P2P, +}; + /** * struct dw_dma_slave - Controller-specific information about a slave * @@ -55,6 +79,9 @@ enum dw_dma_slave_width { * @cfg_lo: Platform-specific initializer for the CFG_LO register * @src_master: src master for transfers on allocated channel. * @dst_master: dest master for transfers on allocated channel. + * @src_msize: src burst size. + * @dst_msize: dest burst size. + * @fc: flow controller for DMA transfer */ struct dw_dma_slave { struct device *dma_dev; @@ -65,6 +92,9 @@ struct dw_dma_slave { u32 cfg_lo; u8 src_master; u8 dst_master; + u8 src_msize; + u8 dst_msize; + u8 fc; }; /* Platform-configurable bits in CFG_HI */ -- cgit v0.10.2 From e51dc53b8c7fa2d9ac4ef8f317f5dfe07a79e65a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 3 Mar 2011 15:47:25 +0530 Subject: dw_dmac: Setting Default Burst length for transfers as 16. This patch sets default Burst length for all transfer to 16. This will enhance performance when user doesn't have any chan->private data. Signed-off-by: Viresh Kumar Signed-off-by: Vinod Koul diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index 90ea08a..9c25c7d 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -36,8 +36,8 @@ struct dw_dma_slave *__slave = (private); \ int dms = __slave ? __slave->dst_master : 0; \ int sms = __slave ? __slave->src_master : 1; \ - u8 smsize = __slave ? __slave->src_msize : 0; \ - u8 dmsize = __slave ? __slave->dst_msize : 0; \ + u8 smsize = __slave ? __slave->src_msize : DW_DMA_MSIZE_16; \ + u8 dmsize = __slave ? __slave->dst_msize : DW_DMA_MSIZE_16; \ \ (DWC_CTLL_DST_MSIZE(dmsize) \ | DWC_CTLL_SRC_MSIZE(smsize) \ -- cgit v0.10.2 From 1c5b0538c719f52cface39f699fb5d39a50149d6 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 3 Mar 2011 15:47:26 +0530 Subject: avr32: at32ap700x: Specify DMA Flow Controller, Src and Dst msize Now that the dw_dmac DMA driver supports configurable Flow Controller, source and destination burst or msize, we need to specify which ones to use. Msize or burst size was previously hardcoded to 1, Flow controller was DMA for both M2P & P2M transfers. Signed-off-by: Viresh Kumar Signed-off-by: Vinod Koul diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c index 2747cde..b4aaebd 100644 --- a/arch/avr32/mach-at32ap/at32ap700x.c +++ b/arch/avr32/mach-at32ap/at32ap700x.c @@ -2050,6 +2050,9 @@ at32_add_device_ac97c(unsigned int id, struct ac97c_platform_data *data, rx_dws->cfg_lo &= ~(DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL); rx_dws->src_master = 0; rx_dws->dst_master = 1; + rx_dws->src_msize = DW_DMA_MSIZE_1; + rx_dws->dst_msize = DW_DMA_MSIZE_1; + rx_dws->fc = DW_DMA_FC_D_P2M; } /* Check if DMA slave interface for playback should be configured. 
*/ @@ -2060,6 +2063,9 @@ at32_add_device_ac97c(unsigned int id, struct ac97c_platform_data *data, tx_dws->cfg_lo &= ~(DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL); rx_dws->src_master = 0; rx_dws->dst_master = 1; + tx_dws->src_msize = DW_DMA_MSIZE_1; + tx_dws->dst_msize = DW_DMA_MSIZE_1; + tx_dws->fc = DW_DMA_FC_D_M2P; } if (platform_device_add_data(pdev, data, @@ -2134,6 +2140,9 @@ at32_add_device_abdac(unsigned int id, struct atmel_abdac_pdata *data) dws->cfg_lo &= ~(DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL); dws->src_master = 0; dws->dst_master = 1; + dws->src_msize = DW_DMA_MSIZE_1; + dws->dst_msize = DW_DMA_MSIZE_1; + dws->fc = DW_DMA_FC_D_M2P; if (platform_device_add_data(pdev, data, sizeof(struct atmel_abdac_pdata))) -- cgit v0.10.2 From 29782da5f0206335e2325508ba4fee0d624ddab6 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 4 Mar 2011 14:58:32 +0530 Subject: dmaengine/dw_dmac fix: use readl & writel instead of __raw_readl & __raw_writel On ARMv7 cores, device memory mapped as Normal Non-cacheable, may not guarantee ordered access causing failures in device drivers that do not use the mandatory memory barriers. readl & writel versions contain necessary memory barriers for this. commit 79f64dbf68c8a9779a7e9a25e0a9f0217a25b57a: "ARM: 6273/1: Add barriers to the I/O accessors if ARM_DMA_MEM_BUFFERABLE" can be referred for more information on this. Signed-off-by: Viresh Kumar Signed-off-by: Vinod Koul diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h index 9a32964..720f821 100644 --- a/drivers/dma/dw_dmac_regs.h +++ b/drivers/dma/dw_dmac_regs.h @@ -159,9 +159,9 @@ __dwc_regs(struct dw_dma_chan *dwc) } #define channel_readl(dwc, name) \ - __raw_readl(&(__dwc_regs(dwc)->name)) + readl(&(__dwc_regs(dwc)->name)) #define channel_writel(dwc, name, val) \ - __raw_writel((val), &(__dwc_regs(dwc)->name)) + writel((val), &(__dwc_regs(dwc)->name)) static inline struct dw_dma_chan *to_dw_dma_chan(struct dma_chan *chan) { @@ -185,9 +185,9 @@ static inline struct dw_dma_regs __iomem *__dw_regs(struct dw_dma *dw) } #define dma_readl(dw, name) \ - __raw_readl(&(__dw_regs(dw)->name)) + readl(&(__dw_regs(dw)->name)) #define dma_writel(dw, name, val) \ - __raw_writel((val), &(__dw_regs(dw)->name)) + writel((val), &(__dw_regs(dw)->name)) #define channel_set_bit(dw, reg, mask) \ dma_writel(dw, reg, ((mask) << 8) | (mask)) -- cgit v0.10.2 From 0b863b333f529c7ddd8bee58e6696a7254417a05 Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Sun, 6 Mar 2011 17:26:10 +0600 Subject: drivers, pch_dma: Fix warning when CONFIG_PM=n. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CONFIG_PM=n, we get the following warning: drivers/dma/pch_dma.c:741: warning: ‘pch_dma_suspend’ defined but not used drivers/dma/pch_dma.c:755: warning: ‘pch_dma_resume’ defined but not used To fix it, wrap pch_dma_{suspend,resume} and pch_dma_{save,restore}_regs functions with CONFIG_PM. 
Signed-off-by: Rakib Mullick Signed-off-by: Vinod Koul diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c index b1dfba7..8d8fef1 100644 --- a/drivers/dma/pch_dma.c +++ b/drivers/dma/pch_dma.c @@ -693,6 +693,7 @@ static irqreturn_t pd_irq(int irq, void *devid) return ret; } +#ifdef CONFIG_PM static void pch_dma_save_regs(struct pch_dma *pd) { struct pch_dma_chan *pd_chan; @@ -770,6 +771,7 @@ static int pch_dma_resume(struct pci_dev *pdev) return 0; } +#endif static int __devinit pch_dma_probe(struct pci_dev *pdev, const struct pci_device_id *id) -- cgit v0.10.2 From d47effe1be0c4fc983306a9c704632e3a087eed8 Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Tue, 1 Mar 2011 17:06:37 +0530 Subject: vhost: Cleanup vhost.c and net.c Minor cleanup of vhost.c and net.c to match coding style. Signed-off-by: Krishna Kumar Signed-off-by: Michael S. Tsirkin diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index f616cef..59dad9f 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -60,6 +60,7 @@ static int move_iovec_hdr(struct iovec *from, struct iovec *to, { int seg = 0; size_t size; + while (len && seg < iov_count) { size = min(from->iov_len, len); to->iov_base = from->iov_base; @@ -79,6 +80,7 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to, { int seg = 0; size_t size; + while (len && seg < iovcount) { size = min(from->iov_len, len); to->iov_base = from->iov_base; @@ -296,17 +298,16 @@ static void handle_rx_big(struct vhost_net *net) .msg_iov = vq->iov, .msg_flags = MSG_DONTWAIT, }; - struct virtio_net_hdr hdr = { .flags = 0, .gso_type = VIRTIO_NET_HDR_GSO_NONE }; - size_t len, total_len = 0; int err; size_t hdr_size; /* TODO: check that we are running from vhost_worker? */ struct socket *sock = rcu_dereference_check(vq->private_data, 1); + if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue)) return; @@ -405,18 +406,17 @@ static void handle_rx_mergeable(struct vhost_net *net) .msg_iov = vq->iov, .msg_flags = MSG_DONTWAIT, }; - struct virtio_net_hdr_mrg_rxbuf hdr = { .hdr.flags = 0, .hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE }; - size_t total_len = 0; int err, headcount; size_t vhost_hlen, sock_hlen; size_t vhost_len, sock_len; /* TODO: check that we are running from vhost_worker? 
*/ struct socket *sock = rcu_dereference_check(vq->private_data, 1); + if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue)) return; @@ -654,6 +654,7 @@ static struct socket *get_raw_socket(int fd) } uaddr; int uaddr_len = sizeof uaddr, r; struct socket *sock = sockfd_lookup(fd, &r); + if (!sock) return ERR_PTR(-ENOTSOCK); @@ -682,6 +683,7 @@ static struct socket *get_tap_socket(int fd) { struct file *file = fget(fd); struct socket *sock; + if (!file) return ERR_PTR(-EBADF); sock = tun_get_socket(file); @@ -696,6 +698,7 @@ static struct socket *get_tap_socket(int fd) static struct socket *get_socket(int fd) { struct socket *sock; + /* special case to disable backend */ if (fd == -1) return NULL; @@ -741,9 +744,9 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) oldsock = rcu_dereference_protected(vq->private_data, lockdep_is_held(&vq->mutex)); if (sock != oldsock) { - vhost_net_disable_vq(n, vq); - rcu_assign_pointer(vq->private_data, sock); - vhost_net_enable_vq(n, vq); + vhost_net_disable_vq(n, vq); + rcu_assign_pointer(vq->private_data, sock); + vhost_net_enable_vq(n, vq); } mutex_unlock(&vq->mutex); @@ -768,6 +771,7 @@ static long vhost_net_reset_owner(struct vhost_net *n) struct socket *tx_sock = NULL; struct socket *rx_sock = NULL; long err; + mutex_lock(&n->dev.mutex); err = vhost_dev_check_owner(&n->dev); if (err) @@ -829,6 +833,7 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl, struct vhost_vring_file backend; u64 features; int r; + switch (ioctl) { case VHOST_NET_SET_BACKEND: if (copy_from_user(&backend, argp, sizeof backend)) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index ade0568..b0cc7f8 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -41,8 +41,8 @@ static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, poll_table *pt) { struct vhost_poll *poll; - poll = container_of(pt, struct vhost_poll, table); + poll = container_of(pt, struct vhost_poll, table); poll->wqh = wqh; add_wait_queue(wqh, &poll->wait); } @@ -85,6 +85,7 @@ void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, void vhost_poll_start(struct vhost_poll *poll, struct file *file) { unsigned long mask; + mask = file->f_op->poll(file, &poll->table); if (mask) vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask); @@ -101,6 +102,7 @@ static bool vhost_work_seq_done(struct vhost_dev *dev, struct vhost_work *work, unsigned seq) { int left; + spin_lock_irq(&dev->work_lock); left = seq - work->done_seq; spin_unlock_irq(&dev->work_lock); @@ -222,6 +224,7 @@ static int vhost_worker(void *data) static long vhost_dev_alloc_iovecs(struct vhost_dev *dev) { int i; + for (i = 0; i < dev->nvqs; ++i) { dev->vqs[i].indirect = kmalloc(sizeof *dev->vqs[i].indirect * UIO_MAXIOV, GFP_KERNEL); @@ -235,6 +238,7 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev) goto err_nomem; } return 0; + err_nomem: for (; i >= 0; --i) { kfree(dev->vqs[i].indirect); @@ -247,6 +251,7 @@ err_nomem: static void vhost_dev_free_iovecs(struct vhost_dev *dev) { int i; + for (i = 0; i < dev->nvqs; ++i) { kfree(dev->vqs[i].indirect); dev->vqs[i].indirect = NULL; @@ -296,26 +301,28 @@ long vhost_dev_check_owner(struct vhost_dev *dev) } struct vhost_attach_cgroups_struct { - struct vhost_work work; - struct task_struct *owner; - int ret; + struct vhost_work work; + struct task_struct *owner; + int ret; }; static void vhost_attach_cgroups_work(struct vhost_work *work) { - struct vhost_attach_cgroups_struct *s; - s = container_of(work, struct 
vhost_attach_cgroups_struct, work); - s->ret = cgroup_attach_task_all(s->owner, current); + struct vhost_attach_cgroups_struct *s; + + s = container_of(work, struct vhost_attach_cgroups_struct, work); + s->ret = cgroup_attach_task_all(s->owner, current); } static int vhost_attach_cgroups(struct vhost_dev *dev) { - struct vhost_attach_cgroups_struct attach; - attach.owner = current; - vhost_work_init(&attach.work, vhost_attach_cgroups_work); - vhost_work_queue(dev, &attach.work); - vhost_work_flush(dev, &attach.work); - return attach.ret; + struct vhost_attach_cgroups_struct attach; + + attach.owner = current; + vhost_work_init(&attach.work, vhost_attach_cgroups_work); + vhost_work_queue(dev, &attach.work); + vhost_work_flush(dev, &attach.work); + return attach.ret; } /* Caller should have device mutex */ @@ -323,11 +330,13 @@ static long vhost_dev_set_owner(struct vhost_dev *dev) { struct task_struct *worker; int err; + /* Is there an owner already? */ if (dev->mm) { err = -EBUSY; goto err_mm; } + /* No owner, become one */ dev->mm = get_task_mm(current); worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid); @@ -380,6 +389,7 @@ long vhost_dev_reset_owner(struct vhost_dev *dev) void vhost_dev_cleanup(struct vhost_dev *dev) { int i; + for (i = 0; i < dev->nvqs; ++i) { if (dev->vqs[i].kick && dev->vqs[i].handle_kick) { vhost_poll_stop(&dev->vqs[i].poll); @@ -421,6 +431,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev) static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz) { u64 a = addr / VHOST_PAGE_SIZE / 8; + /* Make sure 64 bit math will not overflow. */ if (a > ULONG_MAX - (unsigned long)log_base || a + (unsigned long)log_base > ULONG_MAX) @@ -461,6 +472,7 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem, int log_all) { int i; + for (i = 0; i < d->nvqs; ++i) { int ok; mutex_lock(&d->vqs[i].mutex); @@ -527,6 +539,7 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) { struct vhost_memory mem, *newmem, *oldmem; unsigned long size = offsetof(struct vhost_memory, regions); + if (copy_from_user(&mem, m, size)) return -EFAULT; if (mem.padding) @@ -544,7 +557,8 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) return -EFAULT; } - if (!memory_access_ok(d, newmem, vhost_has_feature(d, VHOST_F_LOG_ALL))) { + if (!memory_access_ok(d, newmem, + vhost_has_feature(d, VHOST_F_LOG_ALL))) { kfree(newmem); return -EFAULT; } @@ -560,6 +574,7 @@ static int init_used(struct vhost_virtqueue *vq, struct vring_used __user *used) { int r = put_user(vq->used_flags, &used->flags); + if (r) return r; return get_user(vq->last_used_idx, &used->idx); @@ -849,6 +864,7 @@ static const struct vhost_memory_region *find_region(struct vhost_memory *mem, { struct vhost_memory_region *reg; int i; + /* linear search is not brilliant, but we really have on the order of 6 * regions in practice */ for (i = 0; i < mem->nregions; ++i) { @@ -871,6 +887,7 @@ static int set_bit_to_user(int nr, void __user *addr) void *base; int bit = nr + (log % PAGE_SIZE) * 8; int r; + r = get_user_pages_fast(log, 1, 1, &page); if (r < 0) return r; @@ -888,6 +905,7 @@ static int log_write(void __user *log_base, { u64 write_page = write_address / VHOST_PAGE_SIZE; int r; + if (!write_length) return 0; write_length += write_address % VHOST_PAGE_SIZE; @@ -1037,8 +1055,8 @@ static int get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq, i, count); return -EINVAL; } - if (unlikely(memcpy_fromiovec((unsigned char 
*)&desc, vq->indirect, - sizeof desc))) { + if (unlikely(memcpy_fromiovec((unsigned char *)&desc, + vq->indirect, sizeof desc))) { vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n", i, (size_t)indirect->addr + i * sizeof desc); return -EINVAL; @@ -1317,6 +1335,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) { __u16 flags; + /* Flush out used index updates. This is paired * with the barrier that the Guest executes when enabling * interrupts. */ @@ -1361,6 +1380,7 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq) { u16 avail_idx; int r; + if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY)) return false; vq->used_flags &= ~VRING_USED_F_NO_NOTIFY; @@ -1387,6 +1407,7 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq) void vhost_disable_notify(struct vhost_virtqueue *vq) { int r; + if (vq->used_flags & VRING_USED_F_NO_NOTIFY) return; vq->used_flags |= VRING_USED_F_NO_NOTIFY; -- cgit v0.10.2 From fcc042a2806064ffcaed7a0c5cb710eca0e99108 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 6 Mar 2011 13:33:49 +0200 Subject: vhost: copy_from_user -> __copy_from_user copy_from_user is pretty high on perf top profile, replacing it with __copy_from_user helps. It's also safe because we do access_ok checks during setup. Signed-off-by: Michael S. Tsirkin diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index b0cc7f8..2ab2912 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1171,7 +1171,7 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, i, vq->num, head); return -EINVAL; } - ret = copy_from_user(&desc, vq->desc + i, sizeof desc); + ret = __copy_from_user(&desc, vq->desc + i, sizeof desc); if (unlikely(ret)) { vq_err(vq, "Failed to get descriptor: idx %d addr %p\n", i, vq->desc + i); -- cgit v0.10.2 From a110f4ef810ee29d810876df725f41d66629733e Mon Sep 17 00:00:00 2001 From: Marek Belisko Date: Wed, 9 Mar 2011 21:46:20 +0100 Subject: ASoC: mini2440: Fix uda134x codec problem. ASoC audio for mini2440 platform in current kenrel doesn't work. First problem is samsung_asoc_dma device is missing in initialization. Next problem is with codec. Codec is initialized but never probed because no platform_device exist for codec driver. It leads to errors during codec binding to asoc dai. Next problem was platform data which was passed from board to asoc main driver but not passed to codec when called codec_soc_probe(). Following patch should fix issues. But not sure if in correct way. Please review. 
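The missing piece is that a platform_driver's probe only runs once board code registers a platform_device with a matching name; without that device the codec driver is loaded but never bound, so the card's dai_link cannot find it. A rough illustration is below — the device name and id come from the patch that follows, while the init function and the direct platform_device_register() call are only illustrative (the patch instead adds the device to the board's mini2440_devices[] table):

#include <linux/platform_device.h>

/* .name must match the codec platform_driver ("uda134x-codec") */
static struct platform_device uda1340_codec = {
	.name	= "uda134x-codec",
	.id	= -1,
};

static int __init board_codec_init(void)
{
	/* registering the device lets the driver core probe the codec driver */
	return platform_device_register(&uda1340_codec);
}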
Signed-off-by: Marek Belisko Acked-by: Liam Girdwood Signed-off-by: Mark Brown Cc: stable@kernel.org diff --git a/arch/arm/mach-s3c2440/mach-mini2440.c b/arch/arm/mach-s3c2440/mach-mini2440.c index f62bb4c..7c3fb07 100644 --- a/arch/arm/mach-s3c2440/mach-mini2440.c +++ b/arch/arm/mach-s3c2440/mach-mini2440.c @@ -506,6 +506,11 @@ static struct i2c_board_info mini2440_i2c_devs[] __initdata = { }, }; +static struct platform_device uda1340_codec = { + .name = "uda134x-codec", + .id = -1, +}; + static struct platform_device *mini2440_devices[] __initdata = { &s3c_device_ohci, &s3c_device_wdt, @@ -521,7 +526,9 @@ static struct platform_device *mini2440_devices[] __initdata = { &s3c_device_nand, &s3c_device_sdi, &s3c_device_iis, + &uda1340_codec, &mini2440_audio, + &samsung_asoc_dma, }; static void __init mini2440_map_io(void) diff --git a/sound/soc/codecs/uda134x.c b/sound/soc/codecs/uda134x.c index e76847a..48ffd40 100644 --- a/sound/soc/codecs/uda134x.c +++ b/sound/soc/codecs/uda134x.c @@ -486,7 +486,8 @@ static struct snd_soc_dai_driver uda134x_dai = { static int uda134x_soc_probe(struct snd_soc_codec *codec) { struct uda134x_priv *uda134x; - struct uda134x_platform_data *pd = dev_get_drvdata(codec->card->dev); + struct uda134x_platform_data *pd = codec->card->dev->platform_data; + int ret; printk(KERN_INFO "UDA134X SoC Audio Codec\n"); diff --git a/sound/soc/samsung/s3c24xx_uda134x.c b/sound/soc/samsung/s3c24xx_uda134x.c index 3cb7007..dc9d551 100644 --- a/sound/soc/samsung/s3c24xx_uda134x.c +++ b/sound/soc/samsung/s3c24xx_uda134x.c @@ -219,7 +219,7 @@ static struct snd_soc_ops s3c24xx_uda134x_ops = { static struct snd_soc_dai_link s3c24xx_uda134x_dai_link = { .name = "UDA134X", .stream_name = "UDA134X", - .codec_name = "uda134x-hifi", + .codec_name = "uda134x-codec", .codec_dai_name = "uda134x-hifi", .cpu_dai_name = "s3c24xx-iis", .ops = &s3c24xx_uda134x_ops, @@ -314,6 +314,7 @@ static int s3c24xx_uda134x_probe(struct platform_device *pdev) platform_set_drvdata(s3c24xx_uda134x_snd_device, &snd_soc_s3c24xx_uda134x); + platform_device_add_data(s3c24xx_uda134x_snd_device, &s3c24xx_uda134x, sizeof(s3c24xx_uda134x)); ret = platform_device_add(s3c24xx_uda134x_snd_device); if (ret) { printk(KERN_ERR "S3C24XX_UDA134X SoC Audio: Unable to add\n"); -- cgit v0.10.2 From 1a757fe5d4234293d6a3acccd7196f1386443956 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 25 Feb 2011 11:38:51 -0600 Subject: slub: min_partial needs to be in first cacheline It is used in unfreeze_slab() which is a performance critical function. Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 8b6e8ae..875df55 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -70,6 +70,7 @@ struct kmem_cache { struct kmem_cache_cpu __percpu *cpu_slab; /* Used for retriving partial slabs etc */ unsigned long flags; + unsigned long min_partial; int size; /* The size of an object including meta data */ int objsize; /* The size of an object without meta data */ int offset; /* Free pointer offset. */ @@ -83,7 +84,6 @@ struct kmem_cache { void (*ctor)(void *); int inuse; /* Offset to metadata */ int align; /* Alignment */ - unsigned long min_partial; const char *name; /* Name (only for display!) 
*/ struct list_head list; /* List of slab caches */ #ifdef CONFIG_SYSFS -- cgit v0.10.2 From d3f661d69a486db0e0e6343b452f45d91b4b3656 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 25 Feb 2011 11:38:52 -0600 Subject: slub: Get rid of slab_free_hook_irq() The following patch will make the fastpaths lockless and will no longer require interrupts to be disabled. Calling the free hook with irq disabled will no longer be possible. Move the slab_free_hook_irq() logic into slab_free_hook. Only disable interrupts if the features are selected that require callbacks with interrupts off and reenable after calls have been made. Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg diff --git a/mm/slub.c b/mm/slub.c index e15aa7f..bae7a5c 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -807,14 +807,24 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void static inline void slab_free_hook(struct kmem_cache *s, void *x) { kmemleak_free_recursive(x, s->flags); -} -static inline void slab_free_hook_irq(struct kmem_cache *s, void *object) -{ - kmemcheck_slab_free(s, object, s->objsize); - debug_check_no_locks_freed(object, s->objsize); - if (!(s->flags & SLAB_DEBUG_OBJECTS)) - debug_check_no_obj_freed(object, s->objsize); + /* + * Trouble is that we may no longer disable interupts in the fast path + * So in order to make the debug calls that expect irqs to be + * disabled we need to disable interrupts temporarily. + */ +#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP) + { + unsigned long flags; + + local_irq_save(flags); + kmemcheck_slab_free(s, x, s->objsize); + debug_check_no_locks_freed(x, s->objsize); + if (!(s->flags & SLAB_DEBUG_OBJECTS)) + debug_check_no_obj_freed(x, s->objsize); + local_irq_restore(flags); + } +#endif } /* @@ -1101,9 +1111,6 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, static inline void slab_free_hook(struct kmem_cache *s, void *x) {} -static inline void slab_free_hook_irq(struct kmem_cache *s, - void *object) {} - #endif /* CONFIG_SLUB_DEBUG */ /* @@ -1909,8 +1916,6 @@ static __always_inline void slab_free(struct kmem_cache *s, local_irq_save(flags); c = __this_cpu_ptr(s->cpu_slab); - slab_free_hook_irq(s, x); - if (likely(page == c->page && c->node != NUMA_NO_NODE)) { set_freepointer(s, object, c->freelist); c->freelist = object; -- cgit v0.10.2 From 8a5ec0ba42c4919e2d8f4c3138cc8b987fdb0b79 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 25 Feb 2011 11:38:54 -0600 Subject: Lockless (and preemptless) fastpaths for slub Use the this_cpu_cmpxchg_double functionality to implement a lockless allocation algorithm on arches that support fast this_cpu_ops. Each of the per cpu pointers is paired with a transaction id that ensures that updates of the per cpu information can only occur in sequence on a certain cpu. A transaction id is a "long" integer that is comprised of an event number and the cpu number. The event number is incremented for every change to the per cpu state. This means that the cmpxchg instruction can verify for an update that nothing interfered and that we are updating the percpu structure for the processor where we picked up the information and that we are also currently on that processor when we update the information. This results in a significant decrease of the overhead in the fastpaths. It also makes it easy to adopt the fast path for realtime kernels since this is lockless and does not require the use of the current per cpu area over the critical section. 
It is only important that the per cpu area is current at the beginning of the critical section and at the end. So there is no need even to disable preemption. Test results show that the fastpath cycle count is reduced by up to ~ 40% (alloc/free test goes from ~140 cycles down to ~80). The slowpath for kfree adds a few cycles. Sadly this does nothing for the slowpath which is where the main issues with performance in slub are but the best case performance rises significantly. (For that see the more complex slub patches that require cmpxchg_double) Kmalloc: alloc/free test Before: 10000 times kmalloc(8)/kfree -> 134 cycles 10000 times kmalloc(16)/kfree -> 152 cycles 10000 times kmalloc(32)/kfree -> 144 cycles 10000 times kmalloc(64)/kfree -> 142 cycles 10000 times kmalloc(128)/kfree -> 142 cycles 10000 times kmalloc(256)/kfree -> 132 cycles 10000 times kmalloc(512)/kfree -> 132 cycles 10000 times kmalloc(1024)/kfree -> 135 cycles 10000 times kmalloc(2048)/kfree -> 135 cycles 10000 times kmalloc(4096)/kfree -> 135 cycles 10000 times kmalloc(8192)/kfree -> 144 cycles 10000 times kmalloc(16384)/kfree -> 754 cycles After: 10000 times kmalloc(8)/kfree -> 78 cycles 10000 times kmalloc(16)/kfree -> 78 cycles 10000 times kmalloc(32)/kfree -> 82 cycles 10000 times kmalloc(64)/kfree -> 88 cycles 10000 times kmalloc(128)/kfree -> 79 cycles 10000 times kmalloc(256)/kfree -> 79 cycles 10000 times kmalloc(512)/kfree -> 85 cycles 10000 times kmalloc(1024)/kfree -> 82 cycles 10000 times kmalloc(2048)/kfree -> 82 cycles 10000 times kmalloc(4096)/kfree -> 85 cycles 10000 times kmalloc(8192)/kfree -> 82 cycles 10000 times kmalloc(16384)/kfree -> 706 cycles Kmalloc: Repeatedly allocate then free test Before: 10000 times kmalloc(8) -> 211 cycles kfree -> 113 cycles 10000 times kmalloc(16) -> 174 cycles kfree -> 115 cycles 10000 times kmalloc(32) -> 235 cycles kfree -> 129 cycles 10000 times kmalloc(64) -> 222 cycles kfree -> 120 cycles 10000 times kmalloc(128) -> 343 cycles kfree -> 139 cycles 10000 times kmalloc(256) -> 827 cycles kfree -> 147 cycles 10000 times kmalloc(512) -> 1048 cycles kfree -> 272 cycles 10000 times kmalloc(1024) -> 2043 cycles kfree -> 528 cycles 10000 times kmalloc(2048) -> 4002 cycles kfree -> 571 cycles 10000 times kmalloc(4096) -> 7740 cycles kfree -> 628 cycles 10000 times kmalloc(8192) -> 8062 cycles kfree -> 850 cycles 10000 times kmalloc(16384) -> 8895 cycles kfree -> 1249 cycles After: 10000 times kmalloc(8) -> 190 cycles kfree -> 129 cycles 10000 times kmalloc(16) -> 76 cycles kfree -> 123 cycles 10000 times kmalloc(32) -> 126 cycles kfree -> 124 cycles 10000 times kmalloc(64) -> 181 cycles kfree -> 128 cycles 10000 times kmalloc(128) -> 310 cycles kfree -> 140 cycles 10000 times kmalloc(256) -> 809 cycles kfree -> 165 cycles 10000 times kmalloc(512) -> 1005 cycles kfree -> 269 cycles 10000 times kmalloc(1024) -> 1999 cycles kfree -> 527 cycles 10000 times kmalloc(2048) -> 3967 cycles kfree -> 570 cycles 10000 times kmalloc(4096) -> 7658 cycles kfree -> 637 cycles 10000 times kmalloc(8192) -> 8111 cycles kfree -> 859 cycles 10000 times kmalloc(16384) -> 8791 cycles kfree -> 1173 cycles Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 875df55..009b002 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -35,7 +35,10 @@ enum stat_item { NR_SLUB_STAT_ITEMS }; struct kmem_cache_cpu { - void **freelist; /* Pointer to first free per cpu object */ + void **freelist; /* Pointer to next 
available object */ +#ifdef CONFIG_CMPXCHG_LOCAL + unsigned long tid; /* Globally unique transaction id */ +#endif struct page *page; /* The slab from which we are allocating */ int node; /* The node of the page (or -1 for debug) */ #ifdef CONFIG_SLUB_STATS diff --git a/mm/slub.c b/mm/slub.c index bae7a5c..65030c7 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1494,6 +1494,77 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) } } +#ifdef CONFIG_CMPXCHG_LOCAL +#ifdef CONFIG_PREEMPT +/* + * Calculate the next globally unique transaction for disambiguiation + * during cmpxchg. The transactions start with the cpu number and are then + * incremented by CONFIG_NR_CPUS. + */ +#define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS) +#else +/* + * No preemption supported therefore also no need to check for + * different cpus. + */ +#define TID_STEP 1 +#endif + +static inline unsigned long next_tid(unsigned long tid) +{ + return tid + TID_STEP; +} + +static inline unsigned int tid_to_cpu(unsigned long tid) +{ + return tid % TID_STEP; +} + +static inline unsigned long tid_to_event(unsigned long tid) +{ + return tid / TID_STEP; +} + +static inline unsigned int init_tid(int cpu) +{ + return cpu; +} + +static inline void note_cmpxchg_failure(const char *n, + const struct kmem_cache *s, unsigned long tid) +{ +#ifdef SLUB_DEBUG_CMPXCHG + unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid); + + printk(KERN_INFO "%s %s: cmpxchg redo ", n, s->name); + +#ifdef CONFIG_PREEMPT + if (tid_to_cpu(tid) != tid_to_cpu(actual_tid)) + printk("due to cpu change %d -> %d\n", + tid_to_cpu(tid), tid_to_cpu(actual_tid)); + else +#endif + if (tid_to_event(tid) != tid_to_event(actual_tid)) + printk("due to cpu running other code. Event %ld->%ld\n", + tid_to_event(tid), tid_to_event(actual_tid)); + else + printk("for unknown reason: actual=%lx was=%lx target=%lx\n", + actual_tid, tid, next_tid(tid)); +#endif +} + +#endif + +void init_kmem_cache_cpus(struct kmem_cache *s) +{ +#if defined(CONFIG_CMPXCHG_LOCAL) && defined(CONFIG_PREEMPT) + int cpu; + + for_each_possible_cpu(cpu) + per_cpu_ptr(s->cpu_slab, cpu)->tid = init_tid(cpu); +#endif + +} /* * Remove the cpu slab */ @@ -1525,6 +1596,9 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) page->inuse--; } c->page = NULL; +#ifdef CONFIG_CMPXCHG_LOCAL + c->tid = next_tid(c->tid); +#endif unfreeze_slab(s, page, tail); } @@ -1659,6 +1733,19 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, { void **object; struct page *new; +#ifdef CONFIG_CMPXCHG_LOCAL + unsigned long flags; + + local_irq_save(flags); +#ifdef CONFIG_PREEMPT + /* + * We may have been preempted and rescheduled on a different + * cpu before disabling interrupts. Need to reload cpu area + * pointer. 
+ */ + c = this_cpu_ptr(s->cpu_slab); +#endif +#endif /* We handle __GFP_ZERO in the caller */ gfpflags &= ~__GFP_ZERO; @@ -1685,6 +1772,10 @@ load_freelist: c->node = page_to_nid(c->page); unlock_out: slab_unlock(c->page); +#ifdef CONFIG_CMPXCHG_LOCAL + c->tid = next_tid(c->tid); + local_irq_restore(flags); +#endif stat(s, ALLOC_SLOWPATH); return object; @@ -1746,23 +1837,76 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, { void **object; struct kmem_cache_cpu *c; +#ifdef CONFIG_CMPXCHG_LOCAL + unsigned long tid; +#else unsigned long flags; +#endif if (slab_pre_alloc_hook(s, gfpflags)) return NULL; +#ifndef CONFIG_CMPXCHG_LOCAL local_irq_save(flags); +#else +redo: +#endif + + /* + * Must read kmem_cache cpu data via this cpu ptr. Preemption is + * enabled. We may switch back and forth between cpus while + * reading from one cpu area. That does not matter as long + * as we end up on the original cpu again when doing the cmpxchg. + */ c = __this_cpu_ptr(s->cpu_slab); + +#ifdef CONFIG_CMPXCHG_LOCAL + /* + * The transaction ids are globally unique per cpu and per operation on + * a per cpu queue. Thus they can be guarantee that the cmpxchg_double + * occurs on the right processor and that there was no operation on the + * linked list in between. + */ + tid = c->tid; + barrier(); +#endif + object = c->freelist; if (unlikely(!object || !node_match(c, node))) object = __slab_alloc(s, gfpflags, node, addr, c); else { +#ifdef CONFIG_CMPXCHG_LOCAL + /* + * The cmpxchg will only match if there was no additonal + * operation and if we are on the right processor. + * + * The cmpxchg does the following atomically (without lock semantics!) + * 1. Relocate first pointer to the current per cpu area. + * 2. Verify that tid and freelist have not been changed + * 3. If they were not changed replace tid and freelist + * + * Since this is without lock semantics the protection is only against + * code executing on this cpu *not* from access by other cpus. + */ + if (unlikely(!this_cpu_cmpxchg_double( + s->cpu_slab->freelist, s->cpu_slab->tid, + object, tid, + get_freepointer(s, object), next_tid(tid)))) { + + note_cmpxchg_failure("slab_alloc", s, tid); + goto redo; + } +#else c->freelist = get_freepointer(s, object); +#endif stat(s, ALLOC_FASTPATH); } + +#ifndef CONFIG_CMPXCHG_LOCAL local_irq_restore(flags); +#endif if (unlikely(gfpflags & __GFP_ZERO) && object) memset(object, 0, s->objsize); @@ -1840,9 +1984,13 @@ static void __slab_free(struct kmem_cache *s, struct page *page, { void *prior; void **object = (void *)x; +#ifdef CONFIG_CMPXCHG_LOCAL + unsigned long flags; - stat(s, FREE_SLOWPATH); + local_irq_save(flags); +#endif slab_lock(page); + stat(s, FREE_SLOWPATH); if (kmem_cache_debug(s)) goto debug; @@ -1872,6 +2020,9 @@ checks_ok: out_unlock: slab_unlock(page); +#ifdef CONFIG_CMPXCHG_LOCAL + local_irq_restore(flags); +#endif return; slab_empty: @@ -1883,6 +2034,9 @@ slab_empty: stat(s, FREE_REMOVE_PARTIAL); } slab_unlock(page); +#ifdef CONFIG_CMPXCHG_LOCAL + local_irq_restore(flags); +#endif stat(s, FREE_SLAB); discard_slab(s, page); return; @@ -1909,21 +2063,54 @@ static __always_inline void slab_free(struct kmem_cache *s, { void **object = (void *)x; struct kmem_cache_cpu *c; +#ifdef CONFIG_CMPXCHG_LOCAL + unsigned long tid; +#else unsigned long flags; +#endif slab_free_hook(s, x); +#ifndef CONFIG_CMPXCHG_LOCAL local_irq_save(flags); +#endif + +redo: + /* + * Determine the currently cpus per cpu slab. + * The cpu may change afterward. 
However that does not matter since + * data is retrieved via this pointer. If we are on the same cpu + * during the cmpxchg then the free will succedd. + */ c = __this_cpu_ptr(s->cpu_slab); +#ifdef CONFIG_CMPXCHG_LOCAL + tid = c->tid; + barrier(); +#endif + if (likely(page == c->page && c->node != NUMA_NO_NODE)) { set_freepointer(s, object, c->freelist); + +#ifdef CONFIG_CMPXCHG_LOCAL + if (unlikely(!this_cpu_cmpxchg_double( + s->cpu_slab->freelist, s->cpu_slab->tid, + c->freelist, tid, + object, next_tid(tid)))) { + + note_cmpxchg_failure("slab_free", s, tid); + goto redo; + } +#else c->freelist = object; +#endif stat(s, FREE_FASTPATH); } else __slab_free(s, page, x, addr); +#ifndef CONFIG_CMPXCHG_LOCAL local_irq_restore(flags); +#endif } void kmem_cache_free(struct kmem_cache *s, void *x) @@ -2115,9 +2302,23 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE < SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu)); +#ifdef CONFIG_CMPXCHG_LOCAL + /* + * Must align to double word boundary for the double cmpxchg instructions + * to work. + */ + s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu), 2 * sizeof(void *)); +#else + /* Regular alignment is sufficient */ s->cpu_slab = alloc_percpu(struct kmem_cache_cpu); +#endif + + if (!s->cpu_slab) + return 0; + + init_kmem_cache_cpus(s); - return s->cpu_slab != NULL; + return 1; } static struct kmem_cache *kmem_cache_node; -- cgit v0.10.2 From ab9a0f196f2f4f080df54402493ea3dc31b5243e Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 10 Mar 2011 15:21:48 +0800 Subject: slub: automatically reserve bytes at the end of slab There is no "struct" for slub's slab, it shares with struct page. But struct page is very small, it is insufficient when we need to add some metadata for slab. So we add a field "reserved" to struct kmem_cache, when a slab is allocated, kmem_cache->reserved bytes are automatically reserved at the end of the slab for slab's metadata. Changed from v1: Export the reserved field via sysfs Acked-by: Christoph Lameter Signed-off-by: Lai Jiangshan Signed-off-by: Pekka Enberg diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 8b6e8ae..ae0093c 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -83,6 +83,7 @@ struct kmem_cache { void (*ctor)(void *); int inuse; /* Offset to metadata */ int align; /* Alignment */ + int reserved; /* Reserved bytes at the end of slabs */ unsigned long min_partial; const char *name; /* Name (only for display!) 
*/ struct list_head list; /* List of slab caches */ diff --git a/mm/slub.c b/mm/slub.c index e15aa7f..d3d1767 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -281,11 +281,16 @@ static inline int slab_index(void *p, struct kmem_cache *s, void *addr) return (p - addr) / s->size; } +static inline int order_objects(int order, unsigned long size, int reserved) +{ + return ((PAGE_SIZE << order) - reserved) / size; +} + static inline struct kmem_cache_order_objects oo_make(int order, - unsigned long size) + unsigned long size, int reserved) { struct kmem_cache_order_objects x = { - (order << OO_SHIFT) + (PAGE_SIZE << order) / size + (order << OO_SHIFT) + order_objects(order, size, reserved) }; return x; @@ -617,7 +622,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) return 1; start = page_address(page); - length = (PAGE_SIZE << compound_order(page)); + length = (PAGE_SIZE << compound_order(page)) - s->reserved; end = start + length; remainder = length % s->size; if (!remainder) @@ -698,7 +703,7 @@ static int check_slab(struct kmem_cache *s, struct page *page) return 0; } - maxobj = (PAGE_SIZE << compound_order(page)) / s->size; + maxobj = order_objects(compound_order(page), s->size, s->reserved); if (page->objects > maxobj) { slab_err(s, page, "objects %u > max %u", s->name, page->objects, maxobj); @@ -748,7 +753,7 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search) nr++; } - max_objects = (PAGE_SIZE << compound_order(page)) / s->size; + max_objects = order_objects(compound_order(page), s->size, s->reserved); if (max_objects > MAX_OBJS_PER_PAGE) max_objects = MAX_OBJS_PER_PAGE; @@ -1988,13 +1993,13 @@ static int slub_nomerge; * the smallest order which will fit the object. */ static inline int slab_order(int size, int min_objects, - int max_order, int fract_leftover) + int max_order, int fract_leftover, int reserved) { int order; int rem; int min_order = slub_min_order; - if ((PAGE_SIZE << min_order) / size > MAX_OBJS_PER_PAGE) + if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE) return get_order(size * MAX_OBJS_PER_PAGE) - 1; for (order = max(min_order, @@ -2003,10 +2008,10 @@ static inline int slab_order(int size, int min_objects, unsigned long slab_size = PAGE_SIZE << order; - if (slab_size < min_objects * size) + if (slab_size < min_objects * size + reserved) continue; - rem = slab_size % size; + rem = (slab_size - reserved) % size; if (rem <= slab_size / fract_leftover) break; @@ -2016,7 +2021,7 @@ static inline int slab_order(int size, int min_objects, return order; } -static inline int calculate_order(int size) +static inline int calculate_order(int size, int reserved) { int order; int min_objects; @@ -2034,14 +2039,14 @@ static inline int calculate_order(int size) min_objects = slub_min_objects; if (!min_objects) min_objects = 4 * (fls(nr_cpu_ids) + 1); - max_objects = (PAGE_SIZE << slub_max_order)/size; + max_objects = order_objects(slub_max_order, size, reserved); min_objects = min(min_objects, max_objects); while (min_objects > 1) { fraction = 16; while (fraction >= 4) { order = slab_order(size, min_objects, - slub_max_order, fraction); + slub_max_order, fraction, reserved); if (order <= slub_max_order) return order; fraction /= 2; @@ -2053,14 +2058,14 @@ static inline int calculate_order(int size) * We were unable to place multiple objects in a slab. Now * lets see if we can place a single object there. 
*/ - order = slab_order(size, 1, slub_max_order, 1); + order = slab_order(size, 1, slub_max_order, 1, reserved); if (order <= slub_max_order) return order; /* * Doh this slab cannot be placed using slub_max_order. */ - order = slab_order(size, 1, MAX_ORDER, 1); + order = slab_order(size, 1, MAX_ORDER, 1, reserved); if (order < MAX_ORDER) return order; return -ENOSYS; @@ -2311,7 +2316,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) if (forced_order >= 0) order = forced_order; else - order = calculate_order(size); + order = calculate_order(size, s->reserved); if (order < 0) return 0; @@ -2329,8 +2334,8 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) /* * Determine the number of objects per slab */ - s->oo = oo_make(order, size); - s->min = oo_make(get_order(size), size); + s->oo = oo_make(order, size, s->reserved); + s->min = oo_make(get_order(size), size, s->reserved); if (oo_objects(s->oo) > oo_objects(s->max)) s->max = s->oo; @@ -2349,6 +2354,7 @@ static int kmem_cache_open(struct kmem_cache *s, s->objsize = size; s->align = align; s->flags = kmem_cache_flags(size, flags, name, ctor); + s->reserved = 0; if (!calculate_sizes(s, -1)) goto error; @@ -4017,6 +4023,12 @@ static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf) } SLAB_ATTR_RO(destroy_by_rcu); +static ssize_t reserved_show(struct kmem_cache *s, char *buf) +{ + return sprintf(buf, "%d\n", s->reserved); +} +SLAB_ATTR_RO(reserved); + #ifdef CONFIG_SLUB_DEBUG static ssize_t slabs_show(struct kmem_cache *s, char *buf) { @@ -4303,6 +4315,7 @@ static struct attribute *slab_attrs[] = { &reclaim_account_attr.attr, &destroy_by_rcu_attr.attr, &shrink_attr.attr, + &reserved_attr.attr, #ifdef CONFIG_SLUB_DEBUG &total_objects_attr.attr, &slabs_attr.attr, -- cgit v0.10.2 From da9a638c6f8fc0633fa94a334f1c053f5e307177 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 10 Mar 2011 15:22:00 +0800 Subject: slub,rcu: don't assume the size of struct rcu_head The size of struct rcu_head may be changed. When it becomes larger, it will pollute the page array. We reserve some some bytes for struct rcu_head when a slab is allocated in this situation. 
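Restated as a standalone sketch: the rcu_head normally overlays page->lru, and only when it has outgrown that field are sizeof(struct rcu_head) bytes reserved at the tail of each slab, where the head is then placed. The hypothetical slab_rcu_head() helper below simply isolates the arithmetic that the patch open-codes in free_slab():

#define need_reserve_slab_rcu \
	(sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))

/* hypothetical helper: locate the rcu_head when tail space is reserved */
static struct rcu_head *slab_rcu_head(struct kmem_cache *s, struct page *page)
{
	int order = compound_order(page);
	int offset = (PAGE_SIZE << order) - s->reserved;

	/* s->reserved == sizeof(struct rcu_head) for SLAB_DESTROY_BY_RCU caches */
	return page_address(page) + offset;
}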
Changed from V1: use VM_BUG_ON instead BUG_ON Acked-by: Christoph Lameter Signed-off-by: Lai Jiangshan Signed-off-by: Pekka Enberg diff --git a/mm/slub.c b/mm/slub.c index d3d1767..ebba3eb 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1254,21 +1254,38 @@ static void __free_slab(struct kmem_cache *s, struct page *page) __free_pages(page, order); } +#define need_reserve_slab_rcu \ + (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head)) + static void rcu_free_slab(struct rcu_head *h) { struct page *page; - page = container_of((struct list_head *)h, struct page, lru); + if (need_reserve_slab_rcu) + page = virt_to_head_page(h); + else + page = container_of((struct list_head *)h, struct page, lru); + __free_slab(page->slab, page); } static void free_slab(struct kmem_cache *s, struct page *page) { if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) { - /* - * RCU free overloads the RCU head over the LRU - */ - struct rcu_head *head = (void *)&page->lru; + struct rcu_head *head; + + if (need_reserve_slab_rcu) { + int order = compound_order(page); + int offset = (PAGE_SIZE << order) - s->reserved; + + VM_BUG_ON(s->reserved != sizeof(*head)); + head = page_address(page) + offset; + } else { + /* + * RCU free overloads the RCU head over the LRU + */ + head = (void *)&page->lru; + } call_rcu(head, rcu_free_slab); } else @@ -2356,6 +2373,9 @@ static int kmem_cache_open(struct kmem_cache *s, s->flags = kmem_cache_flags(size, flags, name, ctor); s->reserved = 0; + if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU)) + s->reserved = sizeof(struct rcu_head); + if (!calculate_sizes(s, -1)) goto error; if (disable_higher_order_debug) { -- cgit v0.10.2 From 5bfe53a77e8a3ffce4a10003c75f464a138e272d Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 10 Mar 2011 15:22:24 +0800 Subject: slab,rcu: don't assume the size of struct rcu_head The size of struct rcu_head may be changed. When it becomes larger, it may pollute the data after struct slab. Acked-by: Christoph Lameter Signed-off-by: Lai Jiangshan Signed-off-by: Pekka Enberg diff --git a/mm/slab.c b/mm/slab.c index 37961d1..52cf0b4 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -191,22 +191,6 @@ typedef unsigned int kmem_bufctl_t; #define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-3) /* - * struct slab - * - * Manages the objs in a slab. Placed either at the beginning of mem allocated - * for a slab, or allocated from an general cache. - * Slabs are chained into three list: fully used, partial, fully free slabs. - */ -struct slab { - struct list_head list; - unsigned long colouroff; - void *s_mem; /* including colour offset */ - unsigned int inuse; /* num of objs active in slab */ - kmem_bufctl_t free; - unsigned short nodeid; -}; - -/* * struct slab_rcu * * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to @@ -219,8 +203,6 @@ struct slab { * * rcu_read_lock before reading the address, then rcu_read_unlock after * taking the spinlock within the structure expected at that address. - * - * We assume struct slab_rcu can overlay struct slab when destroying. */ struct slab_rcu { struct rcu_head head; @@ -229,6 +211,27 @@ struct slab_rcu { }; /* + * struct slab + * + * Manages the objs in a slab. Placed either at the beginning of mem allocated + * for a slab, or allocated from an general cache. + * Slabs are chained into three list: fully used, partial, fully free slabs. 
+ */ +struct slab { + union { + struct { + struct list_head list; + unsigned long colouroff; + void *s_mem; /* including colour offset */ + unsigned int inuse; /* num of objs active in slab */ + kmem_bufctl_t free; + unsigned short nodeid; + }; + struct slab_rcu __slab_cover_slab_rcu; + }; +}; + +/* * struct array_cache * * Purpose: -- cgit v0.10.2 From b203bd3f6b9c3db3b1979c2ff79bb2b9be8f03a3 Mon Sep 17 00:00:00 2001 From: Ira Snyder Date: Thu, 3 Mar 2011 07:54:53 +0000 Subject: dmatest: fix automatic buffer unmap type The dmatest code relies on the DMAEngine API to automatically call dma_unmap_single() on src buffers. The flags it passes are incorrect, fix them. Signed-off-by: Ira W. Snyder Signed-off-by: Dan Williams diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index 5589358..7e1b0aa 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -285,7 +285,12 @@ static int dmatest_func(void *data) set_user_nice(current, 10); - flags = DMA_CTRL_ACK | DMA_COMPL_SKIP_DEST_UNMAP | DMA_PREP_INTERRUPT; + /* + * src buffers are freed by the DMAEngine code with dma_unmap_single() + * dst buffers are freed by ourselves below + */ + flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT + | DMA_COMPL_SKIP_DEST_UNMAP | DMA_COMPL_SRC_UNMAP_SINGLE; while (!kthread_should_stop() && !(iterations && total_tests >= iterations)) { -- cgit v0.10.2 From e8bd84df27c5921a9ac866aef06e044590ac118f Mon Sep 17 00:00:00 2001 From: Ira Snyder Date: Thu, 3 Mar 2011 07:54:54 +0000 Subject: fsldma: move related helper functions near each other This is a purely cosmetic cleanup. It is nice to have related functions right next to each other in the code. Signed-off-by: Ira W. Snyder Signed-off-by: Dan Williams diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index 4de947a..2e1af45 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -39,33 +39,9 @@ static const char msg_ld_oom[] = "No free memory for link descriptor\n"; -static void dma_init(struct fsldma_chan *chan) -{ - /* Reset the channel */ - DMA_OUT(chan, &chan->regs->mr, 0, 32); - - switch (chan->feature & FSL_DMA_IP_MASK) { - case FSL_DMA_IP_85XX: - /* Set the channel to below modes: - * EIE - Error interrupt enable - * EOSIE - End of segments interrupt enable (basic mode) - * EOLNIE - End of links interrupt enable - * BWC - Bandwidth sharing among channels - */ - DMA_OUT(chan, &chan->regs->mr, FSL_DMA_MR_BWC - | FSL_DMA_MR_EIE | FSL_DMA_MR_EOLNIE - | FSL_DMA_MR_EOSIE, 32); - break; - case FSL_DMA_IP_83XX: - /* Set the channel to below modes: - * EOTIE - End-of-transfer interrupt enable - * PRC_RM - PCI read multiple - */ - DMA_OUT(chan, &chan->regs->mr, FSL_DMA_MR_EOTIE - | FSL_DMA_MR_PRC_RM, 32); - break; - } -} +/* + * Register Helpers + */ static void set_sr(struct fsldma_chan *chan, u32 val) { @@ -77,6 +53,30 @@ static u32 get_sr(struct fsldma_chan *chan) return DMA_IN(chan, &chan->regs->sr, 32); } +static void set_cdar(struct fsldma_chan *chan, dma_addr_t addr) +{ + DMA_OUT(chan, &chan->regs->cdar, addr | FSL_DMA_SNEN, 64); +} + +static dma_addr_t get_cdar(struct fsldma_chan *chan) +{ + return DMA_IN(chan, &chan->regs->cdar, 64) & ~FSL_DMA_SNEN; +} + +static dma_addr_t get_ndar(struct fsldma_chan *chan) +{ + return DMA_IN(chan, &chan->regs->ndar, 64); +} + +static u32 get_bcr(struct fsldma_chan *chan) +{ + return DMA_IN(chan, &chan->regs->bcr, 32); +} + +/* + * Descriptor Helpers + */ + static void set_desc_cnt(struct fsldma_chan *chan, struct fsl_dma_ld_hw *hw, u32 count) { @@ -113,24 +113,49 @@ static void set_desc_next(struct 
fsldma_chan *chan, hw->next_ln_addr = CPU_TO_DMA(chan, snoop_bits | next, 64); } -static void set_cdar(struct fsldma_chan *chan, dma_addr_t addr) +static void set_ld_eol(struct fsldma_chan *chan, + struct fsl_desc_sw *desc) { - DMA_OUT(chan, &chan->regs->cdar, addr | FSL_DMA_SNEN, 64); -} + u64 snoop_bits; -static dma_addr_t get_cdar(struct fsldma_chan *chan) -{ - return DMA_IN(chan, &chan->regs->cdar, 64) & ~FSL_DMA_SNEN; -} + snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_83XX) + ? FSL_DMA_SNEN : 0; -static dma_addr_t get_ndar(struct fsldma_chan *chan) -{ - return DMA_IN(chan, &chan->regs->ndar, 64); + desc->hw.next_ln_addr = CPU_TO_DMA(chan, + DMA_TO_CPU(chan, desc->hw.next_ln_addr, 64) | FSL_DMA_EOL + | snoop_bits, 64); } -static u32 get_bcr(struct fsldma_chan *chan) +/* + * DMA Engine Hardware Control Helpers + */ + +static void dma_init(struct fsldma_chan *chan) { - return DMA_IN(chan, &chan->regs->bcr, 32); + /* Reset the channel */ + DMA_OUT(chan, &chan->regs->mr, 0, 32); + + switch (chan->feature & FSL_DMA_IP_MASK) { + case FSL_DMA_IP_85XX: + /* Set the channel to below modes: + * EIE - Error interrupt enable + * EOSIE - End of segments interrupt enable (basic mode) + * EOLNIE - End of links interrupt enable + * BWC - Bandwidth sharing among channels + */ + DMA_OUT(chan, &chan->regs->mr, FSL_DMA_MR_BWC + | FSL_DMA_MR_EIE | FSL_DMA_MR_EOLNIE + | FSL_DMA_MR_EOSIE, 32); + break; + case FSL_DMA_IP_83XX: + /* Set the channel to below modes: + * EOTIE - End-of-transfer interrupt enable + * PRC_RM - PCI read multiple + */ + DMA_OUT(chan, &chan->regs->mr, FSL_DMA_MR_EOTIE + | FSL_DMA_MR_PRC_RM, 32); + break; + } } static int dma_is_idle(struct fsldma_chan *chan) @@ -185,19 +210,6 @@ static void dma_halt(struct fsldma_chan *chan) dev_err(chan->dev, "DMA halt timeout!\n"); } -static void set_ld_eol(struct fsldma_chan *chan, - struct fsl_desc_sw *desc) -{ - u64 snoop_bits; - - snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_83XX) - ? FSL_DMA_SNEN : 0; - - desc->hw.next_ln_addr = CPU_TO_DMA(chan, - DMA_TO_CPU(chan, desc->hw.next_ln_addr, 64) | FSL_DMA_EOL - | snoop_bits, 64); -} - /** * fsl_chan_set_src_loop_size - Set source address hold transfer size * @chan : Freescale DMA channel -- cgit v0.10.2 From b158471ef63bf399165db96e945a828096502d9d Mon Sep 17 00:00:00 2001 From: Ira Snyder Date: Thu, 3 Mar 2011 07:54:55 +0000 Subject: fsldma: use channel name in printk output This makes debugging the driver much easier when multiple channels are running concurrently. In addition, you can see how much descriptor memory each channel has allocated via the dmapool API in sysfs. Signed-off-by: Ira W. Snyder Signed-off-by: Dan Williams diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index 2e1af45..e535cd1 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -37,7 +37,12 @@ #include "fsldma.h" -static const char msg_ld_oom[] = "No free memory for link descriptor\n"; +#define chan_dbg(chan, fmt, arg...) \ + dev_dbg(chan->dev, "%s: " fmt, chan->name, ##arg) +#define chan_err(chan, fmt, arg...) 
\ + dev_err(chan->dev, "%s: " fmt, chan->name, ##arg) + +static const char msg_ld_oom[] = "No free memory for link descriptor"; /* * Register Helpers @@ -207,7 +212,7 @@ static void dma_halt(struct fsldma_chan *chan) } if (!dma_is_idle(chan)) - dev_err(chan->dev, "DMA halt timeout!\n"); + chan_err(chan, "DMA halt timeout!\n"); } /** @@ -405,7 +410,7 @@ static struct fsl_desc_sw *fsl_dma_alloc_descriptor( desc = dma_pool_alloc(chan->desc_pool, GFP_ATOMIC, &pdesc); if (!desc) { - dev_dbg(chan->dev, "out of memory for link desc\n"); + chan_dbg(chan, "out of memory for link descriptor\n"); return NULL; } @@ -439,13 +444,11 @@ static int fsl_dma_alloc_chan_resources(struct dma_chan *dchan) * We need the descriptor to be aligned to 32bytes * for meeting FSL DMA specification requirement. */ - chan->desc_pool = dma_pool_create("fsl_dma_engine_desc_pool", - chan->dev, + chan->desc_pool = dma_pool_create(chan->name, chan->dev, sizeof(struct fsl_desc_sw), __alignof__(struct fsl_desc_sw), 0); if (!chan->desc_pool) { - dev_err(chan->dev, "unable to allocate channel %d " - "descriptor pool\n", chan->id); + chan_err(chan, "unable to allocate descriptor pool\n"); return -ENOMEM; } @@ -491,7 +494,7 @@ static void fsl_dma_free_chan_resources(struct dma_chan *dchan) struct fsldma_chan *chan = to_fsl_chan(dchan); unsigned long flags; - dev_dbg(chan->dev, "Free all channel resources.\n"); + chan_dbg(chan, "free all channel resources\n"); spin_lock_irqsave(&chan->desc_lock, flags); fsldma_free_desc_list(chan, &chan->ld_pending); fsldma_free_desc_list(chan, &chan->ld_running); @@ -514,7 +517,7 @@ fsl_dma_prep_interrupt(struct dma_chan *dchan, unsigned long flags) new = fsl_dma_alloc_descriptor(chan); if (!new) { - dev_err(chan->dev, msg_ld_oom); + chan_err(chan, "%s\n", msg_ld_oom); return NULL; } @@ -551,11 +554,11 @@ static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy( /* Allocate the link descriptor from DMA pool */ new = fsl_dma_alloc_descriptor(chan); if (!new) { - dev_err(chan->dev, msg_ld_oom); + chan_err(chan, "%s\n", msg_ld_oom); goto fail; } #ifdef FSL_DMA_LD_DEBUG - dev_dbg(chan->dev, "new link desc alloc %p\n", new); + chan_dbg(chan, "new link desc alloc %p\n", new); #endif copy = min(len, (size_t)FSL_DMA_BCR_MAX_CNT); @@ -639,11 +642,11 @@ static struct dma_async_tx_descriptor *fsl_dma_prep_sg(struct dma_chan *dchan, /* allocate and populate the descriptor */ new = fsl_dma_alloc_descriptor(chan); if (!new) { - dev_err(chan->dev, msg_ld_oom); + chan_err(chan, "%s\n", msg_ld_oom); goto fail; } #ifdef FSL_DMA_LD_DEBUG - dev_dbg(chan->dev, "new link desc alloc %p\n", new); + chan_dbg(chan, "new link desc alloc %p\n", new); #endif set_desc_cnt(chan, &new->hw, len); @@ -815,7 +818,7 @@ static void fsl_dma_update_completed_cookie(struct fsldma_chan *chan) spin_lock_irqsave(&chan->desc_lock, flags); if (list_empty(&chan->ld_running)) { - dev_dbg(chan->dev, "no running descriptors\n"); + chan_dbg(chan, "no running descriptors\n"); goto out_unlock; } @@ -863,7 +866,7 @@ static void fsl_chan_ld_cleanup(struct fsldma_chan *chan) spin_lock_irqsave(&chan->desc_lock, flags); - dev_dbg(chan->dev, "chan completed_cookie = %d\n", chan->completed_cookie); + chan_dbg(chan, "chan completed_cookie = %d\n", chan->completed_cookie); list_for_each_entry_safe(desc, _desc, &chan->ld_running, node) { dma_async_tx_callback callback; void *callback_param; @@ -879,7 +882,7 @@ static void fsl_chan_ld_cleanup(struct fsldma_chan *chan) callback_param = desc->async_tx.callback_param; if (callback) { 
spin_unlock_irqrestore(&chan->desc_lock, flags); - dev_dbg(chan->dev, "LD %p callback\n", desc); + chan_dbg(chan, "LD %p callback\n", desc); callback(callback_param); spin_lock_irqsave(&chan->desc_lock, flags); } @@ -913,7 +916,7 @@ static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan) * don't need to do any work at all */ if (list_empty(&chan->ld_pending)) { - dev_dbg(chan->dev, "no pending LDs\n"); + chan_dbg(chan, "no pending LDs\n"); goto out_unlock; } @@ -923,7 +926,7 @@ static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan) * at the end of the current transaction */ if (!dma_is_idle(chan)) { - dev_dbg(chan->dev, "DMA controller still busy\n"); + chan_dbg(chan, "DMA controller still busy\n"); goto out_unlock; } @@ -1003,14 +1006,14 @@ static irqreturn_t fsldma_chan_irq(int irq, void *data) /* save and clear the status register */ stat = get_sr(chan); set_sr(chan, stat); - dev_dbg(chan->dev, "irq: channel %d, stat = 0x%x\n", chan->id, stat); + chan_dbg(chan, "irq: stat = 0x%x\n", stat); stat &= ~(FSL_DMA_SR_CB | FSL_DMA_SR_CH); if (!stat) return IRQ_NONE; if (stat & FSL_DMA_SR_TE) - dev_err(chan->dev, "Transfer Error!\n"); + chan_err(chan, "Transfer Error!\n"); /* * Programming Error @@ -1018,7 +1021,7 @@ static irqreturn_t fsldma_chan_irq(int irq, void *data) * triger a PE interrupt. */ if (stat & FSL_DMA_SR_PE) { - dev_dbg(chan->dev, "irq: Programming Error INT\n"); + chan_dbg(chan, "irq: Programming Error INT\n"); if (get_bcr(chan) == 0) { /* BCR register is 0, this is a DMA_INTERRUPT async_tx. * Now, update the completed cookie, and continue the @@ -1035,8 +1038,8 @@ static irqreturn_t fsldma_chan_irq(int irq, void *data) * we will recycle the used descriptor. */ if (stat & FSL_DMA_SR_EOSI) { - dev_dbg(chan->dev, "irq: End-of-segments INT\n"); - dev_dbg(chan->dev, "irq: clndar 0x%llx, nlndar 0x%llx\n", + chan_dbg(chan, "irq: End-of-segments INT\n"); + chan_dbg(chan, "irq: clndar 0x%llx, nlndar 0x%llx\n", (unsigned long long)get_cdar(chan), (unsigned long long)get_ndar(chan)); stat &= ~FSL_DMA_SR_EOSI; @@ -1048,7 +1051,7 @@ static irqreturn_t fsldma_chan_irq(int irq, void *data) * and start the next transfer if it exist. */ if (stat & FSL_DMA_SR_EOCDI) { - dev_dbg(chan->dev, "irq: End-of-Chain link INT\n"); + chan_dbg(chan, "irq: End-of-Chain link INT\n"); stat &= ~FSL_DMA_SR_EOCDI; update_cookie = 1; xfer_ld_q = 1; @@ -1060,7 +1063,7 @@ static irqreturn_t fsldma_chan_irq(int irq, void *data) * prepare next transfer. */ if (stat & FSL_DMA_SR_EOLNI) { - dev_dbg(chan->dev, "irq: End-of-link INT\n"); + chan_dbg(chan, "irq: End-of-link INT\n"); stat &= ~FSL_DMA_SR_EOLNI; xfer_ld_q = 1; } @@ -1070,9 +1073,9 @@ static irqreturn_t fsldma_chan_irq(int irq, void *data) if (xfer_ld_q) fsl_chan_xfer_ld_queue(chan); if (stat) - dev_dbg(chan->dev, "irq: unhandled sr 0x%02x\n", stat); + chan_dbg(chan, "irq: unhandled sr 0x%08x\n", stat); - dev_dbg(chan->dev, "irq: Exit\n"); + chan_dbg(chan, "irq: Exit\n"); tasklet_schedule(&chan->tasklet); return IRQ_HANDLED; } @@ -1128,7 +1131,7 @@ static void fsldma_free_irqs(struct fsldma_device *fdev) for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) { chan = fdev->chan[i]; if (chan && chan->irq != NO_IRQ) { - dev_dbg(fdev->dev, "free channel %d IRQ\n", chan->id); + chan_dbg(chan, "free per-channel IRQ\n"); free_irq(chan->irq, chan); } } @@ -1155,19 +1158,16 @@ static int fsldma_request_irqs(struct fsldma_device *fdev) continue; if (chan->irq == NO_IRQ) { - dev_err(fdev->dev, "no interrupts property defined for " - "DMA channel %d. 
Please fix your " - "device tree\n", chan->id); + chan_err(chan, "interrupts property missing in device tree\n"); ret = -ENODEV; goto out_unwind; } - dev_dbg(fdev->dev, "request channel %d IRQ\n", chan->id); + chan_dbg(chan, "request per-channel IRQ\n"); ret = request_irq(chan->irq, fsldma_chan_irq, IRQF_SHARED, "fsldma-chan", chan); if (ret) { - dev_err(fdev->dev, "unable to request IRQ for DMA " - "channel %d\n", chan->id); + chan_err(chan, "unable to request per-channel IRQ\n"); goto out_unwind; } } @@ -1242,6 +1242,7 @@ static int __devinit fsl_dma_chan_probe(struct fsldma_device *fdev, fdev->chan[chan->id] = chan; tasklet_init(&chan->tasklet, dma_do_tasklet, (unsigned long)chan); + snprintf(chan->name, sizeof(chan->name), "chan%d", chan->id); /* Initialize the channel */ dma_init(chan); diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h index ba9f403..113e713 100644 --- a/drivers/dma/fsldma.h +++ b/drivers/dma/fsldma.h @@ -135,6 +135,7 @@ struct fsldma_device { #define FSL_DMA_CHAN_START_EXT 0x00002000 struct fsldma_chan { + char name[8]; /* Channel name */ struct fsldma_chan_regs __iomem *regs; dma_cookie_t completed_cookie; /* The maximum cookie completed */ spinlock_t desc_lock; /* Descriptor operation lock */ -- cgit v0.10.2 From 0ab09c36818ca88f65c88f4d8c6d067fbf10578d Mon Sep 17 00:00:00 2001 From: Ira Snyder Date: Thu, 3 Mar 2011 07:54:56 +0000 Subject: fsldma: improve link descriptor debugging This adds better tracking to link descriptor allocations, callbacks, and frees. This makes it much easier to track errors with link descriptors. Signed-off-by: Ira W. Snyder Signed-off-by: Dan Williams diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index e535cd1..82b8e9f 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -420,6 +420,10 @@ static struct fsl_desc_sw *fsl_dma_alloc_descriptor( desc->async_tx.tx_submit = fsl_dma_tx_submit; desc->async_tx.phys = pdesc; +#ifdef FSL_DMA_LD_DEBUG + chan_dbg(chan, "LD %p allocated\n", desc); +#endif + return desc; } @@ -470,6 +474,9 @@ static void fsldma_free_desc_list(struct fsldma_chan *chan, list_for_each_entry_safe(desc, _desc, list, node) { list_del(&desc->node); +#ifdef FSL_DMA_LD_DEBUG + chan_dbg(chan, "LD %p free\n", desc); +#endif dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys); } } @@ -481,6 +488,9 @@ static void fsldma_free_desc_list_reverse(struct fsldma_chan *chan, list_for_each_entry_safe_reverse(desc, _desc, list, node) { list_del(&desc->node); +#ifdef FSL_DMA_LD_DEBUG + chan_dbg(chan, "LD %p free\n", desc); +#endif dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys); } } @@ -557,9 +567,6 @@ static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy( chan_err(chan, "%s\n", msg_ld_oom); goto fail; } -#ifdef FSL_DMA_LD_DEBUG - chan_dbg(chan, "new link desc alloc %p\n", new); -#endif copy = min(len, (size_t)FSL_DMA_BCR_MAX_CNT); @@ -645,9 +652,6 @@ static struct dma_async_tx_descriptor *fsl_dma_prep_sg(struct dma_chan *dchan, chan_err(chan, "%s\n", msg_ld_oom); goto fail; } -#ifdef FSL_DMA_LD_DEBUG - chan_dbg(chan, "new link desc alloc %p\n", new); -#endif set_desc_cnt(chan, &new->hw, len); set_desc_src(chan, &new->hw, src); @@ -882,13 +886,18 @@ static void fsl_chan_ld_cleanup(struct fsldma_chan *chan) callback_param = desc->async_tx.callback_param; if (callback) { spin_unlock_irqrestore(&chan->desc_lock, flags); +#ifdef FSL_DMA_LD_DEBUG chan_dbg(chan, "LD %p callback\n", desc); +#endif callback(callback_param); spin_lock_irqsave(&chan->desc_lock, flags); } /* Run any dependencies, then 
free the descriptor */ dma_run_dependencies(&desc->async_tx); +#ifdef FSL_DMA_LD_DEBUG + chan_dbg(chan, "LD %p free\n", desc); +#endif dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys); } -- cgit v0.10.2 From 31f4306c83a2daa3e348056b720de511bffe5a9b Mon Sep 17 00:00:00 2001 From: Ira Snyder Date: Thu, 3 Mar 2011 07:54:57 +0000 Subject: fsldma: minor codingstyle and consistency fixes This fixes some minor violations of the coding style. It also changes the style of the device_prep_dma_*() function definitions so they are identical. Signed-off-by: Ira W. Snyder Signed-off-by: Dan Williams diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index 82b8e9f..5da1a4a 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -89,7 +89,7 @@ static void set_desc_cnt(struct fsldma_chan *chan, } static void set_desc_src(struct fsldma_chan *chan, - struct fsl_dma_ld_hw *hw, dma_addr_t src) + struct fsl_dma_ld_hw *hw, dma_addr_t src) { u64 snoop_bits; @@ -99,7 +99,7 @@ static void set_desc_src(struct fsldma_chan *chan, } static void set_desc_dst(struct fsldma_chan *chan, - struct fsl_dma_ld_hw *hw, dma_addr_t dst) + struct fsl_dma_ld_hw *hw, dma_addr_t dst) { u64 snoop_bits; @@ -109,7 +109,7 @@ static void set_desc_dst(struct fsldma_chan *chan, } static void set_desc_next(struct fsldma_chan *chan, - struct fsl_dma_ld_hw *hw, dma_addr_t next) + struct fsl_dma_ld_hw *hw, dma_addr_t next) { u64 snoop_bits; @@ -118,8 +118,7 @@ static void set_desc_next(struct fsldma_chan *chan, hw->next_ln_addr = CPU_TO_DMA(chan, snoop_bits | next, 64); } -static void set_ld_eol(struct fsldma_chan *chan, - struct fsl_desc_sw *desc) +static void set_ld_eol(struct fsldma_chan *chan, struct fsl_desc_sw *desc) { u64 snoop_bits; @@ -338,8 +337,7 @@ static void fsl_chan_toggle_ext_start(struct fsldma_chan *chan, int enable) chan->feature &= ~FSL_DMA_CHAN_START_EXT; } -static void append_ld_queue(struct fsldma_chan *chan, - struct fsl_desc_sw *desc) +static void append_ld_queue(struct fsldma_chan *chan, struct fsl_desc_sw *desc) { struct fsl_desc_sw *tail = to_fsl_desc(chan->ld_pending.prev); @@ -380,8 +378,8 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx) cookie = chan->common.cookie; list_for_each_entry(child, &desc->tx_list, node) { cookie++; - if (cookie < 0) - cookie = 1; + if (cookie < DMA_MIN_COOKIE) + cookie = DMA_MIN_COOKIE; child->async_tx.cookie = cookie; } @@ -402,8 +400,7 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx) * * Return - The descriptor allocated. NULL for failed. */ -static struct fsl_desc_sw *fsl_dma_alloc_descriptor( - struct fsldma_chan *chan) +static struct fsl_desc_sw *fsl_dma_alloc_descriptor(struct fsldma_chan *chan) { struct fsl_desc_sw *desc; dma_addr_t pdesc; @@ -427,7 +424,6 @@ static struct fsl_desc_sw *fsl_dma_alloc_descriptor( return desc; } - /** * fsl_dma_alloc_chan_resources - Allocate resources for DMA channel. 
* @chan : Freescale DMA channel @@ -537,14 +533,15 @@ fsl_dma_prep_interrupt(struct dma_chan *dchan, unsigned long flags) /* Insert the link descriptor to the LD ring */ list_add_tail(&new->node, &new->tx_list); - /* Set End-of-link to the last link descriptor of new list*/ + /* Set End-of-link to the last link descriptor of new list */ set_ld_eol(chan, new); return &new->async_tx; } -static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy( - struct dma_chan *dchan, dma_addr_t dma_dst, dma_addr_t dma_src, +static struct dma_async_tx_descriptor * +fsl_dma_prep_memcpy(struct dma_chan *dchan, + dma_addr_t dma_dst, dma_addr_t dma_src, size_t len, unsigned long flags) { struct fsldma_chan *chan; @@ -594,7 +591,7 @@ static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy( new->async_tx.flags = flags; /* client is in control of this ack */ new->async_tx.cookie = -EBUSY; - /* Set End-of-link to the last link descriptor of new list*/ + /* Set End-of-link to the last link descriptor of new list */ set_ld_eol(chan, new); return &first->async_tx; diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h index 113e713..49189da 100644 --- a/drivers/dma/fsldma.h +++ b/drivers/dma/fsldma.h @@ -102,8 +102,8 @@ struct fsl_desc_sw { } __attribute__((aligned(32))); struct fsldma_chan_regs { - u32 mr; /* 0x00 - Mode Register */ - u32 sr; /* 0x04 - Status Register */ + u32 mr; /* 0x00 - Mode Register */ + u32 sr; /* 0x04 - Status Register */ u64 cdar; /* 0x08 - Current descriptor address register */ u64 sar; /* 0x10 - Source Address Register */ u64 dar; /* 0x18 - Destination Address Register */ -- cgit v0.10.2 From f04cd40701deace2efb9edd7120e59366bda2118 Mon Sep 17 00:00:00 2001 From: Ira Snyder Date: Thu, 3 Mar 2011 07:54:58 +0000 Subject: fsldma: fix controller lockups Enabling poisoning in the dmapool API quickly showed that the DMA controller was fetching descriptors that should not have been in use. This has caused intermittent controller lockups during testing. I have been unable to figure out the exact set of conditions which cause this to happen. However, I believe it is related to the driver using the hardware registers to track whether the controller is busy or not. The code can incorrectly decide that the hardware is idle due to lag between register writes and the hardware actually becoming busy. To fix this, the driver has been reworked to explicitly track the state of the hardware, rather than try to guess what it is doing based on the register values. This has passed dmatest with 10 threads per channel, 100000 iterations per thread several times without error. Previously, this would fail within a few seconds. Signed-off-by: Ira W. 
Snyder Signed-off-by: Dan Williams diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index 5da1a4a..6e9ad6e 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -68,11 +68,6 @@ static dma_addr_t get_cdar(struct fsldma_chan *chan) return DMA_IN(chan, &chan->regs->cdar, 64) & ~FSL_DMA_SNEN; } -static dma_addr_t get_ndar(struct fsldma_chan *chan) -{ - return DMA_IN(chan, &chan->regs->ndar, 64); -} - static u32 get_bcr(struct fsldma_chan *chan) { return DMA_IN(chan, &chan->regs->bcr, 32); @@ -143,13 +138,11 @@ static void dma_init(struct fsldma_chan *chan) case FSL_DMA_IP_85XX: /* Set the channel to below modes: * EIE - Error interrupt enable - * EOSIE - End of segments interrupt enable (basic mode) * EOLNIE - End of links interrupt enable * BWC - Bandwidth sharing among channels */ DMA_OUT(chan, &chan->regs->mr, FSL_DMA_MR_BWC - | FSL_DMA_MR_EIE | FSL_DMA_MR_EOLNIE - | FSL_DMA_MR_EOSIE, 32); + | FSL_DMA_MR_EIE | FSL_DMA_MR_EOLNIE, 32); break; case FSL_DMA_IP_83XX: /* Set the channel to below modes: @@ -168,25 +161,32 @@ static int dma_is_idle(struct fsldma_chan *chan) return (!(sr & FSL_DMA_SR_CB)) || (sr & FSL_DMA_SR_CH); } +/* + * Start the DMA controller + * + * Preconditions: + * - the CDAR register must point to the start descriptor + * - the MRn[CS] bit must be cleared + */ static void dma_start(struct fsldma_chan *chan) { u32 mode; mode = DMA_IN(chan, &chan->regs->mr, 32); - if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) { - if (chan->feature & FSL_DMA_CHAN_PAUSE_EXT) { - DMA_OUT(chan, &chan->regs->bcr, 0, 32); - mode |= FSL_DMA_MR_EMP_EN; - } else { - mode &= ~FSL_DMA_MR_EMP_EN; - } + if (chan->feature & FSL_DMA_CHAN_PAUSE_EXT) { + DMA_OUT(chan, &chan->regs->bcr, 0, 32); + mode |= FSL_DMA_MR_EMP_EN; + } else { + mode &= ~FSL_DMA_MR_EMP_EN; } - if (chan->feature & FSL_DMA_CHAN_START_EXT) + if (chan->feature & FSL_DMA_CHAN_START_EXT) { mode |= FSL_DMA_MR_EMS_EN; - else + } else { + mode &= ~FSL_DMA_MR_EMS_EN; mode |= FSL_DMA_MR_CS; + } DMA_OUT(chan, &chan->regs->mr, mode, 32); } @@ -760,14 +760,15 @@ static int fsl_dma_device_control(struct dma_chan *dchan, switch (cmd) { case DMA_TERMINATE_ALL: + spin_lock_irqsave(&chan->desc_lock, flags); + /* Halt the DMA engine */ dma_halt(chan); - spin_lock_irqsave(&chan->desc_lock, flags); - /* Remove and free all of the descriptors in the LD queue */ fsldma_free_desc_list(chan, &chan->ld_pending); fsldma_free_desc_list(chan, &chan->ld_running); + chan->idle = true; spin_unlock_irqrestore(&chan->desc_lock, flags); return 0; @@ -805,76 +806,43 @@ static int fsl_dma_device_control(struct dma_chan *dchan, } /** - * fsl_dma_update_completed_cookie - Update the completed cookie. + * fsl_chan_ld_cleanup - Clean up link descriptors * @chan : Freescale DMA channel * - * CONTEXT: hardirq + * This function is run after the queue of running descriptors has been + * executed by the DMA engine. It will run any callbacks, and then free + * the descriptors. 
+ * + * HARDWARE STATE: idle */ -static void fsl_dma_update_completed_cookie(struct fsldma_chan *chan) +static void fsl_chan_ld_cleanup(struct fsldma_chan *chan) { - struct fsl_desc_sw *desc; + struct fsl_desc_sw *desc, *_desc; unsigned long flags; - dma_cookie_t cookie; spin_lock_irqsave(&chan->desc_lock, flags); + /* if the ld_running list is empty, there is nothing to do */ if (list_empty(&chan->ld_running)) { - chan_dbg(chan, "no running descriptors\n"); + chan_dbg(chan, "no descriptors to cleanup\n"); goto out_unlock; } - /* Get the last descriptor, update the cookie to that */ + /* + * Get the last descriptor, update the cookie to it + * + * This is done before callbacks run so that clients can check the + * status of their DMA transfer inside the callback. + */ desc = to_fsl_desc(chan->ld_running.prev); - if (dma_is_idle(chan)) - cookie = desc->async_tx.cookie; - else { - cookie = desc->async_tx.cookie - 1; - if (unlikely(cookie < DMA_MIN_COOKIE)) - cookie = DMA_MAX_COOKIE; - } - - chan->completed_cookie = cookie; - -out_unlock: - spin_unlock_irqrestore(&chan->desc_lock, flags); -} - -/** - * fsldma_desc_status - Check the status of a descriptor - * @chan: Freescale DMA channel - * @desc: DMA SW descriptor - * - * This function will return the status of the given descriptor - */ -static enum dma_status fsldma_desc_status(struct fsldma_chan *chan, - struct fsl_desc_sw *desc) -{ - return dma_async_is_complete(desc->async_tx.cookie, - chan->completed_cookie, - chan->common.cookie); -} - -/** - * fsl_chan_ld_cleanup - Clean up link descriptors - * @chan : Freescale DMA channel - * - * This function clean up the ld_queue of DMA channel. - */ -static void fsl_chan_ld_cleanup(struct fsldma_chan *chan) -{ - struct fsl_desc_sw *desc, *_desc; - unsigned long flags; - - spin_lock_irqsave(&chan->desc_lock, flags); + chan->completed_cookie = desc->async_tx.cookie; + chan_dbg(chan, "completed_cookie = %d\n", chan->completed_cookie); - chan_dbg(chan, "chan completed_cookie = %d\n", chan->completed_cookie); + /* Run the callback for each descriptor, in order */ list_for_each_entry_safe(desc, _desc, &chan->ld_running, node) { dma_async_tx_callback callback; void *callback_param; - if (fsldma_desc_status(chan, desc) == DMA_IN_PROGRESS) - break; - /* Remove from the list of running transactions */ list_del(&desc->node); @@ -898,6 +866,7 @@ static void fsl_chan_ld_cleanup(struct fsldma_chan *chan) dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys); } +out_unlock: spin_unlock_irqrestore(&chan->desc_lock, flags); } @@ -905,10 +874,7 @@ static void fsl_chan_ld_cleanup(struct fsldma_chan *chan) * fsl_chan_xfer_ld_queue - transfer any pending transactions * @chan : Freescale DMA channel * - * This will make sure that any pending transactions will be run. - * If the DMA controller is idle, it will be started. Otherwise, - * the DMA controller's interrupt handler will start any pending - * transactions when it becomes idle. 
+ * HARDWARE STATE: idle */ static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan) { @@ -927,23 +893,16 @@ static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan) } /* - * The DMA controller is not idle, which means the interrupt - * handler will start any queued transactions when it runs - * at the end of the current transaction + * The DMA controller is not idle, which means that the interrupt + * handler will start any queued transactions when it runs after + * this transaction finishes */ - if (!dma_is_idle(chan)) { + if (!chan->idle) { chan_dbg(chan, "DMA controller still busy\n"); goto out_unlock; } /* - * TODO: - * make sure the dma_halt() function really un-wedges the - * controller as much as possible - */ - dma_halt(chan); - - /* * If there are some link descriptors which have not been * transferred, we need to start the controller */ @@ -952,15 +911,32 @@ static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan) * Move all elements from the queue of pending transactions * onto the list of running transactions */ + chan_dbg(chan, "idle, starting controller\n"); desc = list_first_entry(&chan->ld_pending, struct fsl_desc_sw, node); list_splice_tail_init(&chan->ld_pending, &chan->ld_running); /* + * The 85xx DMA controller doesn't clear the channel start bit + * automatically at the end of a transfer. Therefore we must clear + * it in software before starting the transfer. + */ + if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) { + u32 mode; + + mode = DMA_IN(chan, &chan->regs->mr, 32); + mode &= ~FSL_DMA_MR_CS; + DMA_OUT(chan, &chan->regs->mr, mode, 32); + } + + /* * Program the descriptor's address into the DMA controller, * then start the DMA transaction */ set_cdar(chan, desc->async_tx.phys); + get_cdar(chan); + dma_start(chan); + chan->idle = false; out_unlock: spin_unlock_irqrestore(&chan->desc_lock, flags); @@ -985,16 +961,18 @@ static enum dma_status fsl_tx_status(struct dma_chan *dchan, struct dma_tx_state *txstate) { struct fsldma_chan *chan = to_fsl_chan(dchan); - dma_cookie_t last_used; dma_cookie_t last_complete; + dma_cookie_t last_used; + unsigned long flags; - fsl_chan_ld_cleanup(chan); + spin_lock_irqsave(&chan->desc_lock, flags); - last_used = dchan->cookie; last_complete = chan->completed_cookie; + last_used = dchan->cookie; - dma_set_tx_state(txstate, last_complete, last_used, 0); + spin_unlock_irqrestore(&chan->desc_lock, flags); + dma_set_tx_state(txstate, last_complete, last_used, 0); return dma_async_is_complete(cookie, last_complete, last_used); } @@ -1005,8 +983,6 @@ static enum dma_status fsl_tx_status(struct dma_chan *dchan, static irqreturn_t fsldma_chan_irq(int irq, void *data) { struct fsldma_chan *chan = data; - int update_cookie = 0; - int xfer_ld_q = 0; u32 stat; /* save and clear the status register */ @@ -1014,6 +990,7 @@ static irqreturn_t fsldma_chan_irq(int irq, void *data) set_sr(chan, stat); chan_dbg(chan, "irq: stat = 0x%x\n", stat); + /* check that this was really our device */ stat &= ~(FSL_DMA_SR_CB | FSL_DMA_SR_CH); if (!stat) return IRQ_NONE; @@ -1028,28 +1005,9 @@ static irqreturn_t fsldma_chan_irq(int irq, void *data) */ if (stat & FSL_DMA_SR_PE) { chan_dbg(chan, "irq: Programming Error INT\n"); - if (get_bcr(chan) == 0) { - /* BCR register is 0, this is a DMA_INTERRUPT async_tx. - * Now, update the completed cookie, and continue the - * next uncompleted transfer. 
- */ - update_cookie = 1; - xfer_ld_q = 1; - } stat &= ~FSL_DMA_SR_PE; - } - - /* - * If the link descriptor segment transfer finishes, - * we will recycle the used descriptor. - */ - if (stat & FSL_DMA_SR_EOSI) { - chan_dbg(chan, "irq: End-of-segments INT\n"); - chan_dbg(chan, "irq: clndar 0x%llx, nlndar 0x%llx\n", - (unsigned long long)get_cdar(chan), - (unsigned long long)get_ndar(chan)); - stat &= ~FSL_DMA_SR_EOSI; - update_cookie = 1; + if (get_bcr(chan) != 0) + chan_err(chan, "Programming Error!\n"); } /* @@ -1059,8 +1017,6 @@ static irqreturn_t fsldma_chan_irq(int irq, void *data) if (stat & FSL_DMA_SR_EOCDI) { chan_dbg(chan, "irq: End-of-Chain link INT\n"); stat &= ~FSL_DMA_SR_EOCDI; - update_cookie = 1; - xfer_ld_q = 1; } /* @@ -1071,25 +1027,44 @@ static irqreturn_t fsldma_chan_irq(int irq, void *data) if (stat & FSL_DMA_SR_EOLNI) { chan_dbg(chan, "irq: End-of-link INT\n"); stat &= ~FSL_DMA_SR_EOLNI; - xfer_ld_q = 1; } - if (update_cookie) - fsl_dma_update_completed_cookie(chan); - if (xfer_ld_q) - fsl_chan_xfer_ld_queue(chan); + /* check that the DMA controller is really idle */ + if (!dma_is_idle(chan)) + chan_err(chan, "irq: controller not idle!\n"); + + /* check that we handled all of the bits */ if (stat) - chan_dbg(chan, "irq: unhandled sr 0x%08x\n", stat); + chan_err(chan, "irq: unhandled sr 0x%08x\n", stat); - chan_dbg(chan, "irq: Exit\n"); + /* + * Schedule the tasklet to handle all cleanup of the current + * transaction. It will start a new transaction if there is + * one pending. + */ tasklet_schedule(&chan->tasklet); + chan_dbg(chan, "irq: Exit\n"); return IRQ_HANDLED; } static void dma_do_tasklet(unsigned long data) { struct fsldma_chan *chan = (struct fsldma_chan *)data; + unsigned long flags; + + chan_dbg(chan, "tasklet entry\n"); + + /* run all callbacks, free all used descriptors */ fsl_chan_ld_cleanup(chan); + + /* the channel is now idle */ + spin_lock_irqsave(&chan->desc_lock, flags); + chan->idle = true; + spin_unlock_irqrestore(&chan->desc_lock, flags); + + /* start any pending transactions automatically */ + fsl_chan_xfer_ld_queue(chan); + chan_dbg(chan, "tasklet exit\n"); } static irqreturn_t fsldma_ctrl_irq(int irq, void *data) @@ -1269,6 +1244,7 @@ static int __devinit fsl_dma_chan_probe(struct fsldma_device *fdev, spin_lock_init(&chan->desc_lock); INIT_LIST_HEAD(&chan->ld_pending); INIT_LIST_HEAD(&chan->ld_running); + chan->idle = true; chan->common.device = &fdev->common; diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h index 49189da..9cb5aa5 100644 --- a/drivers/dma/fsldma.h +++ b/drivers/dma/fsldma.h @@ -148,6 +148,7 @@ struct fsldma_chan { int id; /* Raw id of this channel */ struct tasklet_struct tasklet; u32 feature; + bool idle; /* DMA controller is idle */ void (*toggle_ext_pause)(struct fsldma_chan *fsl_chan, int enable); void (*toggle_ext_start)(struct fsldma_chan *fsl_chan, int enable); -- cgit v0.10.2 From 9c4d1e7bdeb1ed4dc0c3341d40662a6fbc5f2dc2 Mon Sep 17 00:00:00 2001 From: Ira Snyder Date: Thu, 3 Mar 2011 07:54:59 +0000 Subject: fsldma: support async_tx dependencies and automatic unmapping Previous to this patch, the dma_run_dependencies() function has been called while holding desc_lock. This function can call tx_submit() for other descriptors, which may try to re-grab the lock. Avoid this by moving the descriptors to be cleaned up to a temporary list, and dropping the lock before cleanup. At the same time, add support for automatic unmapping of src and dst buffers, as offered by the DMAEngine API. Signed-off-by: Ira W. 
Snyder Signed-off-by: Dan Williams diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index 6e9ad6e..526579d 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -83,6 +83,11 @@ static void set_desc_cnt(struct fsldma_chan *chan, hw->count = CPU_TO_DMA(chan, count, 32); } +static u32 get_desc_cnt(struct fsldma_chan *chan, struct fsl_desc_sw *desc) +{ + return DMA_TO_CPU(chan, desc->hw.count, 32); +} + static void set_desc_src(struct fsldma_chan *chan, struct fsl_dma_ld_hw *hw, dma_addr_t src) { @@ -93,6 +98,16 @@ static void set_desc_src(struct fsldma_chan *chan, hw->src_addr = CPU_TO_DMA(chan, snoop_bits | src, 64); } +static dma_addr_t get_desc_src(struct fsldma_chan *chan, + struct fsl_desc_sw *desc) +{ + u64 snoop_bits; + + snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) + ? ((u64)FSL_DMA_SATR_SREADTYPE_SNOOP_READ << 32) : 0; + return DMA_TO_CPU(chan, desc->hw.src_addr, 64) & ~snoop_bits; +} + static void set_desc_dst(struct fsldma_chan *chan, struct fsl_dma_ld_hw *hw, dma_addr_t dst) { @@ -103,6 +118,16 @@ static void set_desc_dst(struct fsldma_chan *chan, hw->dst_addr = CPU_TO_DMA(chan, snoop_bits | dst, 64); } +static dma_addr_t get_desc_dst(struct fsldma_chan *chan, + struct fsl_desc_sw *desc) +{ + u64 snoop_bits; + + snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) + ? ((u64)FSL_DMA_DATR_DWRITETYPE_SNOOP_WRITE << 32) : 0; + return DMA_TO_CPU(chan, desc->hw.dst_addr, 64) & ~snoop_bits; +} + static void set_desc_next(struct fsldma_chan *chan, struct fsl_dma_ld_hw *hw, dma_addr_t next) { @@ -806,6 +831,57 @@ static int fsl_dma_device_control(struct dma_chan *dchan, } /** + * fsldma_cleanup_descriptor - cleanup and free a single link descriptor + * @chan: Freescale DMA channel + * @desc: descriptor to cleanup and free + * + * This function is used on a descriptor which has been executed by the DMA + * controller. It will run any callbacks, submit any dependencies, and then + * free the descriptor. 
+ */ +static void fsldma_cleanup_descriptor(struct fsldma_chan *chan, + struct fsl_desc_sw *desc) +{ + struct dma_async_tx_descriptor *txd = &desc->async_tx; + struct device *dev = chan->common.device->dev; + dma_addr_t src = get_desc_src(chan, desc); + dma_addr_t dst = get_desc_dst(chan, desc); + u32 len = get_desc_cnt(chan, desc); + + /* Run the link descriptor callback function */ + if (txd->callback) { +#ifdef FSL_DMA_LD_DEBUG + chan_dbg(chan, "LD %p callback\n", desc); +#endif + txd->callback(txd->callback_param); + } + + /* Run any dependencies */ + dma_run_dependencies(txd); + + /* Unmap the dst buffer, if requested */ + if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE) + dma_unmap_single(dev, dst, len, DMA_FROM_DEVICE); + else + dma_unmap_page(dev, dst, len, DMA_FROM_DEVICE); + } + + /* Unmap the src buffer, if requested */ + if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE) + dma_unmap_single(dev, src, len, DMA_TO_DEVICE); + else + dma_unmap_page(dev, src, len, DMA_TO_DEVICE); + } + +#ifdef FSL_DMA_LD_DEBUG + chan_dbg(chan, "LD %p free\n", desc); +#endif + dma_pool_free(chan->desc_pool, desc, txd->phys); +} + +/** * fsl_chan_ld_cleanup - Clean up link descriptors * @chan : Freescale DMA channel * @@ -818,56 +894,39 @@ static int fsl_dma_device_control(struct dma_chan *dchan, static void fsl_chan_ld_cleanup(struct fsldma_chan *chan) { struct fsl_desc_sw *desc, *_desc; + LIST_HEAD(ld_cleanup); unsigned long flags; spin_lock_irqsave(&chan->desc_lock, flags); - /* if the ld_running list is empty, there is nothing to do */ - if (list_empty(&chan->ld_running)) { - chan_dbg(chan, "no descriptors to cleanup\n"); - goto out_unlock; + /* update the cookie if we have some descriptors to cleanup */ + if (!list_empty(&chan->ld_running)) { + dma_cookie_t cookie; + + desc = to_fsl_desc(chan->ld_running.prev); + cookie = desc->async_tx.cookie; + + chan->completed_cookie = cookie; + chan_dbg(chan, "completed cookie=%d\n", cookie); } /* - * Get the last descriptor, update the cookie to it - * - * This is done before callbacks run so that clients can check the - * status of their DMA transfer inside the callback. 
+ * move the descriptors to a temporary list so we can drop the lock + * during the entire cleanup operation */ - desc = to_fsl_desc(chan->ld_running.prev); - chan->completed_cookie = desc->async_tx.cookie; - chan_dbg(chan, "completed_cookie = %d\n", chan->completed_cookie); + list_splice_tail_init(&chan->ld_running, &ld_cleanup); + + spin_unlock_irqrestore(&chan->desc_lock, flags); /* Run the callback for each descriptor, in order */ - list_for_each_entry_safe(desc, _desc, &chan->ld_running, node) { - dma_async_tx_callback callback; - void *callback_param; + list_for_each_entry_safe(desc, _desc, &ld_cleanup, node) { - /* Remove from the list of running transactions */ + /* Remove from the list of transactions */ list_del(&desc->node); - /* Run the link descriptor callback function */ - callback = desc->async_tx.callback; - callback_param = desc->async_tx.callback_param; - if (callback) { - spin_unlock_irqrestore(&chan->desc_lock, flags); -#ifdef FSL_DMA_LD_DEBUG - chan_dbg(chan, "LD %p callback\n", desc); -#endif - callback(callback_param); - spin_lock_irqsave(&chan->desc_lock, flags); - } - - /* Run any dependencies, then free the descriptor */ - dma_run_dependencies(&desc->async_tx); -#ifdef FSL_DMA_LD_DEBUG - chan_dbg(chan, "LD %p free\n", desc); -#endif - dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys); + /* Run all cleanup for this descriptor */ + fsldma_cleanup_descriptor(chan, desc); } - -out_unlock: - spin_unlock_irqrestore(&chan->desc_lock, flags); } /** -- cgit v0.10.2 From dc8d4091575ba81e886ebcdfd1e559c981f82f86 Mon Sep 17 00:00:00 2001 From: Ira Snyder Date: Thu, 3 Mar 2011 07:55:00 +0000 Subject: fsldma: reduce locking during descriptor cleanup This merges the fsl_chan_ld_cleanup() function into the dma_do_tasklet() function to reduce locking overhead. In the best case, we will be able to keep the DMA controller busy while we are freeing used descriptors. In all cases, the spinlock is grabbed two times fewer than before on each transaction. Signed-off-by: Ira W. Snyder Signed-off-by: Dan Williams diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index 526579d..d300de4 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -882,65 +882,15 @@ static void fsldma_cleanup_descriptor(struct fsldma_chan *chan, } /** - * fsl_chan_ld_cleanup - Clean up link descriptors - * @chan : Freescale DMA channel - * - * This function is run after the queue of running descriptors has been - * executed by the DMA engine. It will run any callbacks, and then free - * the descriptors. 
- * - * HARDWARE STATE: idle - */ -static void fsl_chan_ld_cleanup(struct fsldma_chan *chan) -{ - struct fsl_desc_sw *desc, *_desc; - LIST_HEAD(ld_cleanup); - unsigned long flags; - - spin_lock_irqsave(&chan->desc_lock, flags); - - /* update the cookie if we have some descriptors to cleanup */ - if (!list_empty(&chan->ld_running)) { - dma_cookie_t cookie; - - desc = to_fsl_desc(chan->ld_running.prev); - cookie = desc->async_tx.cookie; - - chan->completed_cookie = cookie; - chan_dbg(chan, "completed cookie=%d\n", cookie); - } - - /* - * move the descriptors to a temporary list so we can drop the lock - * during the entire cleanup operation - */ - list_splice_tail_init(&chan->ld_running, &ld_cleanup); - - spin_unlock_irqrestore(&chan->desc_lock, flags); - - /* Run the callback for each descriptor, in order */ - list_for_each_entry_safe(desc, _desc, &ld_cleanup, node) { - - /* Remove from the list of transactions */ - list_del(&desc->node); - - /* Run all cleanup for this descriptor */ - fsldma_cleanup_descriptor(chan, desc); - } -} - -/** * fsl_chan_xfer_ld_queue - transfer any pending transactions * @chan : Freescale DMA channel * * HARDWARE STATE: idle + * LOCKING: must hold chan->desc_lock */ static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan) { struct fsl_desc_sw *desc; - unsigned long flags; - - spin_lock_irqsave(&chan->desc_lock, flags); /* * If the list of pending descriptors is empty, then we @@ -948,7 +898,7 @@ static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan) */ if (list_empty(&chan->ld_pending)) { chan_dbg(chan, "no pending LDs\n"); - goto out_unlock; + return; } /* @@ -958,7 +908,7 @@ static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan) */ if (!chan->idle) { chan_dbg(chan, "DMA controller still busy\n"); - goto out_unlock; + return; } /* @@ -996,9 +946,6 @@ static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan) dma_start(chan); chan->idle = false; - -out_unlock: - spin_unlock_irqrestore(&chan->desc_lock, flags); } /** @@ -1008,7 +955,11 @@ out_unlock: static void fsl_dma_memcpy_issue_pending(struct dma_chan *dchan) { struct fsldma_chan *chan = to_fsl_chan(dchan); + unsigned long flags; + + spin_lock_irqsave(&chan->desc_lock, flags); fsl_chan_xfer_ld_queue(chan); + spin_unlock_irqrestore(&chan->desc_lock, flags); } /** @@ -1109,20 +1060,53 @@ static irqreturn_t fsldma_chan_irq(int irq, void *data) static void dma_do_tasklet(unsigned long data) { struct fsldma_chan *chan = (struct fsldma_chan *)data; + struct fsl_desc_sw *desc, *_desc; + LIST_HEAD(ld_cleanup); unsigned long flags; chan_dbg(chan, "tasklet entry\n"); - /* run all callbacks, free all used descriptors */ - fsl_chan_ld_cleanup(chan); - - /* the channel is now idle */ spin_lock_irqsave(&chan->desc_lock, flags); + + /* update the cookie if we have some descriptors to cleanup */ + if (!list_empty(&chan->ld_running)) { + dma_cookie_t cookie; + + desc = to_fsl_desc(chan->ld_running.prev); + cookie = desc->async_tx.cookie; + + chan->completed_cookie = cookie; + chan_dbg(chan, "completed_cookie=%d\n", cookie); + } + + /* + * move the descriptors to a temporary list so we can drop the lock + * during the entire cleanup operation + */ + list_splice_tail_init(&chan->ld_running, &ld_cleanup); + + /* the hardware is now idle and ready for more */ chan->idle = true; - spin_unlock_irqrestore(&chan->desc_lock, flags); - /* start any pending transactions automatically */ + /* + * Start any pending transactions automatically + * + * In the ideal case, we keep the DMA controller busy while we go + * 
ahead and free the descriptors below. + */ fsl_chan_xfer_ld_queue(chan); + spin_unlock_irqrestore(&chan->desc_lock, flags); + + /* Run the callback for each descriptor, in order */ + list_for_each_entry_safe(desc, _desc, &ld_cleanup, node) { + + /* Remove from the list of transactions */ + list_del(&desc->node); + + /* Run all cleanup for this descriptor */ + fsldma_cleanup_descriptor(chan, desc); + } + chan_dbg(chan, "tasklet exit\n"); } -- cgit v0.10.2 From a00ae34ac8bc8a5897d9b6b9b685c39b955b14b9 Mon Sep 17 00:00:00 2001 From: Ira Snyder Date: Thu, 3 Mar 2011 07:55:01 +0000 Subject: fsldma: make halt behave nicely on all supported controllers The original dma_halt() function set the CA (channel abort) bit on both the 83xx and 85xx controllers. This is incorrect on the 83xx, where this bit means TEM (transfer error mask) instead. The 83xx doesn't support channel abort, so we only do this operation on 85xx. Signed-off-by: Ira W. Snyder Signed-off-by: Dan Williams diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index d300de4..8670a50 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -221,13 +221,26 @@ static void dma_halt(struct fsldma_chan *chan) u32 mode; int i; + /* read the mode register */ mode = DMA_IN(chan, &chan->regs->mr, 32); - mode |= FSL_DMA_MR_CA; - DMA_OUT(chan, &chan->regs->mr, mode, 32); - mode &= ~(FSL_DMA_MR_CS | FSL_DMA_MR_EMS_EN | FSL_DMA_MR_CA); + /* + * The 85xx controller supports channel abort, which will stop + * the current transfer. On 83xx, this bit is the transfer error + * mask bit, which should not be changed. + */ + if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) { + mode |= FSL_DMA_MR_CA; + DMA_OUT(chan, &chan->regs->mr, mode, 32); + + mode &= ~FSL_DMA_MR_CA; + } + + /* stop the DMA controller */ + mode &= ~(FSL_DMA_MR_CS | FSL_DMA_MR_EMS_EN); DMA_OUT(chan, &chan->regs->mr, mode, 32); + /* wait for the DMA controller to become idle */ for (i = 0; i < 100; i++) { if (dma_is_idle(chan)) return; -- cgit v0.10.2 From cfbdab951369f15de890597530076bf0119361be Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 17 Jan 2011 16:10:59 +0800 Subject: vhost-net: check the support of mergeable buffer outside the receive loop No need to check the support of mergeable buffer inside the receive loop as the whole handle_rx()_xx is in the read critical region. So this patch moves it ahead of the receiving loop. Signed-off-by: Jason Wang Signed-off-by: Michael S. Tsirkin diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 59dad9f..9f57cd4 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -411,7 +411,7 @@ static void handle_rx_mergeable(struct vhost_net *net) .hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE }; size_t total_len = 0; - int err, headcount; + int err, headcount, mergeable; size_t vhost_hlen, sock_hlen; size_t vhost_len, sock_len; /* TODO: check that we are running from vhost_worker? */ @@ -427,6 +427,7 @@ static void handle_rx_mergeable(struct vhost_net *net) vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ? vq->log : NULL; + mergeable = vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF); while ((sock_len = peek_head_len(sock->sk))) { sock_len += sock_hlen; @@ -476,7 +477,7 @@ static void handle_rx_mergeable(struct vhost_net *net) break; } /* TODO: Should check and handle checksum. 
*/ - if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF) && + if (likely(mergeable) && memcpy_toiovecend(vq->hdr, (unsigned char *)&headcount, offsetof(typeof(hdr), num_buffers), sizeof hdr.num_buffers)) { -- cgit v0.10.2 From 94249369e9930276e30087da205349a55478cbb5 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 17 Jan 2011 16:11:08 +0800 Subject: vhost-net: Unify the code of mergeable and big buffer handling Codes duplication were found between the handling of mergeable and big buffers, so this patch tries to unify them. This could be easily done by adding a quota to the get_rx_bufs() which is used to limit the number of buffers it returns (for mergeable buffer, the quota is simply UIO_MAXIOV, for big buffers, the quota is just 1), and then the previous handle_rx_mergeable() could be resued also for big buffers. Signed-off-by: Jason Wang Signed-off-by: Michael S. Tsirkin diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 9f57cd4..0329c41 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -229,6 +229,7 @@ static int peek_head_len(struct sock *sk) * @iovcount - returned count of io vectors we fill * @log - vhost log * @log_num - log offset + * @quota - headcount quota, 1 for big buffer * returns number of buffer heads allocated, negative on error */ static int get_rx_bufs(struct vhost_virtqueue *vq, @@ -236,7 +237,8 @@ static int get_rx_bufs(struct vhost_virtqueue *vq, int datalen, unsigned *iovcount, struct vhost_log *log, - unsigned *log_num) + unsigned *log_num, + unsigned int quota) { unsigned int out, in; int seg = 0; @@ -244,7 +246,7 @@ static int get_rx_bufs(struct vhost_virtqueue *vq, unsigned d; int r, nlogs = 0; - while (datalen > 0) { + while (datalen > 0 && headcount < quota) { if (unlikely(seg >= UIO_MAXIOV)) { r = -ENOBUFS; goto err; @@ -284,116 +286,7 @@ err: /* Expects to be always run from workqueue - which acts as * read-size critical section for our kind of RCU. */ -static void handle_rx_big(struct vhost_net *net) -{ - struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX]; - unsigned out, in, log, s; - int head; - struct vhost_log *vq_log; - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_control = NULL, /* FIXME: get and handle RX aux data. */ - .msg_controllen = 0, - .msg_iov = vq->iov, - .msg_flags = MSG_DONTWAIT, - }; - struct virtio_net_hdr hdr = { - .flags = 0, - .gso_type = VIRTIO_NET_HDR_GSO_NONE - }; - size_t len, total_len = 0; - int err; - size_t hdr_size; - /* TODO: check that we are running from vhost_worker? */ - struct socket *sock = rcu_dereference_check(vq->private_data, 1); - - if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue)) - return; - - mutex_lock(&vq->mutex); - vhost_disable_notify(vq); - hdr_size = vq->vhost_hlen; - - vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ? - vq->log : NULL; - - for (;;) { - head = vhost_get_vq_desc(&net->dev, vq, vq->iov, - ARRAY_SIZE(vq->iov), - &out, &in, - vq_log, &log); - /* On error, stop handling until the next kick. */ - if (unlikely(head < 0)) - break; - /* OK, now we need to know about added descriptors. */ - if (head == vq->num) { - if (unlikely(vhost_enable_notify(vq))) { - /* They have slipped one in as we were - * doing that: check again. */ - vhost_disable_notify(vq); - continue; - } - /* Nothing new? Wait for eventfd to tell us - * they refilled. */ - break; - } - /* We don't need to be notified again. 
*/ - if (out) { - vq_err(vq, "Unexpected descriptor format for RX: " - "out %d, int %d\n", - out, in); - break; - } - /* Skip header. TODO: support TSO/mergeable rx buffers. */ - s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, in); - msg.msg_iovlen = in; - len = iov_length(vq->iov, in); - /* Sanity check */ - if (!len) { - vq_err(vq, "Unexpected header len for RX: " - "%zd expected %zd\n", - iov_length(vq->hdr, s), hdr_size); - break; - } - err = sock->ops->recvmsg(NULL, sock, &msg, - len, MSG_DONTWAIT | MSG_TRUNC); - /* TODO: Check specific error and bomb out unless EAGAIN? */ - if (err < 0) { - vhost_discard_vq_desc(vq, 1); - break; - } - /* TODO: Should check and handle checksum. */ - if (err > len) { - pr_debug("Discarded truncated rx packet: " - " len %d > %zd\n", err, len); - vhost_discard_vq_desc(vq, 1); - continue; - } - len = err; - err = memcpy_toiovec(vq->hdr, (unsigned char *)&hdr, hdr_size); - if (err) { - vq_err(vq, "Unable to write vnet_hdr at addr %p: %d\n", - vq->iov->iov_base, err); - break; - } - len += hdr_size; - vhost_add_used_and_signal(&net->dev, vq, head, len); - if (unlikely(vq_log)) - vhost_log_write(vq, vq_log, log, len); - total_len += len; - if (unlikely(total_len >= VHOST_NET_WEIGHT)) { - vhost_poll_queue(&vq->poll); - break; - } - } - - mutex_unlock(&vq->mutex); -} - -/* Expects to be always run from workqueue - which acts as - * read-size critical section for our kind of RCU. */ -static void handle_rx_mergeable(struct vhost_net *net) +static void handle_rx(struct vhost_net *net) { struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX]; unsigned uninitialized_var(in), log; @@ -433,7 +326,8 @@ static void handle_rx_mergeable(struct vhost_net *net) sock_len += sock_hlen; vhost_len = sock_len + vhost_hlen; headcount = get_rx_bufs(vq, vq->heads, vhost_len, - &in, vq_log, &log); + &in, vq_log, &log, + likely(mergeable) ? UIO_MAXIOV : 1); /* On error, stop handling until the next kick. */ if (unlikely(headcount < 0)) break; @@ -499,14 +393,6 @@ static void handle_rx_mergeable(struct vhost_net *net) mutex_unlock(&vq->mutex); } -static void handle_rx(struct vhost_net *net) -{ - if (vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF)) - handle_rx_mergeable(net); - else - handle_rx_big(net); -} - static void handle_tx_kick(struct vhost_work *work) { struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, -- cgit v0.10.2 From 783e3988544b94ff3918666b9f36866ac547fba1 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 17 Jan 2011 16:11:17 +0800 Subject: vhost: lock receive queue, not the socket vhost takes a sock lock to try and prevent the skb from being pulled from the receive queue after skb_peek. However this is not the right lock to use for that, sk_receive_queue.lock is. Fix that up. Signed-off-by: Michael S. Tsirkin diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 0329c41..5720301 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -213,12 +213,13 @@ static int peek_head_len(struct sock *sk) { struct sk_buff *head; int len = 0; + unsigned long flags; - lock_sock(sk); + spin_lock_irqsave(&sk->sk_receive_queue.lock, flags); head = skb_peek(&sk->sk_receive_queue); - if (head) + if (likely(head)) len = head->len; - release_sock(sk); + spin_unlock_irqrestore(&sk->sk_receive_queue.lock, flags); return len; } -- cgit v0.10.2 From de4d768a428d9de943dd6dc82bcd61742955cb6e Mon Sep 17 00:00:00 2001 From: "Michael S. 
Tsirkin" Date: Sun, 13 Mar 2011 23:00:52 +0200 Subject: vhost-net: remove unlocked use of receive_queue Use of skb_queue_empty(&sock->sk->sk_receive_queue) without taking the sk_receive_queue.lock is unsafe or useless. Take it out. Reported-by: Eric Dumazet Signed-off-by: Michael S. Tsirkin diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 5720301..2f7c76a 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -311,7 +311,7 @@ static void handle_rx(struct vhost_net *net) /* TODO: check that we are running from vhost_worker? */ struct socket *sock = rcu_dereference_check(vq->private_data, 1); - if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue)) + if (!sock) return; mutex_lock(&vq->mutex); -- cgit v0.10.2 From b09734b1f4abd86e046777f0f268215b4ef1b523 Mon Sep 17 00:00:00 2001 From: Tommi Virtanen Date: Wed, 2 Feb 2011 11:39:32 -0800 Subject: libceph: Fix base64-decoding when input ends in newline. It used to return -EINVAL because it thought the end was not aligned to 4 bytes. Clean up superfluous src < end test in if, the while itself guarantees that. Signed-off-by: Tommi Virtanen Signed-off-by: Sage Weil diff --git a/net/ceph/armor.c b/net/ceph/armor.c index eb2a666..1fc1ee1 100644 --- a/net/ceph/armor.c +++ b/net/ceph/armor.c @@ -78,8 +78,10 @@ int ceph_unarmor(char *dst, const char *src, const char *end) while (src < end) { int a, b, c, d; - if (src < end && src[0] == '\n') + if (src[0] == '\n') { src++; + continue; + } if (src + 4 > end) return -EINVAL; a = decode_bits(src[0]); -- cgit v0.10.2 From 09adc80c611bb8902daa8ccfe34dbbc009d6befe Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 4 Feb 2011 21:38:47 -0800 Subject: ceph: preserve I_COMPLETE across rename d_move puts the renamed dentry at the end of d_subdirs, screwing with our cached dentry directory offsets. We were just clearing I_COMPLETE to avoid any possibility of trouble. However, assigning the renamed dentry an offset at the end of the directory (to match it's new d_subdirs position) is sufficient to maintain correct behavior and hold onto I_COMPLETE. This is especially important for workloads like rsync, which renames files into place. Before, we would lose I_COMPLETE and do MDS lookups for each file. With this patch we only talk to the MDS on create and rename. Signed-off-by: Sage Weil diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 193bfa5..6045636 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1030,9 +1030,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, dout("fill_trace doing d_move %p -> %p\n", req->r_old_dentry, dn); - /* d_move screws up d_subdirs order */ - ceph_i_clear(dir, CEPH_I_COMPLETE); - d_move(req->r_old_dentry, dn); dout(" src %p '%.*s' dst %p '%.*s'\n", req->r_old_dentry, @@ -1044,12 +1041,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, rehashing bug in vfs_rename_dir */ ceph_invalidate_dentry_lease(dn); - /* take overwritten dentry's readdir offset */ - dout("dn %p gets %p offset %lld (old offset %lld)\n", - req->r_old_dentry, dn, ceph_dentry(dn)->offset, + /* + * d_move() puts the renamed dentry at the end of + * d_subdirs. We need to assign it an appropriate + * directory offset so we can behave when holding + * I_COMPLETE. 
+ */ + ceph_set_dentry_offset(req->r_old_dentry); + dout("dn %p gets new offset %lld\n", req->r_old_dentry, ceph_dentry(req->r_old_dentry)->offset); - ceph_dentry(req->r_old_dentry)->offset = - ceph_dentry(dn)->offset; dn = req->r_old_dentry; /* use old_dentry */ in = dn->d_inode; -- cgit v0.10.2 From b1a56b331aec59be04f25ac99694d855d591c539 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 16 Mar 2011 18:18:53 +0000 Subject: ASoC: Remove bogus check for register validity in debugfs write Since not all registers need to be cached and the cache is entirely optional anyway we shouldn't be checking that a register is in the cached range. If the register is invalid then the actual I/O code can determine that and report an error. Similarly, the step size can and should be enforced by the lower level code if it's important. Signed-off-by: Mark Brown Acked-by: Liam Girdwood diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 17efacd..4dda589 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -259,8 +259,6 @@ static ssize_t codec_reg_write_file(struct file *file, while (*start == ' ') start++; reg = simple_strtoul(start, &start, 16); - if ((reg >= codec->driver->reg_cache_size) || (reg % step)) - return -EINVAL; while (*start == ' ') start++; if (strict_strtoul(start, 16, &value)) -- cgit v0.10.2 From 977a6ef3c0cb622b572fb8e2e5088dbe09521375 Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Fri, 18 Mar 2011 16:29:25 +0800 Subject: sound: read i_size with i_size_read() Convert direct read of inode->i_size to using i_size_read(). i_size_read is guaranteed to return a valid value and its caller does not need to use additional locking. Signed-off-by: Xiaochen Wang Signed-off-by: Takashi Iwai diff --git a/sound/sound_firmware.c b/sound/sound_firmware.c index 340a0bc..7e96249 100644 --- a/sound/sound_firmware.c +++ b/sound/sound_firmware.c @@ -19,7 +19,7 @@ static int do_mod_firmware_load(const char *fn, char **fp) printk(KERN_INFO "Unable to load '%s'.\n", fn); return 0; } - l = filp->f_path.dentry->d_inode->i_size; + l = i_size_read(filp->f_path.dentry->d_inode); if (l <= 0 || l > 131072) { printk(KERN_INFO "Invalid firmware '%s'\n", fn); -- cgit v0.10.2 From 4363c2fddb1399b728ef21ee8101c148a311ea45 Mon Sep 17 00:00:00 2001 From: Alex Dubov Date: Wed, 16 Mar 2011 17:57:13 +0000 Subject: gianfar: Fall back to software tcp/udp checksum on older controllers As specified by errata eTSEC49 of MPC8548 and errata eTSEC12 of MPC83xx, older revisions of gianfar controllers will be unable to calculate a TCP/UDP packet checksum for some alignments of the appropriate FCB. This patch checks for FCB alignment on such controllers and falls back to software checksumming if the alignment is known to be bad. Signed-off-by: Alex Dubov Signed-off-by: David S. 
Miller diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index ccb231c..2a0ad9a 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -949,6 +949,11 @@ static void gfar_detect_errata(struct gfar_private *priv) (pvr == 0x80861010 && (mod & 0xfff9) == 0x80c0)) priv->errata |= GFAR_ERRATA_A002; + /* MPC8313 Rev < 2.0, MPC8548 rev 2.0 */ + if ((pvr == 0x80850010 && mod == 0x80b0 && rev < 0x0020) || + (pvr == 0x80210020 && mod == 0x8030 && rev == 0x0020)) + priv->errata |= GFAR_ERRATA_12; + if (priv->errata) dev_info(dev, "enabled errata workarounds, flags: 0x%x\n", priv->errata); @@ -2154,8 +2159,15 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) /* Set up checksumming */ if (CHECKSUM_PARTIAL == skb->ip_summed) { fcb = gfar_add_fcb(skb); - lstatus |= BD_LFLAG(TXBD_TOE); - gfar_tx_checksum(skb, fcb); + /* as specified by errata */ + if (unlikely(gfar_has_errata(priv, GFAR_ERRATA_12) + && ((unsigned long)fcb % 0x20) > 0x18)) { + __skb_pull(skb, GMAC_FCB_LEN); + skb_checksum_help(skb); + } else { + lstatus |= BD_LFLAG(TXBD_TOE); + gfar_tx_checksum(skb, fcb); + } } if (vlan_tx_tag_present(skb)) { diff --git a/drivers/net/gianfar.h b/drivers/net/gianfar.h index 54de413..ec5d595 100644 --- a/drivers/net/gianfar.h +++ b/drivers/net/gianfar.h @@ -1039,6 +1039,7 @@ enum gfar_errata { GFAR_ERRATA_74 = 0x01, GFAR_ERRATA_76 = 0x02, GFAR_ERRATA_A002 = 0x04, + GFAR_ERRATA_12 = 0x08, /* a.k.a errata eTSEC49 */ }; /* Struct stolen almost completely (and shamelessly) from the FCC enet source -- cgit v0.10.2 From 67c5c6cb8129c595f21e88254a3fc6b3b841ae8e Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Thu, 17 Mar 2011 01:40:10 +0000 Subject: econet: 4 byte infoleak to the network struct aunhdr has 4 padding bytes between 'pad' and 'handle' fields on x86_64. These bytes are not initialized in the variable 'ah' before sending 'ah' to the network. This leads to 4 bytes kernel stack infoleak. This bug was introduced before the git epoch. Signed-off-by: Vasiliy Kulikov Acked-by: Phil Blundell Signed-off-by: David S. Miller diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 0c28263..116d3fd 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -435,10 +435,10 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, udpdest.sin_addr.s_addr = htonl(network | addr.station); } + memset(&ah, 0, sizeof(ah)); ah.port = port; ah.cb = cb & 0x7f; ah.code = 2; /* magic */ - ah.pad = 0; /* tack our header on the front of the iovec */ size = sizeof(struct aunhdr); -- cgit v0.10.2 From 5e5069b41d5b82bcadc1dbf73f48476b428c102f Mon Sep 17 00:00:00 2001 From: Roger Luethi Date: Thu, 17 Mar 2011 06:37:21 +0000 Subject: ethtool: __ethtool_set_sg: check for function pointer before using it __ethtool_set_sg does not check if dev->ethtool_ops->set_sg is defined which can result in a NULL pointer dereference when ethtool is used to change SG settings for drivers without SG support. Signed-off-by: Roger Luethi Reviewed-by: Ben Hutchings Signed-off-by: David S. 
Miller diff --git a/net/core/ethtool.c b/net/core/ethtool.c index c1a71bb..a1086fb 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1457,6 +1457,9 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data) { int err; + if (!dev->ethtool_ops->set_sg) + return -EOPNOTSUPP; + if (data && !(dev->features & NETIF_F_ALL_CSUM)) return -EINVAL; -- cgit v0.10.2 From 3a7da39d165e0c363c294feec119db1427032afd Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 17 Mar 2011 07:34:32 +0000 Subject: ethtool: Compat handling for struct ethtool_rxnfc This structure was accidentally defined such that its layout can differ between 32-bit and 64-bit processes. Add compat structure definitions and an ioctl wrapper function. Signed-off-by: Ben Hutchings Acked-by: Alexander Duyck Cc: stable@kernel.org [2.6.30+] Signed-off-by: David S. Miller diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index aac3e2e..b297f28 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -13,6 +13,9 @@ #ifndef _LINUX_ETHTOOL_H #define _LINUX_ETHTOOL_H +#ifdef __KERNEL__ +#include +#endif #include #include @@ -450,6 +453,37 @@ struct ethtool_rxnfc { __u32 rule_locs[0]; }; +#ifdef __KERNEL__ +#ifdef CONFIG_COMPAT + +struct compat_ethtool_rx_flow_spec { + u32 flow_type; + union { + struct ethtool_tcpip4_spec tcp_ip4_spec; + struct ethtool_tcpip4_spec udp_ip4_spec; + struct ethtool_tcpip4_spec sctp_ip4_spec; + struct ethtool_ah_espip4_spec ah_ip4_spec; + struct ethtool_ah_espip4_spec esp_ip4_spec; + struct ethtool_usrip4_spec usr_ip4_spec; + struct ethhdr ether_spec; + u8 hdata[72]; + } h_u, m_u; + compat_u64 ring_cookie; + u32 location; +}; + +struct compat_ethtool_rxnfc { + u32 cmd; + u32 flow_type; + compat_u64 data; + struct compat_ethtool_rx_flow_spec fs; + u32 rule_cnt; + u32 rule_locs[0]; +}; + +#endif /* CONFIG_COMPAT */ +#endif /* __KERNEL__ */ + /** * struct ethtool_rxfh_indir - command to get or set RX flow hash indirection * @cmd: Specific command number - %ETHTOOL_GRXFHINDIR or %ETHTOOL_SRXFHINDIR diff --git a/net/socket.c b/net/socket.c index 937d0fc..5212447 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2588,23 +2588,123 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) { + struct compat_ethtool_rxnfc __user *compat_rxnfc; + bool convert_in = false, convert_out = false; + size_t buf_size = ALIGN(sizeof(struct ifreq), 8); + struct ethtool_rxnfc __user *rxnfc; struct ifreq __user *ifr; + u32 rule_cnt = 0, actual_rule_cnt; + u32 ethcmd; u32 data; - void __user *datap; + int ret; + + if (get_user(data, &ifr32->ifr_ifru.ifru_data)) + return -EFAULT; - ifr = compat_alloc_user_space(sizeof(*ifr)); + compat_rxnfc = compat_ptr(data); - if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) + if (get_user(ethcmd, &compat_rxnfc->cmd)) return -EFAULT; - if (get_user(data, &ifr32->ifr_ifru.ifru_data)) + /* Most ethtool structures are defined without padding. + * Unfortunately struct ethtool_rxnfc is an exception. 
+ */ + switch (ethcmd) { + default: + break; + case ETHTOOL_GRXCLSRLALL: + /* Buffer size is variable */ + if (get_user(rule_cnt, &compat_rxnfc->rule_cnt)) + return -EFAULT; + if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32)) + return -ENOMEM; + buf_size += rule_cnt * sizeof(u32); + /* fall through */ + case ETHTOOL_GRXRINGS: + case ETHTOOL_GRXCLSRLCNT: + case ETHTOOL_GRXCLSRULE: + convert_out = true; + /* fall through */ + case ETHTOOL_SRXCLSRLDEL: + case ETHTOOL_SRXCLSRLINS: + buf_size += sizeof(struct ethtool_rxnfc); + convert_in = true; + break; + } + + ifr = compat_alloc_user_space(buf_size); + rxnfc = (void *)ifr + ALIGN(sizeof(struct ifreq), 8); + + if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) return -EFAULT; - datap = compat_ptr(data); - if (put_user(datap, &ifr->ifr_ifru.ifru_data)) + if (put_user(convert_in ? rxnfc : compat_ptr(data), + &ifr->ifr_ifru.ifru_data)) return -EFAULT; - return dev_ioctl(net, SIOCETHTOOL, ifr); + if (convert_in) { + /* We expect there to be holes between fs.m_u and + * fs.ring_cookie and at the end of fs, but nowhere else. + */ + BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_u) + + sizeof(compat_rxnfc->fs.m_u) != + offsetof(struct ethtool_rxnfc, fs.m_u) + + sizeof(rxnfc->fs.m_u)); + BUILD_BUG_ON( + offsetof(struct compat_ethtool_rxnfc, fs.location) - + offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) != + offsetof(struct ethtool_rxnfc, fs.location) - + offsetof(struct ethtool_rxnfc, fs.ring_cookie)); + + if (copy_in_user(rxnfc, compat_rxnfc, + (void *)(&rxnfc->fs.m_u + 1) - + (void *)rxnfc) || + copy_in_user(&rxnfc->fs.ring_cookie, + &compat_rxnfc->fs.ring_cookie, + (void *)(&rxnfc->fs.location + 1) - + (void *)&rxnfc->fs.ring_cookie) || + copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt, + sizeof(rxnfc->rule_cnt))) + return -EFAULT; + } + + ret = dev_ioctl(net, SIOCETHTOOL, ifr); + if (ret) + return ret; + + if (convert_out) { + if (copy_in_user(compat_rxnfc, rxnfc, + (const void *)(&rxnfc->fs.m_u + 1) - + (const void *)rxnfc) || + copy_in_user(&compat_rxnfc->fs.ring_cookie, + &rxnfc->fs.ring_cookie, + (const void *)(&rxnfc->fs.location + 1) - + (const void *)&rxnfc->fs.ring_cookie) || + copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt, + sizeof(rxnfc->rule_cnt))) + return -EFAULT; + + if (ethcmd == ETHTOOL_GRXCLSRLALL) { + /* As an optimisation, we only copy the actual + * number of rules that the underlying + * function returned. Since Mallory might + * change the rule count in user memory, we + * check that it is less than the rule count + * originally given (as the user buffer size), + * which has been range-checked. + */ + if (get_user(actual_rule_cnt, &rxnfc->rule_cnt)) + return -EFAULT; + if (actual_rule_cnt < rule_cnt) + rule_cnt = actual_rule_cnt; + if (copy_in_user(&compat_rxnfc->rule_locs[0], + &rxnfc->rule_locs[0], + rule_cnt * sizeof(u32))) + return -EFAULT; + } + } + + return 0; } static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32) -- cgit v0.10.2 From d870bfb9d366c5d466c0f5419a4ec95a3f71ea8a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 18 Mar 2011 00:27:27 +0000 Subject: vlan: should take into account needed_headroom Commit c95b819ad7 (gre: Use needed_headroom) made gre use needed_headroom instead of hard_header_len This uncover a bug in vlan code. We should make sure vlan devices take into account their real_dev->needed_headroom or we risk a crash in ipgre_header(), because we dont have enough room to push IP header in skb. 
Reported-by: Diddi Oscarsson Signed-off-by: Eric Dumazet Cc: Patrick McHardy Cc: Herbert Xu Acked-by: Herbert Xu Signed-off-by: David S. Miller diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index ae610f0..e34ea9e 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -720,6 +720,7 @@ static int vlan_dev_init(struct net_device *dev) dev->fcoe_ddp_xid = real_dev->fcoe_ddp_xid; #endif + dev->needed_headroom = real_dev->needed_headroom; if (real_dev->features & NETIF_F_HW_VLAN_TX) { dev->header_ops = real_dev->header_ops; dev->hard_header_len = real_dev->hard_header_len; -- cgit v0.10.2 From 6b1e960fdbd75dcd9bcc3ba5ff8898ff1ad30b6e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Mar 2011 05:27:28 +0000 Subject: bridge: Reset IPCB when entering IP stack on NF_FORWARD Whenever we enter the IP stack proper from bridge netfilter we need to ensure that the skb is in a form the IP stack expects it to be in. The entry point on NF_FORWARD did not meet the requirements of the IP stack, therefore leading to potential crashes/panics. This patch fixes the problem. Signed-off-by: Herbert Xu Acked-by: Stephen Hemminger Signed-off-by: David S. Miller diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index f97af559..008ff6c 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -739,6 +739,9 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, nf_bridge->mask |= BRNF_PKT_TYPE; } + if (br_parse_ip_options(skb)) + return NF_DROP; + /* The physdev module checks on this */ nf_bridge->mask |= BRNF_BRIDGED; nf_bridge->physoutdev = skb->dev; -- cgit v0.10.2 From b51bdad63046d1d5a4807630cc8c02845cf67893 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Fri, 18 Mar 2011 08:50:37 +0000 Subject: headers: use __aligned_xx types for userspace Now that we finally have __aligned_xx exported to userspace, convert the headers that get exported over to the proper type. Signed-off-by: Mike Frysinger Signed-off-by: David S. 
Miller diff --git a/include/linux/if_ppp.h b/include/linux/if_ppp.h index fcef103..c9ad383 100644 --- a/include/linux/if_ppp.h +++ b/include/linux/if_ppp.h @@ -114,14 +114,14 @@ struct pppol2tp_ioc_stats { __u16 tunnel_id; /* redundant */ __u16 session_id; /* if zero, get tunnel stats */ __u32 using_ipsec:1; /* valid only for session_id == 0 */ - aligned_u64 tx_packets; - aligned_u64 tx_bytes; - aligned_u64 tx_errors; - aligned_u64 rx_packets; - aligned_u64 rx_bytes; - aligned_u64 rx_seq_discards; - aligned_u64 rx_oos_packets; - aligned_u64 rx_errors; + __aligned_u64 tx_packets; + __aligned_u64 tx_bytes; + __aligned_u64 tx_errors; + __aligned_u64 rx_packets; + __aligned_u64 rx_bytes; + __aligned_u64 rx_seq_discards; + __aligned_u64 rx_oos_packets; + __aligned_u64 rx_errors; }; #define ifr__name b.ifr_ifrn.ifrn_name diff --git a/include/linux/netfilter/nfnetlink_log.h b/include/linux/netfilter/nfnetlink_log.h index ea9b8d3..90c2c95 100644 --- a/include/linux/netfilter/nfnetlink_log.h +++ b/include/linux/netfilter/nfnetlink_log.h @@ -28,8 +28,8 @@ struct nfulnl_msg_packet_hw { }; struct nfulnl_msg_packet_timestamp { - aligned_be64 sec; - aligned_be64 usec; + __aligned_be64 sec; + __aligned_be64 usec; }; enum nfulnl_attr_type { diff --git a/include/linux/netfilter/nfnetlink_queue.h b/include/linux/netfilter/nfnetlink_queue.h index 2455fe5..af94e00 100644 --- a/include/linux/netfilter/nfnetlink_queue.h +++ b/include/linux/netfilter/nfnetlink_queue.h @@ -25,8 +25,8 @@ struct nfqnl_msg_packet_hw { }; struct nfqnl_msg_packet_timestamp { - aligned_be64 sec; - aligned_be64 usec; + __aligned_be64 sec; + __aligned_be64 usec; }; enum nfqnl_attr_type { diff --git a/include/linux/netfilter/xt_connbytes.h b/include/linux/netfilter/xt_connbytes.h index 92fcbb0..f1d6c15 100644 --- a/include/linux/netfilter/xt_connbytes.h +++ b/include/linux/netfilter/xt_connbytes.h @@ -17,8 +17,8 @@ enum xt_connbytes_direction { struct xt_connbytes_info { struct { - aligned_u64 from; /* count to be matched */ - aligned_u64 to; /* count to be matched */ + __aligned_u64 from; /* count to be matched */ + __aligned_u64 to; /* count to be matched */ } count; __u8 what; /* ipt_connbytes_what */ __u8 direction; /* ipt_connbytes_direction */ diff --git a/include/linux/netfilter/xt_quota.h b/include/linux/netfilter/xt_quota.h index ca6e03e..9314723 100644 --- a/include/linux/netfilter/xt_quota.h +++ b/include/linux/netfilter/xt_quota.h @@ -13,7 +13,7 @@ struct xt_quota_priv; struct xt_quota_info { __u32 flags; __u32 pad; - aligned_u64 quota; + __aligned_u64 quota; /* Used internally by the kernel */ struct xt_quota_priv *master; -- cgit v0.10.2 From 93d03203d5a165d7a757546245dd1543dfe0ff80 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 18 Mar 2011 21:53:03 -0700 Subject: ftmac100: use resource_size() The calculation is off-by-one. It should be "end - start + 1". This patch fixes it to use resource_size() instead. Oddly, the code already uses resource size correctly a couple lines earlier when it calls request_mem_region() for this memory. Signed-off-by: Dan Carpenter Signed-off-by: David S. 
Miller diff --git a/drivers/net/ftmac100.c b/drivers/net/ftmac100.c index 1d6f4b8..a316619 100644 --- a/drivers/net/ftmac100.c +++ b/drivers/net/ftmac100.c @@ -1102,7 +1102,7 @@ static int ftmac100_probe(struct platform_device *pdev) goto err_req_mem; } - priv->base = ioremap(res->start, res->end - res->start); + priv->base = ioremap(res->start, resource_size(res)); if (!priv->base) { dev_err(&pdev->dev, "Failed to ioremap ethernet registers\n"); err = -EIO; -- cgit v0.10.2 From e5f15b45ddf3afa2bbbb10c7ea34fb32b6de0a0e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 18 Feb 2011 11:30:30 +0000 Subject: x86: Cleanup highmap after brk is concluded Now cleanup_highmap actually is in two steps: one is early in head64.c and only clears above _end; a second one is in init_memory_mapping() and tries to clean from _brk_end to _end. It should check if those boundaries are PMD_SIZE aligned but currently does not. Also init_memory_mapping() is called several times for numa or memory hotplug, so we really should not handle initial kernel mappings there. This patch moves cleanup_highmap() down after _brk_end is settled so we can do everything in one step. Also we honor max_pfn_mapped in the implementation of cleanup_highmap. Signed-off-by: Yinghai Lu Signed-off-by: Stefano Stabellini LKML-Reference: Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 2d2673c..5655c22 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -77,9 +77,6 @@ void __init x86_64_start_kernel(char * real_mode_data) /* Make NULL pointers segfault */ zap_identity_mappings(); - /* Cleanup the over mapped high alias */ - cleanup_highmap(); - max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT; for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) { diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b176f2b..4a52a5f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -294,30 +294,11 @@ static void __init init_gbpages(void) else direct_gbpages = 0; } - -static void __init cleanup_highmap_brk_end(void) -{ - pud_t *pud; - pmd_t *pmd; - - mmu_cr4_features = read_cr4(); - - /* - * _brk_end cannot change anymore, but it and _end may be - * located on different 2M pages. cleanup_highmap(), however, - * can only consider _end when it runs, so destroy any - * mappings beyond _brk_end here. - */ - pud = pud_offset(pgd_offset_k(_brk_end), _brk_end); - pmd = pmd_offset(pud, _brk_end - 1); - while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1)) - pmd_clear(pmd); -} #else static inline void init_gbpages(void) { } -static inline void cleanup_highmap_brk_end(void) +static void __init cleanup_highmap(void) { } #endif @@ -330,8 +311,6 @@ static void __init reserve_brk(void) /* Mark brk area as locked down and no longer taking any new allocations */ _brk_start = 0; - - cleanup_highmap_brk_end(); } #ifdef CONFIG_BLK_DEV_INITRD @@ -950,6 +929,8 @@ void __init setup_arch(char **cmdline_p) */ reserve_brk(); + cleanup_highmap(); + memblock.current_limit = get_max_mapped(); memblock_x86_fill(); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index a08a62c..7026505 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -51,6 +51,7 @@ #include #include #include +#include static int __init parse_direct_gbpages_off(char *arg) { @@ -293,18 +294,18 @@ void __init init_extra_mapping_uc(unsigned long phys, unsigned long size) * to the compile time generated pmds. This results in invalid pmds up * to the point where we hit the physaddr 0 mapping. 
* - * We limit the mappings to the region from _text to _end. _end is - * rounded up to the 2MB boundary. This catches the invalid pmds as + * We limit the mappings to the region from _text to _brk_end. _brk_end + * is rounded up to the 2MB boundary. This catches the invalid pmds as * well, as they are located before _text: */ void __init cleanup_highmap(void) { unsigned long vaddr = __START_KERNEL_map; - unsigned long end = roundup((unsigned long)_end, PMD_SIZE) - 1; + unsigned long vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT); + unsigned long end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1; pmd_t *pmd = level2_kernel_pgt; - pmd_t *last_pmd = pmd + PTRS_PER_PMD; - for (; pmd < last_pmd; pmd++, vaddr += PMD_SIZE) { + for (; vaddr + PMD_SIZE - 1 < vaddr_end; pmd++, vaddr += PMD_SIZE) { if (pmd_none(*pmd)) continue; if (vaddr < (unsigned long) _text || vaddr > end) -- cgit v0.10.2 From 14988a4d350ce3b41ecad4f63c4f44c56f5ae34d Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Fri, 18 Feb 2011 11:32:40 +0000 Subject: xen: set max_pfn_mapped to the last pfn mapped Do not set max_pfn_mapped to the end of the initial memory mappings, that also contain pages that don't belong in pfn space (like the mfn list). Set max_pfn_mapped to the last real pfn mapped in the initial memory mappings that is the pfn backing _end. Signed-off-by: Stefano Stabellini Acked-by: Konrad Rzeszutek Wilk LKML-Reference: Signed-off-by: H. Peter Anvin diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index a2d78ad..6e27979 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1701,9 +1701,6 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { pte_t pte; - if (pfn > max_pfn_mapped) - max_pfn_mapped = pfn; - if (!pte_none(pte_page[pteidx])) continue; @@ -1761,6 +1758,12 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, pud_t *l3; pmd_t *l2; + /* max_pfn_mapped is the last pfn mapped in the initial memory + * mappings. Considering that on Xen after the kernel mappings we + * have the mappings of some pages that don't exist in pfn space, we + * set max_pfn_mapped to the last real pfn mapped. */ + max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list)); + /* Zap identity mapping */ init_level4_pgt[0] = __pgd(0); @@ -1865,9 +1868,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, initial_kernel_pmd = extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); - max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) + - xen_start_info->nr_pt_frames * PAGE_SIZE + - 512*1024); + max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list)); kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); -- cgit v0.10.2 From d8aa5ec3382e6a545b8f25178d1e0992d4927f19 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 9 Mar 2011 14:22:05 +0000 Subject: xen: update mask_rw_pte after kernel page tables init changes After "x86-64, mm: Put early page table high" already existing kernel page table pages can be mapped using early_ioremap too so we need to update mask_rw_pte to make sure these pages are still mapped RO. The reason why we have to do that is explain by the commit message of fef5ba797991f9335bcfc295942b684f9bf613a1: "Xen requires that all pages containing pagetable entries to be mapped read-only. If pages used for the initial pagetable are already mapped then we can change the mapping to RO. 
However, if they are initially unmapped, we need to make sure that when they are later mapped, they are also mapped RO. ..SNIP.. the pagetable setup code early_ioremaps the pages to write their entries, so we must make sure that mappings created in the early_ioremap fixmap area are mapped RW. (Those mappings are removed before the pages are presented to Xen as pagetable pages.)" We accomplish all this in mask_rw_pte by mapping RO all the pages mapped using early_ioremap apart from the last one that has been allocated because it is not a page table page yet (it has not been hooked into the page tables yet). Signed-off-by: Stefano Stabellini Acked-by: Konrad Rzeszutek Wilk LKML-Reference: Signed-off-by: H. Peter Anvin diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 6e27979..21058ad 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1488,10 +1488,12 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) /* * If the new pfn is within the range of the newly allocated * kernel pagetable, and it isn't being mapped into an - * early_ioremap fixmap slot, make sure it is RO. + * early_ioremap fixmap slot as a freshly allocated page, make sure + * it is RO. */ - if (!is_early_ioremap_ptep(ptep) && - pfn >= pgt_buf_start && pfn < pgt_buf_end) + if (((!is_early_ioremap_ptep(ptep) && + pfn >= pgt_buf_start && pfn < pgt_buf_end)) || + (is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1))) pte = pte_wrprotect(pte); return pte; -- cgit v0.10.2 From dadaa10b077133e5c03333131b82ecb13679af2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20de=20Peslo=C3=BCan?= Date: Sat, 19 Mar 2011 13:36:18 -0700 Subject: bonding: fix a typo in a comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nicolas de Pesloüan Signed-off-by: David S. Miller diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 1a6e9eb..338bea1 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2130,7 +2130,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) } /* -* First release a slave and than destroy the bond if no more slaves are left. +* First release a slave and then destroy the bond if no more slaves are left. * Must be under rtnl_lock when this function is called. */ static int bond_release_and_destroy(struct net_device *bond_dev, -- cgit v0.10.2 From b26fa4e0275426450238a14158bc1db24bb696e6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 19 Mar 2011 05:39:11 +0000 Subject: r8169: fix a bug in rtl8169_init_phy() commit 54405cde7624 (r8169: support control of advertising.) introduced a bug in rtl8169_init_phy() Reported-by: Piotr Hosowicz Signed-off-by: Eric Dumazet Cc: Oliver Neukum Cc: Francois Romieu Tested-by: Anca Emanuel Tested-by: Piotr Hosowicz Signed-off-by: David S. Miller diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 5e40351..493b0de 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -2685,9 +2685,9 @@ static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp) rtl8169_set_speed(dev, AUTONEG_ENABLE, SPEED_1000, DUPLEX_FULL, ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full | ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full | - tp->mii.supports_gmii ? + (tp->mii.supports_gmii ? 
ADVERTISED_1000baseT_Half | - ADVERTISED_1000baseT_Full : 0); + ADVERTISED_1000baseT_Full : 0)); if (RTL_R8(PHYstatus) & TBI_Enable) netif_info(tp, link, dev, "TBI auto-negotiating\n"); -- cgit v0.10.2 From a769f4968396093d5cc1b1a86204cef579784b24 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 19 Mar 2011 23:06:33 -0700 Subject: niu: Rename NIU parent platform device name to fix conflict. When the OF device driver bits were converted over to the platform device infrastructure in commit 74888760d40b3ac9054f9c5fa07b566c0676ba2d ("dt/net: Eliminate users of of_platform_{,un}register_driver") we inadvertantly created probing problems in the OF case. The NIU driver creates a dummy platform device to represent the board that contains one or more child NIU devices. Unfortunately we use the same name, "niu", as the OF device driver itself uses. The result is that we try to probe the dummy "niu" parent device we create, and since it has a NULL ofdevice pointer etc. everything explodes: [783019.128243] niu: niu.c:v1.1 (Apr 22, 2010) [783019.128810] Unable to handle kernel NULL pointer dereference [783019.128949] tsk->{mm,active_mm}->context = 000000000000039e [783019.129078] tsk->{mm,active_mm}->pgd = fffff803afc5a000 [783019.129206] \|/ ____ \|/ [783019.129213] "@'/ .. \`@" [783019.129220] /_| \__/ |_\ [783019.129226] \__U_/ [783019.129378] modprobe(2004): Oops [#1] [783019.129423] TSTATE: 0000000011001602 TPC: 0000000010052ff8 TNPC: 000000000061bbb4 Y: 00000000 Not tainted [783019.129542] TPC: [783019.129624] g0: 8080000000000000 g1: 0000000000000000 g2: 0000000010056000 g3: 0000000000000002 [783019.129733] g4: fffff803fc1da0c0 g5: fffff800441e2000 g6: fffff803fba84000 g7: 0000000000000000 [783019.129842] o0: fffff803fe7df010 o1: 0000000010055700 o2: 0000000000000000 o3: fffff803fbacaca0 [783019.129951] o4: 0000000000000080 o5: 0000000000777908 sp: fffff803fba866e1 ret_pc: 0000000010052ff4 [783019.130083] RPC: [783019.130165] l0: fffff803fe7df010 l1: fffff803fbacafc0 l2: fffff803fbacaca0 l3: ffffffffffffffed [783019.130273] l4: 0000000000000000 l5: 000000007fffffff l6: fffff803fba86f40 l7: 0000000000000001 [783019.130382] i0: fffff803fe7df000 i1: fffff803fc20aba0 i2: 0000000000000000 i3: 0000000000000001 [783019.130490] i4: 0000000000000000 i5: 0000000000000000 i6: fffff803fba867a1 i7: 000000000062038c [783019.130614] I7: Fix by simply renaming the parent device to "niu-board". Signed-off-by: David S. Miller diff --git a/drivers/net/niu.c b/drivers/net/niu.c index 40fa59e..32678b6 100644 --- a/drivers/net/niu.c +++ b/drivers/net/niu.c @@ -9501,7 +9501,7 @@ static struct niu_parent * __devinit niu_new_parent(struct niu *np, struct niu_parent *p; int i; - plat_dev = platform_device_register_simple("niu", niu_parent_index, + plat_dev = platform_device_register_simple("niu-board", niu_parent_index, NULL, 0); if (IS_ERR(plat_dev)) return NULL; -- cgit v0.10.2 From 5e0c1eb7e6b61998c7ecd39b7f69a15773d894d4 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sun, 20 Mar 2011 15:33:26 +0100 Subject: netfilter: ipset: fix address ranges at hash:*port* types The hash:*port* types with IPv4 silently ignored when address ranges with non TCP/UDP were added/deleted from the set and used the first address from the range only. 
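For reference, the core of this fix is a small helper, ip_set_proto_with_ports(), that centralizes the "does this protocol carry ports" test so the per-type add/delete code no longer bails out of range handling for every non-TCP/UDP protocol. A minimal userspace approximation of the helper added by the hunks below (the in-kernel version lives in include/linux/netfilter/ipset/ip_set_getport.h):

#include <stdbool.h>
#include <stdint.h>
#include <netinet/in.h>		/* IPPROTO_TCP, IPPROTO_UDP */

static inline bool ip_set_proto_with_ports(uint8_t proto)
{
	switch (proto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		return true;
	}
	return false;
}

With this in place, only the port loop is skipped for portless protocols; the address range itself is still iterated instead of silently collapsing to the first address.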
Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy diff --git a/include/linux/netfilter/ipset/ip_set_getport.h b/include/linux/netfilter/ipset/ip_set_getport.h index 3882a81..5aebd17 100644 --- a/include/linux/netfilter/ipset/ip_set_getport.h +++ b/include/linux/netfilter/ipset/ip_set_getport.h @@ -18,4 +18,14 @@ static inline bool ip_set_get_ip6_port(const struct sk_buff *skb, bool src, extern bool ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, __be16 *port); +static inline bool ip_set_proto_with_ports(u8 proto) +{ + switch (proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + return true; + } + return false; +} + #endif /*_IP_SET_GETPORT_H*/ diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index adbe787..b921414 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -150,6 +150,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_ipport4_elem data = { }; u32 ip, ip_to, p, port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -172,21 +173,15 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMP: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMP)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -195,7 +190,6 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], } if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] || tb[IPSET_ATTR_PORT_TO])) { ret = adtfn(set, &data, timeout); @@ -219,13 +213,12 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], } else ip_to = ip; - port = ntohs(data.port); - if (tb[IPSET_ATTR_PORT_TO]) { + port_to = port = ntohs(data.port); + if (with_ports && tb[IPSET_ATTR_PORT_TO]) { port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port > port_to) swap(port, port_to); - } else - port_to = port; + } for (; !before(ip_to, ip); ip++) for (p = port; p <= port_to; p++) { @@ -361,6 +354,7 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_ipport6_elem data = { }; u32 port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -385,21 +379,15 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMPV6: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMPV6)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -407,9 +395,7 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || - !tb[IPSET_ATTR_PORT_TO]) { + if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { ret = adtfn(set, &data, timeout); return 
ip_set_eexist(ret, flags) ? 0 : ret; } diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 22e23ab..4642872 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -154,6 +154,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_ipportip4_elem data = { }; u32 ip, ip_to, p, port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || @@ -180,21 +181,15 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMP: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMP)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -203,7 +198,6 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], } if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] || tb[IPSET_ATTR_PORT_TO])) { ret = adtfn(set, &data, timeout); @@ -227,13 +221,12 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], } else ip_to = ip; - port = ntohs(data.port); - if (tb[IPSET_ATTR_PORT_TO]) { + port_to = port = ntohs(data.port); + if (with_ports && tb[IPSET_ATTR_PORT_TO]) { port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port > port_to) swap(port, port_to); - } else - port_to = port; + } for (; !before(ip_to, ip); ip++) for (p = port; p <= port_to; p++) { @@ -375,6 +368,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_ipportip6_elem data = { }; u32 port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || @@ -403,21 +397,15 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMPV6: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMPV6)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -425,9 +413,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || - !tb[IPSET_ATTR_PORT_TO]) { + if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { ret = adtfn(set, &data, timeout); return ip_set_eexist(ret, flags) ? 
0 : ret; } diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 6033e8b..2cb84a5 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -174,6 +174,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_ipportnet4_elem data = { .cidr = HOST_MASK }; u32 ip, ip_to, p, port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || @@ -208,21 +209,15 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMP: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMP)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -231,7 +226,6 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], } if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] || tb[IPSET_ATTR_PORT_TO])) { ret = adtfn(set, &data, timeout); @@ -255,13 +249,12 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], } else ip_to = ip; - port = ntohs(data.port); - if (tb[IPSET_ATTR_PORT_TO]) { + port_to = port = ntohs(data.port); + if (with_ports && tb[IPSET_ATTR_PORT_TO]) { port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port > port_to) swap(port, port_to); - } else - port_to = port; + } for (; !before(ip_to, ip); ip++) for (p = port; p <= port_to; p++) { @@ -429,6 +422,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_ipportnet6_elem data = { .cidr = HOST_MASK }; u32 port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || @@ -465,21 +459,15 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMPV6: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMPV6)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -487,9 +475,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || - !tb[IPSET_ATTR_PORT_TO]) { + if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { ret = adtfn(set, &data, timeout); return ip_set_eexist(ret, flags) ? 
0 : ret; } diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 34a1656..8598676 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -170,6 +170,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_netport4_elem data = { .cidr = HOST_MASK }; u32 port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -198,21 +199,15 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMP: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMP)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -220,9 +215,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || - !tb[IPSET_ATTR_PORT_TO]) { + if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { ret = adtfn(set, &data, timeout); return ip_set_eexist(ret, flags) ? 0 : ret; } @@ -390,6 +383,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_netport6_elem data = { .cidr = HOST_MASK }; u32 port, port_to; u32 timeout = h->timeout; + bool with_ports = false; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -418,21 +412,15 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_PROTO]) { data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(data.proto); if (data.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else return -IPSET_ERR_MISSING_PROTO; - switch (data.proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_ICMPV6: - break; - default: + if (!(with_ports || data.proto == IPPROTO_ICMPV6)) data.port = 0; - break; - } if (tb[IPSET_ATTR_TIMEOUT]) { if (!with_timeout(h->timeout)) @@ -440,9 +428,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } - if (adt == IPSET_TEST || - !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || - !tb[IPSET_ATTR_PORT_TO]) { + if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { ret = adtfn(set, &data, timeout); return ip_set_eexist(ret, flags) ? 0 : ret; } -- cgit v0.10.2 From 5c1aba467828bf0574ec5754c84884d573f590af Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sun, 20 Mar 2011 15:35:01 +0100 Subject: netfilter: ipset: fix checking the type revision at create command The revision of the set type was not checked at the create command: if the userspace sent a valid set type but with not supported revision number, it'd create a loop. 
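A second part of the fix below tightens find_set_type_minmax(): with *min initialised to 0, the "revision < *min" test could never be true, so the minimum supported revision was always reported as 0 regardless of which revisions are actually registered, and chaining the two updates with "else if" could skip the maximum when a single revision exists. A sketch of the corrected scan (userspace approximation with a hypothetical struct; compare with the hunk below):

#include <stdint.h>
#include <string.h>

struct set_type { const char *name; uint8_t revision; };

static void revision_minmax(const struct set_type *types, int n,
			    const char *name, uint8_t *min, uint8_t *max)
{
	*min = 255;				/* was 0, so it could never be lowered */
	*max = 0;
	for (int i = 0; i < n; i++) {
		if (strcmp(types[i].name, name))
			continue;
		if (types[i].revision < *min)
			*min = types[i].revision;
		if (types[i].revision > *max)	/* independent test, not "else if" */
			*max = types[i].revision;
	}
}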
Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 618a615..d6b4823 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -94,16 +94,28 @@ static int find_set_type_get(const char *name, u8 family, u8 revision, struct ip_set_type **found) { + struct ip_set_type *type; + int err; + rcu_read_lock(); *found = find_set_type(name, family, revision); if (*found) { - int err = !try_module_get((*found)->me); - rcu_read_unlock(); - return err ? -EFAULT : 0; + err = !try_module_get((*found)->me) ? -EFAULT : 0; + goto unlock; } + /* Make sure the type is loaded but we don't support the revision */ + list_for_each_entry_rcu(type, &ip_set_type_list, list) + if (STREQ(type->name, name)) { + err = -IPSET_ERR_FIND_TYPE; + goto unlock; + } rcu_read_unlock(); return try_to_load_type(name); + +unlock: + rcu_read_unlock(); + return err; } /* Find a given set type by name and family. @@ -116,7 +128,7 @@ find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max) struct ip_set_type *type; bool found = false; - *min = *max = 0; + *min = 255; *max = 0; rcu_read_lock(); list_for_each_entry_rcu(type, &ip_set_type_list, list) if (STREQ(type->name, name) && @@ -124,7 +136,7 @@ find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max) found = true; if (type->revision < *min) *min = type->revision; - else if (type->revision > *max) + if (type->revision > *max) *max = type->revision; } rcu_read_unlock(); -- cgit v0.10.2 From db856674ac69e31946e56085239757cca3f7655f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 20 Mar 2011 15:40:06 +0100 Subject: netfilter: xtables: fix reentrancy commit f3c5c1bfd4308 (make ip_tables reentrant) introduced a race in handling the stackptr restore, at the end of ipt_do_table() We should do it before the call to xt_info_rdunlock_bh(), or we allow cpu preemption and another cpu overwrites stackptr of original one. A second fix is to change the underflow test to check the origptr value instead of 0 to detect underflow, or else we allow a jump from different hooks. 
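The ordering problem can be seen in a plain userspace analogue: a value shared under a lock must be restored before that lock is dropped, otherwise another context can slip into the window and have its update clobbered. This is only an illustration with hypothetical names (a pthread mutex standing in for the per-cpu read lock); the real change is the two-line reorder in the ipt_do_table()/ip6t_do_table() hunks below, together with switching the underflow test from "*stackptr == 0" to "*stackptr <= origptr":

#include <pthread.h>

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int stackptr;

static void traverse(unsigned int origptr)
{
	pthread_mutex_lock(&table_lock);
	stackptr = origptr + 1;			/* rule traversal grows the jump stack */
	/* ... walk the ruleset ... */
	stackptr = origptr;			/* restore while exclusion is still held */
	pthread_mutex_unlock(&table_lock);	/* only now may another context run */
}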
Signed-off-by: Eric Dumazet Cc: Jan Engelhardt Signed-off-by: Patrick McHardy diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index b09ed0d..ffcea0d 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -387,7 +387,7 @@ ipt_do_table(struct sk_buff *skb, verdict = (unsigned)(-v) - 1; break; } - if (*stackptr == 0) { + if (*stackptr <= origptr) { e = get_entry(table_base, private->underflow[hook]); pr_debug("Underflow (this is normal) " @@ -427,10 +427,10 @@ ipt_do_table(struct sk_buff *skb, /* Verdict */ break; } while (!acpar.hotdrop); - xt_info_rdunlock_bh(); pr_debug("Exiting %s; resetting sp from %u to %u\n", __func__, *stackptr, origptr); *stackptr = origptr; + xt_info_rdunlock_bh(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; #else diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index c9598a9..0b2af9b 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -410,7 +410,7 @@ ip6t_do_table(struct sk_buff *skb, verdict = (unsigned)(-v) - 1; break; } - if (*stackptr == 0) + if (*stackptr <= origptr) e = get_entry(table_base, private->underflow[hook]); else @@ -441,8 +441,8 @@ ip6t_do_table(struct sk_buff *skb, break; } while (!acpar.hotdrop); - xt_info_rdunlock_bh(); *stackptr = origptr; + xt_info_rdunlock_bh(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; -- cgit v0.10.2 From 961ed183a9fd080cf306c659b8736007e44065a5 Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Sun, 20 Mar 2011 15:42:52 +0100 Subject: netfilter: ipt_CLUSTERIP: fix buffer overflow 'buffer' string is copied from userspace. It is not checked whether it is zero terminated. This may lead to overflow inside of simple_strtoul(). Changli Gao suggested to copy not more than user supplied 'size' bytes. It was introduced before the git epoch. Files "ipt_CLUSTERIP/*" are root writable only by default, however, on some setups permissions might be relaxed to e.g. network admin user. Signed-off-by: Vasiliy Kulikov Acked-by: Changli Gao Signed-off-by: Patrick McHardy diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 403ca57..d609ac3 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -664,8 +664,11 @@ static ssize_t clusterip_proc_write(struct file *file, const char __user *input, char buffer[PROC_WRITELEN+1]; unsigned long nodenum; - if (copy_from_user(buffer, input, PROC_WRITELEN)) + if (size > PROC_WRITELEN) + return -EIO; + if (copy_from_user(buffer, input, size)) return -EFAULT; + buffer[size] = 0; if (*buffer == '+') { nodenum = simple_strtoul(buffer+1, NULL, 10); -- cgit v0.10.2 From a24c5a0ea902bcda348f086bd909cc2d6e305bf8 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 15 Mar 2011 12:45:21 -0500 Subject: slub: Dont define useless label in the !CONFIG_CMPXCHG_LOCAL case The redo label needs #ifdeffery. Fixes the following problem introduced by commit 8a5ec0ba42c4 ("Lockless (and preemptless) fastpaths for slub"): mm/slub.c: In function 'slab_free': mm/slub.c:2124: warning: label 'redo' defined but not used Reported-by: Stephen Rothwell Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg diff --git a/mm/slub.c b/mm/slub.c index 65030c7..f32aee3 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2073,9 +2073,11 @@ static __always_inline void slab_free(struct kmem_cache *s, #ifndef CONFIG_CMPXCHG_LOCAL local_irq_save(flags); -#endif +#else redo: +#endif + /* * Determine the currently cpus per cpu slab. 
* The cpu may change afterward. However that does not matter since -- cgit v0.10.2 From bbb1c832ac52f61b926a4635dc1a708f03250e88 Mon Sep 17 00:00:00 2001 From: "Arnaud Patard (Rtp)" Date: Thu, 6 Jan 2011 21:52:46 +0200 Subject: ARM: S3C2410: H1940: Add keys device Add buttons definition for H1940. Signed-off-by: Vasily Khoruzhick Signed-off-by: Ben Dooks diff --git a/arch/arm/mach-s3c2410/mach-h1940.c b/arch/arm/mach-s3c2410/mach-h1940.c index 1e93f17..0756440 100644 --- a/arch/arm/mach-s3c2410/mach-h1940.c +++ b/arch/arm/mach-s3c2410/mach-h1940.c @@ -23,6 +23,8 @@ #include #include #include +#include +#include #include #include #include